LLVM  15.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/Support/Debug.h"
36 
37 #define DEBUG_TYPE "legalizer"
38 
39 using namespace llvm;
40 using namespace LegalizeActions;
41 using namespace MIPatternMatch;
42 
43 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
44 ///
45 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
46 /// with any leftover piece as type \p LeftoverTy
47 ///
48 /// Returns -1 in the first element of the pair if the breakdown is not
49 /// satisfiable.
50 static std::pair<int, int>
51 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
52  assert(!LeftoverTy.isValid() && "this is an out argument");
53 
54  unsigned Size = OrigTy.getSizeInBits();
55  unsigned NarrowSize = NarrowTy.getSizeInBits();
56  unsigned NumParts = Size / NarrowSize;
57  unsigned LeftoverSize = Size - NumParts * NarrowSize;
58  assert(Size > NarrowSize);
59 
60  if (LeftoverSize == 0)
61  return {NumParts, 0};
62 
63  if (NarrowTy.isVector()) {
64  unsigned EltSize = OrigTy.getScalarSizeInBits();
65  if (LeftoverSize % EltSize != 0)
66  return {-1, -1};
67  LeftoverTy = LLT::scalarOrVector(
68  ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
69  } else {
70  LeftoverTy = LLT::scalar(LeftoverSize);
71  }
72 
73  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
74  return std::make_pair(NumParts, NumLeftover);
75 }
76 
// Return the IR floating-point type whose bit width exactly matches the
// scalar LLT, or nullptr when there is no counterpart. Used when forming
// floating-point libcall signatures.
// NOTE(review): the line numbered 78 below lost its text in extraction — it
// presumably held the signature
// `static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {`; confirm
// against the upstream file before relying on this transcript.
78 
 79  if (!Ty.isScalar())
 80  return nullptr;
 81 
 82  switch (Ty.getSizeInBits()) {
 83  case 16:
 84  return Type::getHalfTy(Ctx);
 85  case 32:
 86  return Type::getFloatTy(Ctx);
 87  case 64:
 88  return Type::getDoubleTy(Ctx);
 89  case 80:
 90  return Type::getX86_FP80Ty(Ctx);
 91  case 128:
 92  return Type::getFP128Ty(Ctx);
 93  default:
 // No libcall-compatible FP type of this width (e.g. bfloat, odd widths).
 94  return nullptr;
 95  }
 96 }
97 
// LegalizerHelper constructor: captures the builder/observer and looks up the
// LegalizerInfo and TargetLowering from the function's subtarget.
// NOTE(review): original lines 98 and 100 (the leading part of the
// constructor signature, presumably the `MachineFunction &MF` and
// `MachineIRBuilder &Builder` parameters) were lost in extraction — confirm
// against the upstream file.
99  GISelChangeObserver &Observer,
101  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
102  LI(*MF.getSubtarget().getLegalizerInfo()),
103  TLI(*MF.getSubtarget().getTargetLowering()) { }
104 
// LegalizerHelper constructor variant taking an explicit LegalizerInfo (LI)
// instead of querying the subtarget for it.
// NOTE(review): original lines 105 and 107 (the leading part of the
// signature) were lost in extraction — confirm against the upstream file.
106  GISelChangeObserver &Observer,
108  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
109  TLI(*MF.getSubtarget().getTargetLowering()) { }
110 
// Perform one legalization step on MI: query the LegalizerInfo for the action
// to take and dispatch to the matching strategy. Intrinsics are handled
// specially via legalizeIntrinsic before any action lookup.
// NOTE(review): original lines 112 (the start of the signature, presumably
// `LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(
// MachineInstr &MI,`) and 116 (presumably
// `MIRBuilder.setInstrAndDebugLoc(MI);`) were lost in extraction — confirm
// against the upstream file.
113  LostDebugLocObserver &LocObserver) {
114  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
115 
117 
// Intrinsics bypass the action table; the target decides directly.
118  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
119  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
120  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
121  auto Step = LI.getAction(MI, MRI);
122  switch (Step.Action) {
123  case Legal:
124  LLVM_DEBUG(dbgs() << ".. Already legal\n");
125  return AlreadyLegal;
126  case Libcall:
127  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
128  return libcall(MI, LocObserver);
129  case NarrowScalar:
130  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
131  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
132  case WidenScalar:
133  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
134  return widenScalar(MI, Step.TypeIdx, Step.NewType);
135  case Bitcast:
136  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
137  return bitcast(MI, Step.TypeIdx, Step.NewType);
138  case Lower:
139  LLVM_DEBUG(dbgs() << ".. Lower\n");
140  return lower(MI, Step.TypeIdx, Step.NewType);
141  case FewerElements:
142  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
143  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
144  case MoreElements:
145  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
146  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
147  case Custom:
148  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
149  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
150  default:
151  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
152  return UnableToLegalize;
153  }
154 }
155 
156 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
157  SmallVectorImpl<Register> &VRegs) {
158  for (int i = 0; i < NumParts; ++i)
159  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
160  MIRBuilder.buildUnmerge(VRegs, Reg);
161 }
162 
// Split Reg (of type RegTy) into as many MainTy pieces as fit (into VRegs),
// with any remaining bits extracted as LeftoverTy pieces (into LeftoverRegs).
// LeftoverTy is an out argument. Always returns true.
// NOTE(review): original line 165 (presumably the
// `SmallVectorImpl<Register> &VRegs,` parameter of the signature) was lost in
// extraction — confirm against the upstream file.
163 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
164  LLT MainTy, LLT &LeftoverTy,
166  SmallVectorImpl<Register> &LeftoverRegs) {
167  assert(!LeftoverTy.isValid() && "this is an out argument");
168 
169  unsigned RegSize = RegTy.getSizeInBits();
170  unsigned MainSize = MainTy.getSizeInBits();
171  unsigned NumParts = RegSize / MainSize;
172  unsigned LeftoverSize = RegSize - NumParts * MainSize;
173 
174  // Use an unmerge when possible.
175  if (LeftoverSize == 0) {
176  for (unsigned I = 0; I < NumParts; ++I)
177  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
178  MIRBuilder.buildUnmerge(VRegs, Reg);
179  return true;
180  }
181 
182  // Perform irregular split. Leftover is last element of RegPieces.
183  if (MainTy.isVector()) {
184  SmallVector<Register, 8> RegPieces;
185  extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
// All but the last piece are MainTy-sized; the last is the leftover.
186  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
187  VRegs.push_back(RegPieces[i]);
188  LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
189  LeftoverTy = MRI.getType(LeftoverRegs[0]);
190  return true;
191  }
192 
193  LeftoverTy = LLT::scalar(LeftoverSize);
194  // For irregular sizes, extract the individual parts.
195  for (unsigned I = 0; I != NumParts; ++I) {
196  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
197  VRegs.push_back(NewReg);
198  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
199  }
200 
// Pull the trailing bits out as one or more LeftoverTy extracts.
201  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
202  Offset += LeftoverSize) {
203  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
204  LeftoverRegs.push_back(NewReg);
205  MIRBuilder.buildExtract(NewReg, Reg, Offset);
206  }
207 
208  return true;
209 }
210 
// Split the vector Reg into sub-vectors of NumElts elements (appended to
// VRegs); when the element count does not divide evenly, the final entry is a
// smaller "leftover" piece (a single element or a narrower vector).
// NOTE(review): original line 229 (presumably the local declaration
// `SmallVector<Register, 8> Elts;`) was lost in extraction — confirm against
// the upstream file.
211 void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
212  SmallVectorImpl<Register> &VRegs) {
213  LLT RegTy = MRI.getType(Reg);
214  assert(RegTy.isVector() && "Expected a vector type");
215 
216  LLT EltTy = RegTy.getElementType();
// NumElts == 1 degenerates to plain scalar pieces.
217  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
218  unsigned RegNumElts = RegTy.getNumElements();
219  unsigned LeftoverNumElts = RegNumElts % NumElts;
220  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
221 
222  // Perfect split without leftover
223  if (LeftoverNumElts == 0)
224  return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
225 
226  // Irregular split. Provide direct access to all elements for artifact
227  // combiner using unmerge to elements. Then build vectors with NumElts
228  // elements. Remaining element(s) will be (used to build vector) Leftover.
230  extractParts(Reg, EltTy, RegNumElts, Elts);
231 
232  unsigned Offset = 0;
233  // Requested sub-vectors of NarrowTy.
234  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
235  ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
236  VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
237  }
238 
239  // Leftover element(s).
240  if (LeftoverNumElts == 1) {
241  VRegs.push_back(Elts[Offset]);
242  } else {
243  LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
244  ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
245  VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
246  }
247 }
248 
249 void LegalizerHelper::insertParts(Register DstReg,
250  LLT ResultTy, LLT PartTy,
251  ArrayRef<Register> PartRegs,
252  LLT LeftoverTy,
253  ArrayRef<Register> LeftoverRegs) {
254  if (!LeftoverTy.isValid()) {
255  assert(LeftoverRegs.empty());
256 
257  if (!ResultTy.isVector()) {
258  MIRBuilder.buildMerge(DstReg, PartRegs);
259  return;
260  }
261 
262  if (PartTy.isVector())
263  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
264  else
265  MIRBuilder.buildBuildVector(DstReg, PartRegs);
266  return;
267  }
268 
269  // Merge sub-vectors with different number of elements and insert into DstReg.
270  if (ResultTy.isVector()) {
271  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
272  SmallVector<Register, 8> AllRegs;
273  for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
274  AllRegs.push_back(Reg);
275  return mergeMixedSubvectors(DstReg, AllRegs);
276  }
277 
278  SmallVector<Register> GCDRegs;
279  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
280  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
281  extractGCDType(GCDRegs, GCDTy, PartReg);
282  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
283  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
284 }
285 
286 void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
287  Register Reg) {
288  LLT Ty = MRI.getType(Reg);
289  SmallVector<Register, 8> RegElts;
290  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
291  Elts.append(RegElts);
292 }
293 
294 /// Merge \p PartRegs with different types into \p DstReg.
295 void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
296  ArrayRef<Register> PartRegs) {
297  SmallVector<Register, 8> AllElts;
298  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
299  appendVectorElts(AllElts, PartRegs[i]);
300 
301  Register Leftover = PartRegs[PartRegs.size() - 1];
302  if (MRI.getType(Leftover).isScalar())
303  AllElts.push_back(Leftover);
304  else
305  appendVectorElts(AllElts, Leftover);
306 
307  MIRBuilder.buildMerge(DstReg, AllElts);
308 }
309 
310 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
// NOTE(review): original line 311 (the function signature, presumably
// `static void getUnmergeResults(SmallVectorImpl<Register> &Regs,`) was lost
// in extraction — confirm against the upstream file.
312  const MachineInstr &MI) {
313  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
314 
315  const int StartIdx = Regs.size();
// Every operand except the trailing source is a def.
316  const int NumResults = MI.getNumOperands() - 1;
317  Regs.resize(Regs.size() + NumResults);
318  for (int I = 0; I != NumResults; ++I)
319  Regs[StartIdx + I] = MI.getOperand(I).getReg();
320 }
321 
322 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
323  LLT GCDTy, Register SrcReg) {
324  LLT SrcTy = MRI.getType(SrcReg);
325  if (SrcTy == GCDTy) {
326  // If the source already evenly divides the result type, we don't need to do
327  // anything.
328  Parts.push_back(SrcReg);
329  } else {
330  // Need to split into common type sized pieces.
331  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
332  getUnmergeResults(Parts, *Unmerge);
333  }
334 }
335 
336 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
337  LLT NarrowTy, Register SrcReg) {
338  LLT SrcTy = MRI.getType(SrcReg);
339  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
340  extractGCDType(Parts, GCDTy, SrcReg);
341  return GCDTy;
342 }
343 
// Merge GCDTy-typed pieces (in VRegs) up into NarrowTy-typed pieces covering
// the LCM of DstTy and NarrowTy, padding missing high pieces according to
// PadStrategy (G_ZEXT: zeros, G_ANYEXT: undef, G_SEXT: sign bits of the last
// source). On return VRegs holds the NarrowTy pieces; the LCM type is
// returned.
// NOTE(review): original lines 345 (presumably the
// `SmallVectorImpl<Register> &VRegs,` parameter) and 367 (the right-hand side
// of the shift-amount constant, presumably something like
// `MIRBuilder.buildConstant(GCDTy, GCDTy.getSizeInBits() - 1);`) were lost in
// extraction — confirm against the upstream file.
344 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
346  unsigned PadStrategy) {
347  LLT LCMTy = getLCMType(DstTy, NarrowTy);
348 
349  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
350  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
351  int NumOrigSrc = VRegs.size();
352 
353  Register PadReg;
354 
355  // Get a value we can use to pad the source value if the sources won't evenly
356  // cover the result type.
357  if (NumOrigSrc < NumParts * NumSubParts) {
358  if (PadStrategy == TargetOpcode::G_ZEXT)
359  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
360  else if (PadStrategy == TargetOpcode::G_ANYEXT)
361  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
362  else {
363  assert(PadStrategy == TargetOpcode::G_SEXT);
364 
365  // Shift the sign bit of the low register through the high register.
366  auto ShiftAmt =
368  PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
369  }
370  }
371 
372  // Registers for the final merge to be produced.
373  SmallVector<Register, 4> Remerge(NumParts);
374 
375  // Registers needed for intermediate merges, which will be merged into a
376  // source for Remerge.
377  SmallVector<Register, 4> SubMerge(NumSubParts);
378 
379  // Once we've fully read off the end of the original source bits, we can reuse
380  // the same high bits for remaining padding elements.
381  Register AllPadReg;
382 
383  // Build merges to the LCM type to cover the original result type.
384  for (int I = 0; I != NumParts; ++I) {
385  bool AllMergePartsArePadding = true;
386 
387  // Build the requested merges to the requested type.
388  for (int J = 0; J != NumSubParts; ++J) {
389  int Idx = I * NumSubParts + J;
390  if (Idx >= NumOrigSrc) {
391  SubMerge[J] = PadReg;
392  continue;
393  }
394 
395  SubMerge[J] = VRegs[Idx];
396 
397  // There are meaningful bits here we can't reuse later.
398  AllMergePartsArePadding = false;
399  }
400 
401  // If we've filled up a complete piece with padding bits, we can directly
402  // emit the natural sized constant if applicable, rather than a merge of
403  // smaller constants.
404  if (AllMergePartsArePadding && !AllPadReg) {
405  if (PadStrategy == TargetOpcode::G_ANYEXT)
406  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
407  else if (PadStrategy == TargetOpcode::G_ZEXT)
408  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
409 
410  // If this is a sign extension, we can't materialize a trivial constant
411  // with the right type and have to produce a merge.
412  }
413 
414  if (AllPadReg) {
415  // Avoid creating additional instructions if we're just adding additional
416  // copies of padding bits.
417  Remerge[I] = AllPadReg;
418  continue;
419  }
420 
421  if (NumSubParts == 1)
422  Remerge[I] = SubMerge[0];
423  else
424  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
425 
426  // In the sign extend padding case, re-use the first all-signbit merge.
427  if (AllMergePartsArePadding && !AllPadReg)
428  AllPadReg = Remerge[I];
429  }
430 
// The caller continues working in terms of the NarrowTy pieces.
431  VRegs = std::move(Remerge);
432  return LCMTy;
433 }
434 
435 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
436  ArrayRef<Register> RemergeRegs) {
437  LLT DstTy = MRI.getType(DstReg);
438 
439  // Create the merge to the widened source, and extract the relevant bits into
440  // the result.
441 
442  if (DstTy == LCMTy) {
443  MIRBuilder.buildMerge(DstReg, RemergeRegs);
444  return;
445  }
446 
447  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
448  if (DstTy.isScalar() && LCMTy.isScalar()) {
449  MIRBuilder.buildTrunc(DstReg, Remerge);
450  return;
451  }
452 
453  if (LCMTy.isVector()) {
454  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
455  SmallVector<Register, 8> UnmergeDefs(NumDefs);
456  UnmergeDefs[0] = DstReg;
457  for (unsigned I = 1; I != NumDefs; ++I)
458  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
459 
460  MIRBuilder.buildUnmerge(UnmergeDefs,
461  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
462  return;
463  }
464 
465  llvm_unreachable("unhandled case");
466 }
467 
468 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
469 #define RTLIBCASE_INT(LibcallPrefix) \
470  do { \
471  switch (Size) { \
472  case 32: \
473  return RTLIB::LibcallPrefix##32; \
474  case 64: \
475  return RTLIB::LibcallPrefix##64; \
476  case 128: \
477  return RTLIB::LibcallPrefix##128; \
478  default: \
479  llvm_unreachable("unexpected size"); \
480  } \
481  } while (0)
482 
483 #define RTLIBCASE(LibcallPrefix) \
484  do { \
485  switch (Size) { \
486  case 32: \
487  return RTLIB::LibcallPrefix##32; \
488  case 64: \
489  return RTLIB::LibcallPrefix##64; \
490  case 80: \
491  return RTLIB::LibcallPrefix##80; \
492  case 128: \
493  return RTLIB::LibcallPrefix##128; \
494  default: \
495  llvm_unreachable("unexpected size"); \
496  } \
497  } while (0)
498 
499  switch (Opcode) {
500  case TargetOpcode::G_SDIV:
501  RTLIBCASE_INT(SDIV_I);
502  case TargetOpcode::G_UDIV:
503  RTLIBCASE_INT(UDIV_I);
504  case TargetOpcode::G_SREM:
505  RTLIBCASE_INT(SREM_I);
506  case TargetOpcode::G_UREM:
507  RTLIBCASE_INT(UREM_I);
508  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
509  RTLIBCASE_INT(CTLZ_I);
510  case TargetOpcode::G_FADD:
511  RTLIBCASE(ADD_F);
512  case TargetOpcode::G_FSUB:
513  RTLIBCASE(SUB_F);
514  case TargetOpcode::G_FMUL:
515  RTLIBCASE(MUL_F);
516  case TargetOpcode::G_FDIV:
517  RTLIBCASE(DIV_F);
518  case TargetOpcode::G_FEXP:
519  RTLIBCASE(EXP_F);
520  case TargetOpcode::G_FEXP2:
521  RTLIBCASE(EXP2_F);
522  case TargetOpcode::G_FREM:
523  RTLIBCASE(REM_F);
524  case TargetOpcode::G_FPOW:
525  RTLIBCASE(POW_F);
526  case TargetOpcode::G_FMA:
527  RTLIBCASE(FMA_F);
528  case TargetOpcode::G_FSIN:
529  RTLIBCASE(SIN_F);
530  case TargetOpcode::G_FCOS:
531  RTLIBCASE(COS_F);
532  case TargetOpcode::G_FLOG10:
533  RTLIBCASE(LOG10_F);
534  case TargetOpcode::G_FLOG:
535  RTLIBCASE(LOG_F);
536  case TargetOpcode::G_FLOG2:
537  RTLIBCASE(LOG2_F);
538  case TargetOpcode::G_FCEIL:
539  RTLIBCASE(CEIL_F);
540  case TargetOpcode::G_FFLOOR:
541  RTLIBCASE(FLOOR_F);
542  case TargetOpcode::G_FMINNUM:
543  RTLIBCASE(FMIN_F);
544  case TargetOpcode::G_FMAXNUM:
545  RTLIBCASE(FMAX_F);
546  case TargetOpcode::G_FSQRT:
547  RTLIBCASE(SQRT_F);
548  case TargetOpcode::G_FRINT:
549  RTLIBCASE(RINT_F);
550  case TargetOpcode::G_FNEARBYINT:
551  RTLIBCASE(NEARBYINT_F);
552  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
553  RTLIBCASE(ROUNDEVEN_F);
554  }
555  llvm_unreachable("Unknown libcall function");
556 }
557 
558 /// True if an instruction is in tail position in its caller. Intended for
559 /// legalizing libcalls as tail calls when possible.
// NOTE(review): original lines 560 and 562 (the function signature,
// presumably `static bool isLibCallInTailPosition(MachineInstr &MI,` ...
// `MachineRegisterInfo &MRI) {`) were lost in extraction — confirm against
// the upstream file.
561  const TargetInstrInfo &TII,
563  MachineBasicBlock &MBB = *MI.getParent();
564  const Function &F = MBB.getParent()->getFunction();
565 
566  // Conservatively require the attributes of the call to match those of
567  // the return. Ignore NoAlias and NonNull because they don't affect the
568  // call sequence.
569  AttributeList CallerAttrs = F.getAttributes();
570  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
571  .removeAttribute(Attribute::NoAlias)
572  .removeAttribute(Attribute::NonNull)
573  .hasAttributes())
574  return false;
575 
576  // It's not safe to eliminate the sign / zero extension of the return value.
577  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
578  CallerAttrs.hasRetAttr(Attribute::SExt))
579  return false;
580 
581  // Only tail call if the following instruction is a standard return or if we
582  // have a `thisreturn` callee, and a sequence like:
583  //
584  // G_MEMCPY %0, %1, %2
585  // $x0 = COPY %0
586  // RET_ReallyLR implicit $x0
587  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
588  if (Next != MBB.instr_end() && Next->isCopy()) {
// Only the mem* libcalls return their destination pointer, so only they can
// use the COPY-to-return ('thisreturn') form; bzero returns void.
589  switch (MI.getOpcode()) {
590  default:
591  llvm_unreachable("unsupported opcode");
592  case TargetOpcode::G_BZERO:
593  return false;
594  case TargetOpcode::G_MEMCPY:
595  case TargetOpcode::G_MEMMOVE:
596  case TargetOpcode::G_MEMSET:
597  break;
598  }
599 
// The COPY must move this instruction's dest vreg into a physical register
// that the following return implicitly uses.
600  Register VReg = MI.getOperand(0).getReg();
601  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
602  return false;
603 
604  Register PReg = Next->getOperand(0).getReg();
605  if (!PReg.isPhysical())
606  return false;
607 
608  auto Ret = next_nodbg(Next, MBB.instr_end());
609  if (Ret == MBB.instr_end() || !Ret->isReturn())
610  return false;
611 
612  if (Ret->getNumImplicitOperands() != 1)
613  return false;
614 
615  if (PReg != Ret->getOperand(0).getReg())
616  return false;
617 
618  // Skip over the COPY that we just validated.
619  Next = Ret;
620  }
621 
622  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
623  return false;
624 
625  return true;
626 }
627 
// llvm::createLibcall — build and lower a call to the named libcall using the
// target's CallLowering, with the given result/argument info and calling
// convention.
// NOTE(review): several original lines were lost in extraction: 628
// (presumably the return type `LegalizerHelper::LegalizeResult`), 631
// (presumably the `ArrayRef<CallLowering::ArgInfo> Args` parameter), 635/637
// (presumably the `CallLowering::CallLoweringInfo Info;` local and the
// `Info.Callee` setup), and 641/643 (presumably the
// `return LegalizerHelper::UnableToLegalize;` / `Legalized;` statements).
// Confirm against the upstream file.
629 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
630  const CallLowering::ArgInfo &Result,
632  const CallingConv::ID CC) {
633  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
636  Info.CallConv = CC;
638  Info.OrigRet = Result;
639  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
640  if (!CLI.lowerCall(MIRBuilder, Info))
642 
644 }
645 
// Overload resolving a RTLIB::Libcall enumerator to its target-specific name
// and calling convention before delegating to the by-name createLibcall.
// NOTE(review): original lines 646-647 and 649 (the leading part of the
// signature, presumably the return type, `llvm::createLibcall(...` and the
// `ArrayRef<CallLowering::ArgInfo> Args) {` parameter) were lost in
// extraction — confirm against the upstream file.
648  const CallLowering::ArgInfo &Result,
650  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
651  const char *Name = TLI.getLibcallName(Libcall);
652  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
653  return createLibcall(MIRBuilder, Name, Result, Args, CC);
654 }
655 
656 // Useful for libcalls where all operands have the same type.
// Builds the libcall for MI's opcode at the given bit Size, passing every
// non-def operand (and receiving the result) as OpType.
// NOTE(review): original lines 657 (presumably the return type
// `static LegalizerHelper::LegalizeResult`) and 663 (presumably the local
// `SmallVector<CallLowering::ArgInfo, 3> Args;`) were lost in extraction —
// confirm against the upstream file.
658 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
659  Type *OpType) {
660  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
661 
662  // FIXME: What does the original arg index mean here?
664  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
665  Args.push_back({MO.getReg(), OpType, 0});
666  return createLibcall(MIRBuilder, Libcall,
667  {MI.getOperand(0).getReg(), OpType, 0}, Args);
668 }
669 
// createMemLibcall — lower G_BZERO/G_MEMCPY/G_MEMMOVE/G_MEMSET to a call of
// the corresponding runtime function, optionally emitting it as a tail call
// (the instruction's final immediate operand is the 'tail' flag).
// NOTE(review): several original lines were lost in extraction: 670-671
// (presumably the signature
// `LegalizerHelper::LegalizeResult llvm::createMemLibcall(MachineIRBuilder
// &MIRBuilder, MachineRegisterInfo &MRI,`), 675 (presumably
// `SmallVector<CallLowering::ArgInfo, 3> Args;`), 719/731 (presumably
// `return LegalizerHelper::UnableToLegalize;`), 722/724 (presumably the
// `CallLowering::CallLoweringInfo Info;` local and `Info.Callee` setup), and
// 755 (presumably `return LegalizerHelper::Legalized;`). Confirm upstream.
672  MachineInstr &MI, LostDebugLocObserver &LocObserver) {
673  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
674 
676  // Add all the args, except for the last which is an imm denoting 'tail'.
677  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
678  Register Reg = MI.getOperand(i).getReg();
679 
680  // Need derive an IR type for call lowering.
681  LLT OpLLT = MRI.getType(Reg);
682  Type *OpTy = nullptr;
683  if (OpLLT.isPointer())
684  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
685  else
686  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
687  Args.push_back({Reg, OpTy, 0});
688  }
689 
690  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
691  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
692  RTLIB::Libcall RTLibcall;
693  unsigned Opc = MI.getOpcode();
694  switch (Opc) {
695  case TargetOpcode::G_BZERO:
696  RTLibcall = RTLIB::BZERO;
697  break;
698  case TargetOpcode::G_MEMCPY:
699  RTLibcall = RTLIB::MEMCPY;
// mem* functions return their first (destination) argument.
700  Args[0].Flags[0].setReturned();
701  break;
702  case TargetOpcode::G_MEMMOVE:
703  RTLibcall = RTLIB::MEMMOVE;
704  Args[0].Flags[0].setReturned();
705  break;
706  case TargetOpcode::G_MEMSET:
707  RTLibcall = RTLIB::MEMSET;
708  Args[0].Flags[0].setReturned();
709  break;
710  default:
711  llvm_unreachable("unsupported opcode");
712  }
713  const char *Name = TLI.getLibcallName(RTLibcall);
714 
715  // Unsupported libcall on the target.
716  if (!Name) {
717  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
718  << MIRBuilder.getTII().getName(Opc) << "\n");
720  }
721 
723  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
725  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
// Tail-call only when the 'tail' imm is set AND the position allows it.
726  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
727  isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
728 
729  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
730  if (!CLI.lowerCall(MIRBuilder, Info))
732 
733  if (Info.LoweredTailCall) {
734  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
735 
736  // Check debug locations before removing the return.
737  LocObserver.checkpoint(true);
738 
739  // We must have a return following the call (or debug insts) to get past
740  // isLibCallInTailPosition.
741  do {
742  MachineInstr *Next = MI.getNextNode();
743  assert(Next &&
744  (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
745  "Expected instr following MI to be return or debug inst?");
746  // We lowered a tail call, so the call is now the return from the block.
747  // Delete the old return.
748  Next->eraseFromParent();
749  } while (MI.getNextNode());
750 
751  // We expect to lose the debug location from the return.
752  LocObserver.checkpoint(false);
753  }
754 
756 }
757 
758 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
759  Type *FromType) {
760  auto ToMVT = MVT::getVT(ToType);
761  auto FromMVT = MVT::getVT(FromType);
762 
763  switch (Opcode) {
764  case TargetOpcode::G_FPEXT:
765  return RTLIB::getFPEXT(FromMVT, ToMVT);
766  case TargetOpcode::G_FPTRUNC:
767  return RTLIB::getFPROUND(FromMVT, ToMVT);
768  case TargetOpcode::G_FPTOSI:
769  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
770  case TargetOpcode::G_FPTOUI:
771  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
772  case TargetOpcode::G_SITOFP:
773  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
774  case TargetOpcode::G_UITOFP:
775  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
776  }
777  llvm_unreachable("Unsupported libcall function");
778 }
779 
// conversionLibcall — emit the libcall converting MI's source operand
// (FromType) into its destination (ToType).
// NOTE(review): original lines 780-781 (the leading part of the signature,
// presumably `static LegalizerHelper::LegalizeResult conversionLibcall(
// MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,`) were lost
// in extraction — confirm against the upstream file.
782  Type *FromType) {
783  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
784  return createLibcall(MIRBuilder, Libcall,
785  {MI.getOperand(0).getReg(), ToType, 0},
786  {{MI.getOperand(1).getReg(), FromType, 0}});
787 }
788 
// LegalizerHelper::libcall — replace MI with a call to the equivalent runtime
// library function, erasing MI on success.
// NOTE(review): several original lines were lost in extraction: 789-790
// (presumably the signature `LegalizerHelper::LegalizeResult
// LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver
// &LocObserver) {`) and 847/859/874 (presumably the
// `LegalizeResult Status = conversionLibcall(` call heads for the
// FPEXT/FPTRUNC, FPTOSI/FPTOUI and SITOFP/UITOFP cases). Confirm upstream.
791  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
792  unsigned Size = LLTy.getSizeInBits();
793  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
794 
795  switch (MI.getOpcode()) {
796  default:
797  return UnableToLegalize;
// Integer ops: one libcall per bit width, operands share the result type.
798  case TargetOpcode::G_SDIV:
799  case TargetOpcode::G_UDIV:
800  case TargetOpcode::G_SREM:
801  case TargetOpcode::G_UREM:
802  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
803  Type *HLTy = IntegerType::get(Ctx, Size);
804  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
805  if (Status != Legalized)
806  return Status;
807  break;
808  }
// FP ops: need an exactly-matching IR float type (32/64/80/128 bits).
809  case TargetOpcode::G_FADD:
810  case TargetOpcode::G_FSUB:
811  case TargetOpcode::G_FMUL:
812  case TargetOpcode::G_FDIV:
813  case TargetOpcode::G_FMA:
814  case TargetOpcode::G_FPOW:
815  case TargetOpcode::G_FREM:
816  case TargetOpcode::G_FCOS:
817  case TargetOpcode::G_FSIN:
818  case TargetOpcode::G_FLOG10:
819  case TargetOpcode::G_FLOG:
820  case TargetOpcode::G_FLOG2:
821  case TargetOpcode::G_FEXP:
822  case TargetOpcode::G_FEXP2:
823  case TargetOpcode::G_FCEIL:
824  case TargetOpcode::G_FFLOOR:
825  case TargetOpcode::G_FMINNUM:
826  case TargetOpcode::G_FMAXNUM:
827  case TargetOpcode::G_FSQRT:
828  case TargetOpcode::G_FRINT:
829  case TargetOpcode::G_FNEARBYINT:
830  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
831  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
832  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
833  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
834  return UnableToLegalize;
835  }
836  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
837  if (Status != Legalized)
838  return Status;
839  break;
840  }
841  case TargetOpcode::G_FPEXT:
842  case TargetOpcode::G_FPTRUNC: {
843  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
844  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
845  if (!FromTy || !ToTy)
846  return UnableToLegalize;
848  if (Status != Legalized)
849  return Status;
850  break;
851  }
852  case TargetOpcode::G_FPTOSI:
853  case TargetOpcode::G_FPTOUI: {
854  // FIXME: Support other types
855  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
856  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
857  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
858  return UnableToLegalize;
860  MI, MIRBuilder,
861  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
862  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
863  if (Status != Legalized)
864  return Status;
865  break;
866  }
867  case TargetOpcode::G_SITOFP:
868  case TargetOpcode::G_UITOFP: {
869  // FIXME: Support other types
870  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
871  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
872  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
873  return UnableToLegalize;
875  MI, MIRBuilder,
876  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
877  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
878  if (Status != Legalized)
879  return Status;
880  break;
881  }
// Memory intrinsics erase MI themselves (the tail-call path may already have
// rewritten the block), so return early here instead of falling through.
882  case TargetOpcode::G_BZERO:
883  case TargetOpcode::G_MEMCPY:
884  case TargetOpcode::G_MEMMOVE:
885  case TargetOpcode::G_MEMSET: {
886  LegalizeResult Result =
887  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
888  if (Result != Legalized)
889  return Result;
890  MI.eraseFromParent();
891  return Result;
892  }
893  }
894 
895  MI.eraseFromParent();
896  return Legalized;
897 }
898 
900  unsigned TypeIdx,
901  LLT NarrowTy) {
902  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
903  uint64_t NarrowSize = NarrowTy.getSizeInBits();
904 
905  switch (MI.getOpcode()) {
906  default:
907  return UnableToLegalize;
908  case TargetOpcode::G_IMPLICIT_DEF: {
909  Register DstReg = MI.getOperand(0).getReg();
910  LLT DstTy = MRI.getType(DstReg);
911 
912  // If SizeOp0 is not an exact multiple of NarrowSize, emit
913  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
914  // FIXME: Although this would also be legal for the general case, it causes
915  // a lot of regressions in the emitted code (superfluous COPYs, artifact
916  // combines not being hit). This seems to be a problem related to the
917  // artifact combiner.
918  if (SizeOp0 % NarrowSize != 0) {
919  LLT ImplicitTy = NarrowTy;
920  if (DstTy.isVector())
921  ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
922 
923  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
924  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
925 
926  MI.eraseFromParent();
927  return Legalized;
928  }
929 
930  int NumParts = SizeOp0 / NarrowSize;
931 
932  SmallVector<Register, 2> DstRegs;
933  for (int i = 0; i < NumParts; ++i)
934  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
935 
936  if (DstTy.isVector())
937  MIRBuilder.buildBuildVector(DstReg, DstRegs);
938  else
939  MIRBuilder.buildMerge(DstReg, DstRegs);
940  MI.eraseFromParent();
941  return Legalized;
942  }
943  case TargetOpcode::G_CONSTANT: {
944  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
945  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
946  unsigned TotalSize = Ty.getSizeInBits();
947  unsigned NarrowSize = NarrowTy.getSizeInBits();
948  int NumParts = TotalSize / NarrowSize;
949 
950  SmallVector<Register, 4> PartRegs;
951  for (int I = 0; I != NumParts; ++I) {
952  unsigned Offset = I * NarrowSize;
953  auto K = MIRBuilder.buildConstant(NarrowTy,
954  Val.lshr(Offset).trunc(NarrowSize));
955  PartRegs.push_back(K.getReg(0));
956  }
957 
958  LLT LeftoverTy;
959  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
960  SmallVector<Register, 1> LeftoverRegs;
961  if (LeftoverBits != 0) {
962  LeftoverTy = LLT::scalar(LeftoverBits);
963  auto K = MIRBuilder.buildConstant(
964  LeftoverTy,
965  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
966  LeftoverRegs.push_back(K.getReg(0));
967  }
968 
969  insertParts(MI.getOperand(0).getReg(),
970  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
971 
972  MI.eraseFromParent();
973  return Legalized;
974  }
975  case TargetOpcode::G_SEXT:
976  case TargetOpcode::G_ZEXT:
977  case TargetOpcode::G_ANYEXT:
978  return narrowScalarExt(MI, TypeIdx, NarrowTy);
979  case TargetOpcode::G_TRUNC: {
980  if (TypeIdx != 1)
981  return UnableToLegalize;
982 
983  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
984  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
985  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
986  return UnableToLegalize;
987  }
988 
989  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
990  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
991  MI.eraseFromParent();
992  return Legalized;
993  }
994 
995  case TargetOpcode::G_FREEZE: {
996  if (TypeIdx != 0)
997  return UnableToLegalize;
998 
999  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1000  // Should widen scalar first
1001  if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1002  return UnableToLegalize;
1003 
1004  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1006  for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1007  Parts.push_back(
1008  MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1009  }
1010 
1011  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
1012  MI.eraseFromParent();
1013  return Legalized;
1014  }
1015  case TargetOpcode::G_ADD:
1016  case TargetOpcode::G_SUB:
1017  case TargetOpcode::G_SADDO:
1018  case TargetOpcode::G_SSUBO:
1019  case TargetOpcode::G_SADDE:
1020  case TargetOpcode::G_SSUBE:
1021  case TargetOpcode::G_UADDO:
1022  case TargetOpcode::G_USUBO:
1023  case TargetOpcode::G_UADDE:
1024  case TargetOpcode::G_USUBE:
1025  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1026  case TargetOpcode::G_MUL:
1027  case TargetOpcode::G_UMULH:
1028  return narrowScalarMul(MI, NarrowTy);
1029  case TargetOpcode::G_EXTRACT:
1030  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1031  case TargetOpcode::G_INSERT:
1032  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1033  case TargetOpcode::G_LOAD: {
1034  auto &LoadMI = cast<GLoad>(MI);
1035  Register DstReg = LoadMI.getDstReg();
1036  LLT DstTy = MRI.getType(DstReg);
1037  if (DstTy.isVector())
1038  return UnableToLegalize;
1039 
1040  if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1041  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1042  MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1043  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1044  LoadMI.eraseFromParent();
1045  return Legalized;
1046  }
1047 
1048  return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1049  }
1050  case TargetOpcode::G_ZEXTLOAD:
1051  case TargetOpcode::G_SEXTLOAD: {
1052  auto &LoadMI = cast<GExtLoad>(MI);
1053  Register DstReg = LoadMI.getDstReg();
1054  Register PtrReg = LoadMI.getPointerReg();
1055 
1056  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1057  auto &MMO = LoadMI.getMMO();
1058  unsigned MemSize = MMO.getSizeInBits();
1059 
1060  if (MemSize == NarrowSize) {
1061  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1062  } else if (MemSize < NarrowSize) {
1063  MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1064  } else if (MemSize > NarrowSize) {
1065  // FIXME: Need to split the load.
1066  return UnableToLegalize;
1067  }
1068 
1069  if (isa<GZExtLoad>(LoadMI))
1070  MIRBuilder.buildZExt(DstReg, TmpReg);
1071  else
1072  MIRBuilder.buildSExt(DstReg, TmpReg);
1073 
1074  LoadMI.eraseFromParent();
1075  return Legalized;
1076  }
1077  case TargetOpcode::G_STORE: {
1078  auto &StoreMI = cast<GStore>(MI);
1079 
1080  Register SrcReg = StoreMI.getValueReg();
1081  LLT SrcTy = MRI.getType(SrcReg);
1082  if (SrcTy.isVector())
1083  return UnableToLegalize;
1084 
1085  int NumParts = SizeOp0 / NarrowSize;
1086  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1087  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1088  if (SrcTy.isVector() && LeftoverBits != 0)
1089  return UnableToLegalize;
1090 
1091  if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1092  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1093  MIRBuilder.buildTrunc(TmpReg, SrcReg);
1094  MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1095  StoreMI.eraseFromParent();
1096  return Legalized;
1097  }
1098 
1099  return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1100  }
1101  case TargetOpcode::G_SELECT:
1102  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1103  case TargetOpcode::G_AND:
1104  case TargetOpcode::G_OR:
1105  case TargetOpcode::G_XOR: {
1106  // Legalize bitwise operation:
1107  // A = BinOp<Ty> B, C
1108  // into:
1109  // B1, ..., BN = G_UNMERGE_VALUES B
1110  // C1, ..., CN = G_UNMERGE_VALUES C
1111  // A1 = BinOp<Ty/N> B1, C2
1112  // ...
1113  // AN = BinOp<Ty/N> BN, CN
1114  // A = G_MERGE_VALUES A1, ..., AN
1115  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1116  }
1117  case TargetOpcode::G_SHL:
1118  case TargetOpcode::G_LSHR:
1119  case TargetOpcode::G_ASHR:
1120  return narrowScalarShift(MI, TypeIdx, NarrowTy);
1121  case TargetOpcode::G_CTLZ:
1122  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1123  case TargetOpcode::G_CTTZ:
1124  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1125  case TargetOpcode::G_CTPOP:
1126  if (TypeIdx == 1)
1127  switch (MI.getOpcode()) {
1128  case TargetOpcode::G_CTLZ:
1129  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1130  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1131  case TargetOpcode::G_CTTZ:
1132  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1133  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1134  case TargetOpcode::G_CTPOP:
1135  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1136  default:
1137  return UnableToLegalize;
1138  }
1139 
1141  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1143  return Legalized;
1144  case TargetOpcode::G_INTTOPTR:
1145  if (TypeIdx != 1)
1146  return UnableToLegalize;
1147 
1149  narrowScalarSrc(MI, NarrowTy, 1);
1151  return Legalized;
1152  case TargetOpcode::G_PTRTOINT:
1153  if (TypeIdx != 0)
1154  return UnableToLegalize;
1155 
1157  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1159  return Legalized;
1160  case TargetOpcode::G_PHI: {
1161  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1162  // NarrowSize.
1163  if (SizeOp0 % NarrowSize != 0)
1164  return UnableToLegalize;
1165 
1166  unsigned NumParts = SizeOp0 / NarrowSize;
1167  SmallVector<Register, 2> DstRegs(NumParts);
1168  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1170  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1171  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1172  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1173  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1174  SrcRegs[i / 2]);
1175  }
1176  MachineBasicBlock &MBB = *MI.getParent();
1178  for (unsigned i = 0; i < NumParts; ++i) {
1179  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1180  MachineInstrBuilder MIB =
1181  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1182  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1183  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1184  }
1186  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1188  MI.eraseFromParent();
1189  return Legalized;
1190  }
1191  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1192  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1193  if (TypeIdx != 2)
1194  return UnableToLegalize;
1195 
1196  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1198  narrowScalarSrc(MI, NarrowTy, OpIdx);
1200  return Legalized;
1201  }
1202  case TargetOpcode::G_ICMP: {
1203  Register LHS = MI.getOperand(2).getReg();
1204  LLT SrcTy = MRI.getType(LHS);
1205  uint64_t SrcSize = SrcTy.getSizeInBits();
1206  CmpInst::Predicate Pred =
1207  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1208 
1209  // TODO: Handle the non-equality case for weird sizes.
1210  if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1211  return UnableToLegalize;
1212 
1213  LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1214  SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1215  if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1216  LHSLeftoverRegs))
1217  return UnableToLegalize;
1218 
1219  LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1220  SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1221  if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1222  RHSPartRegs, RHSLeftoverRegs))
1223  return UnableToLegalize;
1224 
1225  // We now have the LHS and RHS of the compare split into narrow-type
1226  // registers, plus potentially some leftover type.
1227  Register Dst = MI.getOperand(0).getReg();
1228  LLT ResTy = MRI.getType(Dst);
1229  if (ICmpInst::isEquality(Pred)) {
1230  // For each part on the LHS and RHS, keep track of the result of XOR-ing
1231  // them together. For each equal part, the result should be all 0s. For
1232  // each non-equal part, we'll get at least one 1.
1233  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1235  for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1236  auto LHS = std::get<0>(LHSAndRHS);
1237  auto RHS = std::get<1>(LHSAndRHS);
1238  auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1239  Xors.push_back(Xor);
1240  }
1241 
1242  // Build a G_XOR for each leftover register. Each G_XOR must be widened
1243  // to the desired narrow type so that we can OR them together later.
1244  SmallVector<Register, 4> WidenedXors;
1245  for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1246  auto LHS = std::get<0>(LHSAndRHS);
1247  auto RHS = std::get<1>(LHSAndRHS);
1248  auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1249  LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1250  buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1251  /* PadStrategy = */ TargetOpcode::G_ZEXT);
1252  Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1253  }
1254 
1255  // Now, for each part we broke up, we know if they are equal/not equal
1256  // based off the G_XOR. We can OR these all together and compare against
1257  // 0 to get the result.
1258  assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1259  auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1260  for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1261  Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1262  MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1263  } else {
1264  // TODO: Handle non-power-of-two types.
1265  assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1266  assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1267  Register LHSL = LHSPartRegs[0];
1268  Register LHSH = LHSPartRegs[1];
1269  Register RHSL = RHSPartRegs[0];
1270  Register RHSH = RHSPartRegs[1];
1271  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1272  MachineInstrBuilder CmpHEQ =
1273  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1275  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1276  MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1277  }
1278  MI.eraseFromParent();
1279  return Legalized;
1280  }
1281  case TargetOpcode::G_SEXT_INREG: {
1282  if (TypeIdx != 0)
1283  return UnableToLegalize;
1284 
1285  int64_t SizeInBits = MI.getOperand(2).getImm();
1286 
1287  // So long as the new type has more bits than the bits we're extending we
1288  // don't need to break it apart.
1289  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1291  // We don't lose any non-extension bits by truncating the src and
1292  // sign-extending the dst.
1293  MachineOperand &MO1 = MI.getOperand(1);
1294  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1295  MO1.setReg(TruncMIB.getReg(0));
1296 
1297  MachineOperand &MO2 = MI.getOperand(0);
1298  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1300  MIRBuilder.buildSExt(MO2, DstExt);
1301  MO2.setReg(DstExt);
1303  return Legalized;
1304  }
1305 
1306  // Break it apart. Components below the extension point are unmodified. The
1307  // component containing the extension point becomes a narrower SEXT_INREG.
1308  // Components above it are ashr'd from the component containing the
1309  // extension point.
1310  if (SizeOp0 % NarrowSize != 0)
1311  return UnableToLegalize;
1312  int NumParts = SizeOp0 / NarrowSize;
1313 
1314  // List the registers where the destination will be scattered.
1315  SmallVector<Register, 2> DstRegs;
1316  // List the registers where the source will be split.
1317  SmallVector<Register, 2> SrcRegs;
1318 
1319  // Create all the temporary registers.
1320  for (int i = 0; i < NumParts; ++i) {
1321  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1322 
1323  SrcRegs.push_back(SrcReg);
1324  }
1325 
1326  // Explode the big arguments into smaller chunks.
1327  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1328 
1329  Register AshrCstReg =
1330  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1331  .getReg(0);
1332  Register FullExtensionReg = 0;
1333  Register PartialExtensionReg = 0;
1334 
1335  // Do the operation on each small part.
1336  for (int i = 0; i < NumParts; ++i) {
1337  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1338  DstRegs.push_back(SrcRegs[i]);
1339  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1340  assert(PartialExtensionReg &&
1341  "Expected to visit partial extension before full");
1342  if (FullExtensionReg) {
1343  DstRegs.push_back(FullExtensionReg);
1344  continue;
1345  }
1346  DstRegs.push_back(
1347  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1348  .getReg(0));
1349  FullExtensionReg = DstRegs.back();
1350  } else {
1351  DstRegs.push_back(
1352  MIRBuilder
1353  .buildInstr(
1354  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1355  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1356  .getReg(0));
1357  PartialExtensionReg = DstRegs.back();
1358  }
1359  }
1360 
1361  // Gather the destination registers into the final destination.
1362  Register DstReg = MI.getOperand(0).getReg();
1363  MIRBuilder.buildMerge(DstReg, DstRegs);
1364  MI.eraseFromParent();
1365  return Legalized;
1366  }
1367  case TargetOpcode::G_BSWAP:
1368  case TargetOpcode::G_BITREVERSE: {
1369  if (SizeOp0 % NarrowSize != 0)
1370  return UnableToLegalize;
1371 
1373  SmallVector<Register, 2> SrcRegs, DstRegs;
1374  unsigned NumParts = SizeOp0 / NarrowSize;
1375  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1376 
1377  for (unsigned i = 0; i < NumParts; ++i) {
1378  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1379  {SrcRegs[NumParts - 1 - i]});
1380  DstRegs.push_back(DstPart.getReg(0));
1381  }
1382 
1383  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1384 
1386  MI.eraseFromParent();
1387  return Legalized;
1388  }
1389  case TargetOpcode::G_PTR_ADD:
1390  case TargetOpcode::G_PTRMASK: {
1391  if (TypeIdx != 1)
1392  return UnableToLegalize;
1394  narrowScalarSrc(MI, NarrowTy, 2);
1396  return Legalized;
1397  }
1398  case TargetOpcode::G_FPTOUI:
1399  case TargetOpcode::G_FPTOSI:
1400  return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1401  case TargetOpcode::G_FPEXT:
1402  if (TypeIdx != 0)
1403  return UnableToLegalize;
1405  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1407  return Legalized;
1408  }
1409 }
1410 
// Coerce a register to an equivalently-sized integer scalar: scalars pass
// through unchanged, pointers become G_PTRTOINT, and vectors are converted
// via bitcast (after a ptrtoint if the elements are pointers). Returns an
// invalid Register() when the conversion is impossible.
// NOTE(review): the signature line for this definition is missing from this
// extract (dropped by the source-to-text conversion); the body reads a
// `Val` register parameter -- presumably LegalizerHelper::coerceToScalar.
1412  LLT Ty = MRI.getType(Val);
1413  if (Ty.isScalar())
1414  return Val;
1415 
// Integer scalar type with the same total bit width as Ty.
1417  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1418  if (Ty.isPointer()) {
// Non-integral address spaces have no defined integer representation, so
// a ptrtoint would be meaningless -- signal failure with an invalid reg.
1419  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1420  return Register();
1421  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1422  }
1423 
1424  Register NewVal = Val;
1425 
// Only scalar, pointer, or vector types are expected here.
1426  assert(Ty.isVector());
1427  LLT EltTy = Ty.getElementType();
1428  if (EltTy.isPointer())
// NOTE(review): converts the pointer-element vector with a single
// ptrtoint to the flat scalar NewTy before the bitcast -- verify against
// upstream that no per-element conversion is intended.
1429  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1430  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1431 }
1432 
// Rewrite the use operand at OpIdx in place: build ExtOpcode (e.g. G_ANYEXT,
// G_SEXT, G_ZEXT) extending the current operand value to WideTy, then point
// the operand at the extended register.
// NOTE(review): the first signature line (return type, method name, leading
// parameters MI/WideTy) is missing from this extract -- presumably
// LegalizerHelper::widenScalarSrc.
1434  unsigned OpIdx, unsigned ExtOpcode) {
1435  MachineOperand &MO = MI.getOperand(OpIdx);
1436  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1437  MO.setReg(ExtB.getReg(0));
1438 }
1439 
// Rewrite the use operand at OpIdx in place: truncate its current value to
// NarrowTy and point the operand at the truncated register.
// NOTE(review): the first signature line is missing from this extract --
// presumably LegalizerHelper::narrowScalarSrc(MachineInstr &, LLT, unsigned).
1441  unsigned OpIdx) {
1442  MachineOperand &MO = MI.getOperand(OpIdx);
1443  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1444  MO.setReg(ExtB.getReg(0));
1445 }
1446 
// Rewrite the def operand at OpIdx to define a fresh WideTy register, and
// recover the original (narrower) destination from it with TruncOpcode
// (typically G_TRUNC).
// NOTE(review): the first signature line is missing from this extract, and
// line 1451 is also dropped -- in upstream that line repositions the insert
// point after MI so the truncate is emitted below it; confirm against the
// real source.
1448  unsigned OpIdx, unsigned TruncOpcode) {
1449  MachineOperand &MO = MI.getOperand(OpIdx);
1450  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1452  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1453  MO.setReg(DstExt);
1454 }
1455 
// Rewrite the def operand at OpIdx to define a fresh NarrowTy register, and
// recover the original (wider) destination from it with ExtOpcode.
// NOTE(review): the first signature line is missing from this extract, and
// line 1460 is dropped -- in upstream that line repositions the insert point
// after MI so the extension is emitted below it; confirm against the real
// source.
1457  unsigned OpIdx, unsigned ExtOpcode) {
1458  MachineOperand &MO = MI.getOperand(OpIdx);
1459  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1461  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1462  MO.setReg(DstTrunc);
1463 }
1464 
// Rewrite the def operand at OpIdx to define a fresh WideTy register in
// place of the original destination.
// NOTE(review): the signature's first line and interior lines 1468/1472 are
// missing from this extract; in upstream this appears to be
// LegalizerHelper::moreElementsVectorDst, where the dropped lines adjust the
// insert point and rebuild the original Dst from DstExt -- confirm before
// relying on this listing.
1466  unsigned OpIdx) {
1467  MachineOperand &MO = MI.getOperand(OpIdx);
1469  Register Dst = MO.getReg();
1470  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1471  MO.setReg(DstExt);
1473 }
1474 
// Rewrite the use operand at OpIdx.
// NOTE(review): this extract is heavily truncated -- the signature's first
// line and body lines 1478-1479 are missing, so the actual transformation
// applied to MO is not visible here. In upstream this appears to be
// LegalizerHelper::moreElementsVectorSrc; consult the real source.
1476  unsigned OpIdx) {
1477  MachineOperand &MO = MI.getOperand(OpIdx);
1480 }
1481 
1482 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1483  MachineOperand &Op = MI.getOperand(OpIdx);
1484  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1485 }
1486 
// Rewrite the def operand at OpIdx to define a fresh CastTy register, and
// produce the original destination by bitcasting that register back.
// NOTE(review): line 1490 is missing from this extract -- in upstream it
// repositions the insert point after MI so the bitcast is emitted below it;
// confirm against the real source.
1487 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1488  MachineOperand &MO = MI.getOperand(OpIdx);
1489  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1491  MIRBuilder.buildBitcast(MO, CastDst);
1492  MO.setReg(CastDst);
1493 }
1494 
// Widen the source operands of a G_MERGE_VALUES (TypeIdx 1 only, scalar
// destinations only). Two strategies:
//   1. If WideTy covers the whole destination, zext each source into WideTy,
//      shift it to its bit offset, and OR the pieces together, truncating or
//      int-to-ptr converting at the end as needed.
//   2. Otherwise, unmerge each source to the GCD of the source and requested
//      widths, pad with undef to a multiple of WideTy, re-merge to WideTy
//      pieces, and merge those into the destination (with a final trunc when
//      the widths don't divide evenly).
// NOTE(review): the return-type line (1495, LegalizerHelper::LegalizeResult)
// and declaration line 1571 are missing from this extract.
1496 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1497  LLT WideTy) {
1498  if (TypeIdx != 1)
1499  return UnableToLegalize;
1500 
1501  Register DstReg = MI.getOperand(0).getReg();
1502  LLT DstTy = MRI.getType(DstReg);
1503  if (DstTy.isVector())
1504  return UnableToLegalize;
1505 
1506  Register Src1 = MI.getOperand(1).getReg();
1507  LLT SrcTy = MRI.getType(Src1);
1508  const int DstSize = DstTy.getSizeInBits();
1509  const int SrcSize = SrcTy.getSizeInBits();
1510  const int WideSize = WideTy.getSizeInBits();
// Number of WideTy pieces needed to cover the destination (rounded up).
1511  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1512 
1513  unsigned NumOps = MI.getNumOperands();
1514  unsigned NumSrc = MI.getNumOperands() - 1;
1515  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1516 
1517  if (WideSize >= DstSize) {
1518  // Directly pack the bits in the target type.
1519  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1520 
1521  for (unsigned I = 2; I != NumOps; ++I) {
1522  const unsigned Offset = (I - 1) * PartSize;
1523 
1524  Register SrcReg = MI.getOperand(I).getReg();
1525  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1526 
1527  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1528 
// Write the last OR directly into DstReg when no final conversion is needed.
1529  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1530  MRI.createGenericVirtualRegister(WideTy);
1531 
1532  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1533  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1534  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1535  ResultReg = NextResult;
1536  }
1537 
1538  if (WideSize > DstSize)
1539  MIRBuilder.buildTrunc(DstReg, ResultReg);
1540  else if (DstTy.isPointer())
1541  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1542 
1543  MI.eraseFromParent();
1544  return Legalized;
1545  }
1546 
1547  // Unmerge the original values to the GCD type, and recombine to the next
1548  // multiple greater than the original type.
1549  //
1550  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1551  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1552  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1553  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1554  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1555  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1556  // %12:_(s12) = G_MERGE_VALUES %10, %11
1557  //
1558  // Padding with undef if necessary:
1559  //
1560  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1561  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1562  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1563  // %7:_(s2) = G_IMPLICIT_DEF
1564  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1565  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1566  // %10:_(s12) = G_MERGE_VALUES %8, %9
1567 
1568  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1569  LLT GCDTy = LLT::scalar(GCD);
1570 
// NOTE(review): line 1571 is missing from this extract; Unmerges/NewMergeRegs
// declarations follow.
1572  SmallVector<Register, 8> NewMergeRegs;
1573  SmallVector<Register, 8> Unmerges;
1574  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1575 
1576  // Decompose the original operands if they don't evenly divide.
1577  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1578  Register SrcReg = MO.getReg();
1579  if (GCD == SrcSize) {
1580  Unmerges.push_back(SrcReg);
1581  } else {
1582  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1583  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1584  Unmerges.push_back(Unmerge.getReg(J));
1585  }
1586  }
1587 
1588  // Pad with undef to the next size that is a multiple of the requested size.
1589  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1590  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1591  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1592  Unmerges.push_back(UndefReg);
1593  }
1594 
1595  const int PartsPerGCD = WideSize / GCD;
1596 
1597  // Build merges of each piece.
1598  ArrayRef<Register> Slicer(Unmerges);
1599  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1600  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1601  NewMergeRegs.push_back(Merge.getReg(0));
1602  }
1603 
1604  // A truncate may be necessary if the requested type doesn't evenly divide the
1605  // original result type.
1606  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1607  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1608  } else {
1609  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1610  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1611  }
1612 
1613  MI.eraseFromParent();
1614  return Legalized;
1615 }
1616 
// Widen the destination type of a G_UNMERGE_VALUES (TypeIdx 0 only, scalar
// results only, scalar source). If the widened type covers the whole source,
// the results are extracted directly by shift+trunc; otherwise the source is
// any-extended to the LCM of the source and requested types, unmerged to
// WideTy pieces, and those pieces are re-split (with dead defs for padding)
// back to the original result registers.
// NOTE(review): the return-type line (1617) and lines 1636 / 1726 are
// missing from this extract; the `DL` used at 1637 and the `Parts` vector
// used at 1728/1734 are declared on those dropped lines.
1618 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1619  LLT WideTy) {
1620  if (TypeIdx != 0)
1621  return UnableToLegalize;
1622 
// The last operand is the single source; all earlier operands are results.
1623  int NumDst = MI.getNumOperands() - 1;
1624  Register SrcReg = MI.getOperand(NumDst).getReg();
1625  LLT SrcTy = MRI.getType(SrcReg);
1626  if (SrcTy.isVector())
1627  return UnableToLegalize;
1628 
1629  Register Dst0Reg = MI.getOperand(0).getReg();
1630  LLT DstTy = MRI.getType(Dst0Reg);
1631  if (!DstTy.isScalar())
1632  return UnableToLegalize;
1633 
1634  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1635  if (SrcTy.isPointer()) {
1637  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1638  LLVM_DEBUG(
1639  dbgs() << "Not casting non-integral address space integer\n");
1640  return UnableToLegalize;
1641  }
1642 
1643  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1644  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1645  }
1646 
1647  // Widen SrcTy to WideTy. This does not affect the result, but since the
1648  // user requested this size, it is probably better handled than SrcTy and
1649  // should reduce the total number of legalization artifacts.
1650  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1651  SrcTy = WideTy;
1652  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1653  }
1654 
1655  // Theres no unmerge type to target. Directly extract the bits from the
1656  // source type
1657  unsigned DstSize = DstTy.getSizeInBits();
1658 
// Result I holds bits [I*DstSize, (I+1)*DstSize) of the source.
1659  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1660  for (int I = 1; I != NumDst; ++I) {
1661  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1662  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1663  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1664  }
1665 
1666  MI.eraseFromParent();
1667  return Legalized;
1668  }
1669 
1670  // Extend the source to a wider type.
1671  LLT LCMTy = getLCMType(SrcTy, WideTy);
1672 
1673  Register WideSrc = SrcReg;
1674  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1675  // TODO: If this is an integral address space, cast to integer and anyext.
1676  if (SrcTy.isPointer()) {
1677  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1678  return UnableToLegalize;
1679  }
1680 
1681  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1682  }
1683 
1684  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1685 
1686  // Create a sequence of unmerges and merges to the original results. Since we
1687  // may have widened the source, we will need to pad the results with dead defs
1688  // to cover the source register.
1689  // e.g. widen s48 to s64:
1690  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1691  //
1692  // =>
1693  // %4:_(s192) = G_ANYEXT %0:_(s96)
1694  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1695  // ; unpack to GCD type, with extra dead defs
1696  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1697  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1698  // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
1699  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1700  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1701  const LLT GCDTy = getGCDType(WideTy, DstTy);
1702  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1703  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1704 
1705  // Directly unmerge to the destination without going through a GCD type
1706  // if possible
1707  if (PartsPerRemerge == 1) {
1708  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1709 
1710  for (int I = 0; I != NumUnmerge; ++I) {
1711  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1712 
1713  for (int J = 0; J != PartsPerUnmerge; ++J) {
1714  int Idx = I * PartsPerUnmerge + J;
1715  if (Idx < NumDst)
1716  MIB.addDef(MI.getOperand(Idx).getReg());
1717  else {
1718  // Create dead def for excess components.
1719  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1720  }
1721  }
1722 
1723  MIB.addUse(Unmerge.getReg(I));
1724  }
1725  } else {
1727  for (int J = 0; J != NumUnmerge; ++J)
1728  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1729 
1730  SmallVector<Register, 8> RemergeParts;
1731  for (int I = 0; I != NumDst; ++I) {
1732  for (int J = 0; J < PartsPerRemerge; ++J) {
1733  const int Idx = I * PartsPerRemerge + J;
1734  RemergeParts.emplace_back(Parts[Idx]);
1735  }
1736 
1737  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1738  RemergeParts.clear();
1739  }
1740  }
1741 
1742  MI.eraseFromParent();
1743  return Legalized;
1744 }
1745 
// Widen one type index of a G_EXTRACT. For TypeIdx 0 (the result), the
// extract is lowered to a logical shift right by Offset in the (possibly
// widened) source type followed by a trunc; pointer sources are first
// converted with ptrtoint when their address space is integral. For
// TypeIdx 1, a scalar source is simply any-extended, and a vector source is
// widened with the immediate offset rescaled to the new element layout.
// NOTE(review): the return-type line (1746) and lines 1764 (declaration of
// the `DL` used at 1765), 1799/1801, 1814, and 1820 are missing from this
// extract; the dropped 17xx/18xx lines are upstream Observer
// changingInstr/changedInstr notifications -- confirm against the real
// source.
1747 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1748  LLT WideTy) {
1749  Register DstReg = MI.getOperand(0).getReg();
1750  Register SrcReg = MI.getOperand(1).getReg();
1751  LLT SrcTy = MRI.getType(SrcReg);
1752 
1753  LLT DstTy = MRI.getType(DstReg);
// Bit offset of the extracted value within the source.
1754  unsigned Offset = MI.getOperand(2).getImm();
1755 
1756  if (TypeIdx == 0) {
1757  if (SrcTy.isVector() || DstTy.isVector())
1758  return UnableToLegalize;
1759 
1760  SrcOp Src(SrcReg);
1761  if (SrcTy.isPointer()) {
1762  // Extracts from pointers can be handled only if they are really just
1763  // simple integers.
1765  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1766  return UnableToLegalize;
1767 
1768  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1769  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1770  SrcTy = SrcAsIntTy;
1771  }
1772 
1773  if (DstTy.isPointer())
1774  return UnableToLegalize;
1775 
1776  if (Offset == 0) {
1777  // Avoid a shift in the degenerate case.
1778  MIRBuilder.buildTrunc(DstReg,
1779  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1780  MI.eraseFromParent();
1781  return Legalized;
1782  }
1783 
1784  // Do a shift in the source type.
1785  LLT ShiftTy = SrcTy;
1786  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1787  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1788  ShiftTy = WideTy;
1789  }
1790 
1791  auto LShr = MIRBuilder.buildLShr(
1792  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1793  MIRBuilder.buildTrunc(DstReg, LShr);
1794  MI.eraseFromParent();
1795  return Legalized;
1796  }
1797 
1798  if (SrcTy.isScalar()) {
1800  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1802  return Legalized;
1803  }
1804 
1805  if (!SrcTy.isVector())
1806  return UnableToLegalize;
1807 
// Only whole-element extracts from vectors are supported here.
1808  if (DstTy != SrcTy.getElementType())
1809  return UnableToLegalize;
1810 
1811  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1812  return UnableToLegalize;
1813 
1815  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1816 
// Rescale the bit offset to account for the widened element size.
1817  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1818  Offset);
1819  widenScalarDst(MI, WideTy.getScalarType(), 0);
1821  return Legalized;
1822 }
1823 
// Widen a G_INSERT (TypeIdx 0, scalar WideTy only): any-extend the big
// source operand to WideTy and widen the destination to match; the inserted
// value operand and offset are left unchanged.
// NOTE(review): the return-type line (1824) and lines 1829/1832 are missing
// from this extract; the dropped lines are presumably upstream Observer
// changingInstr/changedInstr notifications -- confirm against the real
// source.
1825 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1826  LLT WideTy) {
1827  if (TypeIdx != 0 || WideTy.isVector())
1828  return UnableToLegalize;
1830  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1831  widenScalarDst(MI, WideTy);
1833  return Legalized;
1834 }
1835 
// Widen an overflowing add/sub (G_[SU]ADDO, G_[SU]SUBO, G_[SU]ADDE,
// G_[SU]SUBE). TypeIdx 1 widens only the carry-out (and carry-in, if any)
// booleans. TypeIdx 0 performs the arithmetic in WideTy after sign- or
// zero-extending the operands (matching the signedness of the opcode),
// detects overflow by comparing the wide result against its
// truncate-then-re-extend, and truncates the wide result into the original
// destination.
// NOTE(review): the return-type line (1836) and lines 1886/1891 are missing
// from this extract; the dropped lines are presumably upstream Observer
// changingInstr/changedInstr notifications -- confirm against the real
// source.
1837 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1838  LLT WideTy) {
1839  unsigned Opcode;
1840  unsigned ExtOpcode;
// Carry-in operand is present only for the G_*ADDE/G_*SUBE forms.
1841  Optional<Register> CarryIn = None;
1842  switch (MI.getOpcode()) {
1843  default:
1844  llvm_unreachable("Unexpected opcode!");
1845  case TargetOpcode::G_SADDO:
1846  Opcode = TargetOpcode::G_ADD;
1847  ExtOpcode = TargetOpcode::G_SEXT;
1848  break;
1849  case TargetOpcode::G_SSUBO:
1850  Opcode = TargetOpcode::G_SUB;
1851  ExtOpcode = TargetOpcode::G_SEXT;
1852  break;
1853  case TargetOpcode::G_UADDO:
1854  Opcode = TargetOpcode::G_ADD;
1855  ExtOpcode = TargetOpcode::G_ZEXT;
1856  break;
1857  case TargetOpcode::G_USUBO:
1858  Opcode = TargetOpcode::G_SUB;
1859  ExtOpcode = TargetOpcode::G_ZEXT;
1860  break;
// Signed carry ops are rewritten as the unsigned carry op on sext'd inputs.
1861  case TargetOpcode::G_SADDE:
1862  Opcode = TargetOpcode::G_UADDE;
1863  ExtOpcode = TargetOpcode::G_SEXT;
1864  CarryIn = MI.getOperand(4).getReg();
1865  break;
1866  case TargetOpcode::G_SSUBE:
1867  Opcode = TargetOpcode::G_USUBE;
1868  ExtOpcode = TargetOpcode::G_SEXT;
1869  CarryIn = MI.getOperand(4).getReg();
1870  break;
1871  case TargetOpcode::G_UADDE:
1872  Opcode = TargetOpcode::G_UADDE;
1873  ExtOpcode = TargetOpcode::G_ZEXT;
1874  CarryIn = MI.getOperand(4).getReg();
1875  break;
1876  case TargetOpcode::G_USUBE:
1877  Opcode = TargetOpcode::G_USUBE;
1878  ExtOpcode = TargetOpcode::G_ZEXT;
1879  CarryIn = MI.getOperand(4).getReg();
1880  break;
1881  }
1882 
1883  if (TypeIdx == 1) {
// Carry bits use the target's boolean extension convention.
1884  unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
1885 
1887  widenScalarDst(MI, WideTy, 1);
1888  if (CarryIn)
1889  widenScalarSrc(MI, WideTy, 4, BoolExtOp);
1890 
1892  return Legalized;
1893  }
1894 
1895  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1896  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1897  // Do the arithmetic in the larger type.
1898  Register NewOp;
1899  if (CarryIn) {
1900  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1901  NewOp = MIRBuilder
1902  .buildInstr(Opcode, {WideTy, CarryOutTy},
1903  {LHSExt, RHSExt, *CarryIn})
1904  .getReg(0);
1905  } else {
1906  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1907  }
1908  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1909  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1910  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1911  // There is no overflow if the ExtOp is the same as NewOp.
1912  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1913  // Now trunc the NewOp to the original result.
1914  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1915  MI.eraseFromParent();
1916  return Legalized;
1917 }
1918 
1920 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
 1921  LLT WideTy) {
 // Widen a saturating add/sub/shift-left (G_[SU]ADDSAT, G_[SU]SUBSAT,
 // G_[SU]SHLSAT) to WideTy. The operands are pre-shifted into the high bits
 // of the wide type so that the wide operation saturates at exactly the same
 // point as the narrow one would, then the result is shifted back down.
 1922  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
 1923  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
 1924  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
 1925  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
 1926  MI.getOpcode() == TargetOpcode::G_USHLSAT;
 1927  // We can convert this to:
 1928  // 1. Any extend iN to iM
 1929  // 2. SHL by M-N
 1930  // 3. [US][ADD|SUB|SHL]SAT
 1931  // 4. L/ASHR by M-N
 1932  //
 1933  // It may be more efficient to lower this to a min and a max operation in
 1934  // the higher precision arithmetic if the promoted operation isn't legal,
 1935  // but this decision is up to the target's lowering request.
 1936  Register DstReg = MI.getOperand(0).getReg();
 1937 
 1938  unsigned NewBits = WideTy.getScalarSizeInBits();
 // SHLAmount = M - N: how far operands must be moved into the high bits.
 1939  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
 1940 
 1941  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
 1942  // must not left shift the RHS to preserve the shift amount.
 1943  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
 1944  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
 1945  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
 1946  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
 1947  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
 1948  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
 1949 
 // Perform the saturating operation in the wide type with the same opcode.
 1950  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
 1951  {ShiftL, ShiftR}, MI.getFlags());
 1952 
 1953  // Use a shift that will preserve the number of sign bits when the trunc is
 1954  // folded away.
 1955  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
 1956  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
 1957 
 1958  MIRBuilder.buildTrunc(DstReg, Result);
 1959  MI.eraseFromParent();
 1960  return Legalized;
 1961 }
1962 
1964 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
 1965  LLT WideTy) {
 // Widen G_SMULO / G_UMULO. TypeIdx == 1 widens only the overflow flag
 // (operand 1); TypeIdx == 0 widens the multiply itself.
 1966  if (TypeIdx == 1) {
 1968  widenScalarDst(MI, WideTy, 1);
 1970  return Legalized;
 1971  }
 1972 
 1973  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
 1974  Register Result = MI.getOperand(0).getReg();
 1975  Register OriginalOverflow = MI.getOperand(1).getReg();
 1976  Register LHS = MI.getOperand(2).getReg();
 1977  Register RHS = MI.getOperand(3).getReg();
 1978  LLT SrcTy = MRI.getType(LHS);
 1979  LLT OverflowTy = MRI.getType(OriginalOverflow);
 1980  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
 1981 
 1982  // To determine if the result overflowed in the larger type, we extend the
 1983  // input to the larger type, do the multiply (checking if it overflows),
 1984  // then also check the high bits of the result to see if overflow happened
 1985  // there.
 1986  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
 1987  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
 1988  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
 1989 
 1990  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
 1991  {LeftOperand, RightOperand});
 1992  auto Mul = Mulo->getOperand(0);
 1993  MIRBuilder.buildTrunc(Result, Mul);
 1994 
 1995  MachineInstrBuilder ExtResult;
 1996  // Overflow occurred if it occurred in the larger type, or if the high part
 1997  // of the result does not zero/sign-extend the low part. Check this second
 1998  // possibility first.
 1999  if (IsSigned) {
 2000  // For signed, overflow occurred when the high part does not sign-extend
 2001  // the low part.
 2002  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
 2003  } else {
 2004  // Unsigned overflow occurred when the high part does not zero-extend the
 2005  // low part.
 2006  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
 2007  }
 2008 
 2009  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
 2010  // so we don't need to check the overflow result of larger type Mulo.
 2011  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
 2012  auto Overflow =
 2013  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
 2014  // Finally check if the multiplication in the larger type itself overflowed.
 2015  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
 2016  } else {
 // Wide type is at least double the source width: only the high-part check
 // is needed; the wide mulo flag can never be set.
 2017  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
 2018  }
 2019  MI.eraseFromParent();
 2020  return Legalized;
 2021 }
2022 
2024 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 // Main dispatch for the WidenScalar legalize action: promote the type at
 // index \p TypeIdx of \p MI to \p WideTy, choosing per-opcode extension
 // semantics (any/zero/sign/fp extend) so the narrow result is preserved.
 2025  switch (MI.getOpcode()) {
 2026  default:
 2027  return UnableToLegalize;
 2028  case TargetOpcode::G_ATOMICRMW_XCHG:
 2029  case TargetOpcode::G_ATOMICRMW_ADD:
 2030  case TargetOpcode::G_ATOMICRMW_SUB:
 2031  case TargetOpcode::G_ATOMICRMW_AND:
 2032  case TargetOpcode::G_ATOMICRMW_OR:
 2033  case TargetOpcode::G_ATOMICRMW_XOR:
 2034  case TargetOpcode::G_ATOMICRMW_MIN:
 2035  case TargetOpcode::G_ATOMICRMW_MAX:
 2036  case TargetOpcode::G_ATOMICRMW_UMIN:
 2037  case TargetOpcode::G_ATOMICRMW_UMAX:
 2038  assert(TypeIdx == 0 && "atomicrmw with second scalar type");
 2040  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
 2041  widenScalarDst(MI, WideTy, 0);
 2043  return Legalized;
 2044  case TargetOpcode::G_ATOMIC_CMPXCHG:
 2045  assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
 2047  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
 2048  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
 2049  widenScalarDst(MI, WideTy, 0);
 2051  return Legalized;
 2052  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
 2053  if (TypeIdx == 0) {
 2055  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
 2056  widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
 2057  widenScalarDst(MI, WideTy, 0);
 2059  return Legalized;
 2060  }
 2061  assert(TypeIdx == 1 &&
 2062  "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
 // TypeIdx == 1 is the boolean success flag; only the dst needs widening.
 2064  widenScalarDst(MI, WideTy, 1);
 2066  return Legalized;
 2067  case TargetOpcode::G_EXTRACT:
 2068  return widenScalarExtract(MI, TypeIdx, WideTy);
 2069  case TargetOpcode::G_INSERT:
 2070  return widenScalarInsert(MI, TypeIdx, WideTy);
 2071  case TargetOpcode::G_MERGE_VALUES:
 2072  return widenScalarMergeValues(MI, TypeIdx, WideTy);
 2073  case TargetOpcode::G_UNMERGE_VALUES:
 2074  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
 2075  case TargetOpcode::G_SADDO:
 2076  case TargetOpcode::G_SSUBO:
 2077  case TargetOpcode::G_UADDO:
 2078  case TargetOpcode::G_USUBO:
 2079  case TargetOpcode::G_SADDE:
 2080  case TargetOpcode::G_SSUBE:
 2081  case TargetOpcode::G_UADDE:
 2082  case TargetOpcode::G_USUBE:
 2083  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
 2084  case TargetOpcode::G_UMULO:
 2085  case TargetOpcode::G_SMULO:
 2086  return widenScalarMulo(MI, TypeIdx, WideTy);
 2087  case TargetOpcode::G_SADDSAT:
 2088  case TargetOpcode::G_SSUBSAT:
 2089  case TargetOpcode::G_SSHLSAT:
 2090  case TargetOpcode::G_UADDSAT:
 2091  case TargetOpcode::G_USUBSAT:
 2092  case TargetOpcode::G_USHLSAT:
 2093  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
 2094  case TargetOpcode::G_CTTZ:
 2095  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
 2096  case TargetOpcode::G_CTLZ:
 2097  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
 2098  case TargetOpcode::G_CTPOP: {
 2099  if (TypeIdx == 0) {
 2101  widenScalarDst(MI, WideTy, 0);
 2103  return Legalized;
 2104  }
 2105 
 2106  Register SrcReg = MI.getOperand(1).getReg();
 2107 
 2108  // First extend the input.
 // CTTZ only looks at low bits, so any-extend suffices; the others count
 // from the top / over all bits and need the high bits zeroed.
 2109  unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
 2110  MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
 2111  ? TargetOpcode::G_ANYEXT
 2112  : TargetOpcode::G_ZEXT;
 2113  auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
 2114  LLT CurTy = MRI.getType(SrcReg);
 2115  unsigned NewOpc = MI.getOpcode();
 2116  if (NewOpc == TargetOpcode::G_CTTZ) {
 2117  // The count is the same in the larger type except if the original
 2118  // value was zero. This can be handled by setting the bit just off
 2119  // the top of the original type.
 2120  auto TopBit =
 2121  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
 2122  MIBSrc = MIRBuilder.buildOr(
 2123  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
 2124  // Now we know the operand is non-zero, use the more relaxed opcode.
 2125  NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
 2126  }
 2127 
 2128  // Perform the operation at the larger size.
 2129  auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
 2130  // This is already the correct result for CTPOP and CTTZs
 2131  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
 2132  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
 2133  // The correct result is NewOp - (Difference in widety and current ty).
 2134  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
 2135  MIBNewOp = MIRBuilder.buildSub(
 2136  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
 2137  }
 2138 
 2139  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
 2140  MI.eraseFromParent();
 2141  return Legalized;
 2142  }
 2143  case TargetOpcode::G_BSWAP: {
 // Byte-swap in the wide type leaves the interesting bytes in the high end;
 // shift right by the width difference to bring them back down.
 2145  Register DstReg = MI.getOperand(0).getReg();
 2146 
 2147  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
 2148  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
 2149  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
 2150  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2151 
 2152  MI.getOperand(0).setReg(DstExt);
 2153 
 2155 
 2156  LLT Ty = MRI.getType(DstReg);
 2157  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
 2158  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
 2159  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
 2160 
 2161  MIRBuilder.buildTrunc(DstReg, ShrReg);
 2163  return Legalized;
 2164  }
 2165  case TargetOpcode::G_BITREVERSE: {
 // Same trick as G_BSWAP: reverse in the wide type, then shift the result
 // down by the number of extra (reversed-in) low bits.
 2167 
 2168  Register DstReg = MI.getOperand(0).getReg();
 2169  LLT Ty = MRI.getType(DstReg);
 2170  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
 2171 
 2172  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
 2173  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2174  MI.getOperand(0).setReg(DstExt);
 2176 
 2177  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
 2178  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
 2179  MIRBuilder.buildTrunc(DstReg, Shift);
 2181  return Legalized;
 2182  }
 2183  case TargetOpcode::G_FREEZE:
 2185  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2186  widenScalarDst(MI, WideTy);
 2188  return Legalized;
 2189 
 2190  case TargetOpcode::G_ABS:
 // Sign-extend so the wide abs of a negative value matches the narrow abs.
 2192  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
 2193  widenScalarDst(MI, WideTy);
 2195  return Legalized;
 2196 
 2197  case TargetOpcode::G_ADD:
 2198  case TargetOpcode::G_AND:
 2199  case TargetOpcode::G_MUL:
 2200  case TargetOpcode::G_OR:
 2201  case TargetOpcode::G_XOR:
 2202  case TargetOpcode::G_SUB:
 2203  // Perform operation at larger width (any extension is fine here, high bits
 2204  // don't affect the result) and then truncate the result back to the
 2205  // original type.
 2207  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2208  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
 2209  widenScalarDst(MI, WideTy);
 2211  return Legalized;
 2212 
 2213  case TargetOpcode::G_SBFX:
 2214  case TargetOpcode::G_UBFX:
 2216 
 2217  if (TypeIdx == 0) {
 2218  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2219  widenScalarDst(MI, WideTy);
 2220  } else {
 // Offset/width operands (2 and 3) are unsigned quantities.
 2221  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2222  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
 2223  }
 2224 
 2226  return Legalized;
 2227 
 2228  case TargetOpcode::G_SHL:
 2230 
 2231  if (TypeIdx == 0) {
 2232  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2233  widenScalarDst(MI, WideTy);
 2234  } else {
 2235  assert(TypeIdx == 1);
 2236  // The "number of bits to shift" operand must preserve its value as an
 2237  // unsigned integer:
 2238  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2239  }
 2240 
 2242  return Legalized;
 2243 
 2244  case TargetOpcode::G_SDIV:
 2245  case TargetOpcode::G_SREM:
 2246  case TargetOpcode::G_SMIN:
 2247  case TargetOpcode::G_SMAX:
 // Signed ops need sign-extended operands to preserve the narrow result.
 2249  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
 2250  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
 2251  widenScalarDst(MI, WideTy);
 2253  return Legalized;
 2254 
 2255  case TargetOpcode::G_SDIVREM:
 2257  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
 2258  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
 2259  widenScalarDst(MI, WideTy);
 2260  widenScalarDst(MI, WideTy, 1);
 2262  return Legalized;
 2263 
 2264  case TargetOpcode::G_ASHR:
 2265  case TargetOpcode::G_LSHR:
 2267 
 2268  if (TypeIdx == 0) {
 // ASHR must see the original sign bit, LSHR the original zero fill.
 2269  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
 2270  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
 2271 
 2272  widenScalarSrc(MI, WideTy, 1, CvtOp);
 2273  widenScalarDst(MI, WideTy);
 2274  } else {
 2275  assert(TypeIdx == 1);
 2276  // The "number of bits to shift" operand must preserve its value as an
 2277  // unsigned integer:
 2278  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2279  }
 2280 
 2282  return Legalized;
 2283  case TargetOpcode::G_UDIV:
 2284  case TargetOpcode::G_UREM:
 2285  case TargetOpcode::G_UMIN:
 2286  case TargetOpcode::G_UMAX:
 // Unsigned ops need zero-extended operands.
 2288  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
 2289  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2290  widenScalarDst(MI, WideTy);
 2292  return Legalized;
 2293 
 2294  case TargetOpcode::G_UDIVREM:
 2296  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2297  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
 2298  widenScalarDst(MI, WideTy);
 2299  widenScalarDst(MI, WideTy, 1);
 2301  return Legalized;
 2302 
 2303  case TargetOpcode::G_SELECT:
 2305  if (TypeIdx == 0) {
 2306  // Perform operation at larger width (any extension is fine here, high
 2307  // bits don't affect the result) and then truncate the result back to the
 2308  // original type.
 2309  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
 2310  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
 2311  widenScalarDst(MI, WideTy);
 2312  } else {
 2313  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
 2314  // Explicit extension is required here since high bits affect the result.
 2315  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
 2316  }
 2318  return Legalized;
 2319 
 2320  case TargetOpcode::G_FPTOSI:
 2321  case TargetOpcode::G_FPTOUI:
 2323 
 2324  if (TypeIdx == 0)
 2325  widenScalarDst(MI, WideTy)
 2326  else
 2327  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
 2328 
 2330  return Legalized;
 2331  case TargetOpcode::G_SITOFP:
 2333 
 2334  if (TypeIdx == 0)
 2335  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
 2336  else
 2337  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
 2338 
 2340  return Legalized;
 2341  case TargetOpcode::G_UITOFP:
 2343 
 2344  if (TypeIdx == 0)
 2345  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
 2346  else
 2347  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
 2348 
 2350  return Legalized;
 2351  case TargetOpcode::G_LOAD:
 2352  case TargetOpcode::G_SEXTLOAD:
 2353  case TargetOpcode::G_ZEXTLOAD:
 2355  widenScalarDst(MI, WideTy);
 2357  return Legalized;
 2358 
 2359  case TargetOpcode::G_STORE: {
 2360  if (TypeIdx != 0)
 2361  return UnableToLegalize;
 2362 
 2363  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
 2364  if (!Ty.isScalar())
 2365  return UnableToLegalize;
 2366 
 2368 
 // i1 stores must zero-extend so the stored byte has a defined value.
 2369  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
 2370  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
 2371  widenScalarSrc(MI, WideTy, 0, ExtType);
 2372 
 2374  return Legalized;
 2375  }
 2376  case TargetOpcode::G_CONSTANT: {
 2377  MachineOperand &SrcMO = MI.getOperand(1);
 // Ask the target which extension preserves the constant best; rewrite the
 // immediate in place rather than emitting an extend instruction.
 2379  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
 2380  MRI.getType(MI.getOperand(0).getReg()));
 2381  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
 2382  ExtOpc == TargetOpcode::G_ANYEXT) &&
 2383  "Illegal Extend");
 2384  const APInt &SrcVal = SrcMO.getCImm()->getValue();
 2385  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
 2386  ? SrcVal.sext(WideTy.getSizeInBits())
 2387  : SrcVal.zext(WideTy.getSizeInBits());
 2389  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
 2390 
 2391  widenScalarDst(MI, WideTy);
 2393  return Legalized;
 2394  }
 2395  case TargetOpcode::G_FCONSTANT: {
 // Convert the FP immediate to the wider format; the conversion is exact
 // (asserted below) since widening an FP type is lossless.
 2396  MachineOperand &SrcMO = MI.getOperand(1);
 2398  APFloat Val = SrcMO.getFPImm()->getValueAPF();
 2399  bool LosesInfo;
 2400  switch (WideTy.getSizeInBits()) {
 2401  case 32:
 2403  &LosesInfo);
 2404  break;
 2405  case 64:
 2407  &LosesInfo);
 2408  break;
 2409  default:
 2410  return UnableToLegalize;
 2411  }
 2412 
 2413  assert(!LosesInfo && "extend should always be lossless");
 2414 
 2416  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
 2417 
 2418  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
 2420  return Legalized;
 2421  }
 2422  case TargetOpcode::G_IMPLICIT_DEF: {
 2424  widenScalarDst(MI, WideTy);
 2426  return Legalized;
 2427  }
 2428  case TargetOpcode::G_BRCOND:
 2430  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
 2432  return Legalized;
 2433 
 2434  case TargetOpcode::G_FCMP:
 2436  if (TypeIdx == 0)
 2437  widenScalarDst(MI, WideTy);
 2438  else {
 2439  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
 2440  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
 2441  }
 2443  return Legalized;
 2444 
 2445  case TargetOpcode::G_ICMP:
 2447  if (TypeIdx == 0)
 2448  widenScalarDst(MI, WideTy);
 2449  else {
 // Extension must match the predicate's signedness for the compare to be
 // equivalent at the wider width.
 2450  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
 2451  MI.getOperand(1).getPredicate()))
 2452  ? TargetOpcode::G_SEXT
 2453  : TargetOpcode::G_ZEXT;
 2454  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
 2455  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
 2456  }
 2458  return Legalized;
 2459 
 2460  case TargetOpcode::G_PTR_ADD:
 2461  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
 // The offset operand is a signed displacement.
 2463  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
 2465  return Legalized;
 2466 
 2467  case TargetOpcode::G_PHI: {
 2468  assert(TypeIdx == 0 && "Expecting only Idx 0");
 2469 
 // Each incoming value must be extended in its predecessor block, before
 // that block's terminator, to keep the PHI at the top of this block valid.
 2471  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
 2472  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
 2473  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
 2474  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
 2475  }
 2476 
 2477  MachineBasicBlock &MBB = *MI.getParent();
 2479  widenScalarDst(MI, WideTy);
 2481  return Legalized;
 2482  }
 2483  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
 2484  if (TypeIdx == 0) {
 // Widening the element type requires widening the whole source vector.
 2485  Register VecReg = MI.getOperand(1).getReg();
 2486  LLT VecTy = MRI.getType(VecReg);
 2488 
 2490  MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
 2491  TargetOpcode::G_ANYEXT);
 2492 
 2493  widenScalarDst(MI, WideTy, 0);
 2495  return Legalized;
 2496  }
 2497 
 2498  if (TypeIdx != 2)
 2499  return UnableToLegalize;
 2501  // TODO: Probably should be zext
 2502  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
 2504  return Legalized;
 2505  }
 2506  case TargetOpcode::G_INSERT_VECTOR_ELT: {
 2507  if (TypeIdx == 1) {
 2509 
 2510  Register VecReg = MI.getOperand(1).getReg();
 2511  LLT VecTy = MRI.getType(VecReg);
 2512  LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
 2513 
 2514  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
 2515  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
 2516  widenScalarDst(MI, WideVecTy, 0);
 2518  return Legalized;
 2519  }
 2520 
 2521  if (TypeIdx == 2) {
 2523  // TODO: Probably should be zext
 2524  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
 2526  return Legalized;
 2527  }
 2528 
 2529  return UnableToLegalize;
 2530  }
 2531  case TargetOpcode::G_FADD:
 2532  case TargetOpcode::G_FMUL:
 2533  case TargetOpcode::G_FSUB:
 2534  case TargetOpcode::G_FMA:
 2535  case TargetOpcode::G_FMAD:
 2536  case TargetOpcode::G_FNEG:
 2537  case TargetOpcode::G_FABS:
 2538  case TargetOpcode::G_FCANONICALIZE:
 2539  case TargetOpcode::G_FMINNUM:
 2540  case TargetOpcode::G_FMAXNUM:
 2541  case TargetOpcode::G_FMINNUM_IEEE:
 2542  case TargetOpcode::G_FMAXNUM_IEEE:
 2543  case TargetOpcode::G_FMINIMUM:
 2544  case TargetOpcode::G_FMAXIMUM:
 2545  case TargetOpcode::G_FDIV:
 2546  case TargetOpcode::G_FREM:
 2547  case TargetOpcode::G_FCEIL:
 2548  case TargetOpcode::G_FFLOOR:
 2549  case TargetOpcode::G_FCOS:
 2550  case TargetOpcode::G_FSIN:
 2551  case TargetOpcode::G_FLOG10:
 2552  case TargetOpcode::G_FLOG:
 2553  case TargetOpcode::G_FLOG2:
 2554  case TargetOpcode::G_FRINT:
 2555  case TargetOpcode::G_FNEARBYINT:
 2556  case TargetOpcode::G_FSQRT:
 2557  case TargetOpcode::G_FEXP:
 2558  case TargetOpcode::G_FEXP2:
 2559  case TargetOpcode::G_FPOW:
 2560  case TargetOpcode::G_INTRINSIC_TRUNC:
 2561  case TargetOpcode::G_INTRINSIC_ROUND:
 2562  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
 2563  assert(TypeIdx == 0);
 2565 
 // FP ops: fpext all sources, perform in WideTy, fptrunc the result.
 // NOTE(review): double-rounding through the wide type is assumed acceptable
 // here — this matches standard promote-FP legalization behavior.
 2566  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
 2567  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
 2568 
 2569  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
 2571  return Legalized;
 2572  case TargetOpcode::G_FPOWI: {
 2573  if (TypeIdx != 0)
 2574  return UnableToLegalize;
 2576  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
 2577  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
 2579  return Legalized;
 2580  }
 2581  case TargetOpcode::G_INTTOPTR:
 2582  if (TypeIdx != 1)
 2583  return UnableToLegalize;
 2584 
 2586  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
 2588  return Legalized;
 2589  case TargetOpcode::G_PTRTOINT:
 2590  if (TypeIdx != 0)
 2591  return UnableToLegalize;
 2592 
 2594  widenScalarDst(MI, WideTy, 0);
 2596  return Legalized;
 2597  case TargetOpcode::G_BUILD_VECTOR: {
 2599 
 2600  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
 2601  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
 2602  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
 2603 
 2604  // Avoid changing the result vector type if the source element type was
 2605  // requested.
 2606  if (TypeIdx == 1) {
 2607  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
 2608  } else {
 2609  widenScalarDst(MI, WideTy, 0);
 2610  }
 2611 
 2613  return Legalized;
 2614  }
 2615  case TargetOpcode::G_SEXT_INREG:
 2616  if (TypeIdx != 0)
 2617  return UnableToLegalize;
 2618 
 2620  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
 2621  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
 2623  return Legalized;
 2624  case TargetOpcode::G_PTRMASK: {
 2625  if (TypeIdx != 1)
 2626  return UnableToLegalize;
 // The mask operand is a bit pattern; zero-extend preserves its meaning.
 2628  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
 2630  return Legalized;
 2631  }
 2632  }
 2633 }
2634 
2636  MachineIRBuilder &B, Register Src, LLT Ty) {
 // Unmerge \p Src into \p Ty-sized pieces and append each resulting register
 // to \p Pieces. The last operand of G_UNMERGE_VALUES is the source, hence
 // the iteration stops at getNumOperands() - 1 (defs only).
 2637  auto Unmerge = B.buildUnmerge(Ty, Src);
 2638  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
 2639  Pieces.push_back(Unmerge.getReg(I));
 2640 }
2641 
2644  Register Dst = MI.getOperand(0).getReg();
 // Lower a G_BITCAST by unmerging the source into element-sized pieces,
 // bitcasting those pieces if the element sizes differ, and re-merging into
 // the destination type. Handles vector->vector, vector->scalar, and
 // scalar->vector; returns UnableToLegalize for scalar->scalar.
 2645  Register Src = MI.getOperand(1).getReg();
 2646  LLT DstTy = MRI.getType(Dst);
 2647  LLT SrcTy = MRI.getType(Src);
 2648 
 2649  if (SrcTy.isVector()) {
 2650  LLT SrcEltTy = SrcTy.getElementType();
 2651  SmallVector<Register, 8> SrcRegs;
 2652 
 2653  if (DstTy.isVector()) {
 2654  int NumDstElt = DstTy.getNumElements();
 2655  int NumSrcElt = SrcTy.getNumElements();
 2656 
 2657  LLT DstEltTy = DstTy.getElementType();
 2658  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
 2659  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
 2660 
 2661  // If there's an element size mismatch, insert intermediate casts to match
 2662  // the result element type.
 2663  if (NumSrcElt < NumDstElt) { // Source element type is larger.
 2664  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
 2665  //
 2666  // =>
 2667  //
 2668  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
 2669  // %4:_(<2 x s8>) = G_BITCAST %2
 2670  // %5:_(<2 x s8>) = G_BITCAST %3
 2671  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
 2672  DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
 2673  SrcPartTy = SrcEltTy;
 2674  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
 2675  //
 2676  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
 2677  //
 2678  // =>
 2679  //
 2680  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
 2681  // %4:_(s16) = G_BITCAST %2
 2682  // %5:_(s16) = G_BITCAST %3
 2683  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
 2684  SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
 2685  DstCastTy = DstEltTy;
 2686  }
 2687 
 2688  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
 2689  for (Register &SrcReg : SrcRegs)
 2690  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
 2691  } else
 // Vector -> scalar: unmerge into source elements and merge them directly.
 2692  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
 2693 
 2694  MIRBuilder.buildMerge(Dst, SrcRegs);
 2695  MI.eraseFromParent();
 2696  return Legalized;
 2697  }
 2698 
 2699  if (DstTy.isVector()) {
 // Scalar -> vector: unmerge the scalar into dst-element pieces and merge.
 2700  SmallVector<Register, 8> SrcRegs;
 2701  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
 2702  MIRBuilder.buildMerge(Dst, SrcRegs);
 2703  MI.eraseFromParent();
 2704  return Legalized;
 2705  }
 2706 
 2707  return UnableToLegalize;
 2708 }
2709 
2710 /// Figure out the bit offset into a register when coercing a vector index for
 2711 /// the wide element type. This is only for the case when promoting vector to
 2712 /// one with larger elements.
 2713 ///
 2714 ///
 2715 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
 2716 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
 2718  Register Idx,
 2719  unsigned NewEltSize,
 2720  unsigned OldEltSize) {
 // Number of old elements packed into one new element, as a log2 shift.
 2721  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
 2722  LLT IdxTy = B.getMRI()->getType(Idx);
 2723 
 2724  // Now figure out the amount we need to shift to get the target bits.
 // Mask off the sub-index within the wide element, then scale it by the old
 // element size (in bits) to get the bit offset.
 2725  auto OffsetMask = B.buildConstant(
 2726  IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
 2727  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
 2728  return B.buildShl(IdxTy, OffsetIdx,
 2729  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
 2730 }
2731 
2732 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
 2733 /// is casting to a vector with a smaller element size, perform multiple element
 2734 /// extracts and merge the results. If this is coercing to a vector with larger
 2735 /// elements, index the bitcasted vector and extract the target element with bit
 2736 /// operations. This is intended to force the indexing in the native register
 2737 /// size for architectures that can dynamically index the register file.
 2740  LLT CastTy) {
 2741  if (TypeIdx != 1)
 2742  return UnableToLegalize;
 2743 
 2744  Register Dst = MI.getOperand(0).getReg();
 2745  Register SrcVec = MI.getOperand(1).getReg();
 2746  Register Idx = MI.getOperand(2).getReg();
 2747  LLT SrcVecTy = MRI.getType(SrcVec);
 2748  LLT IdxTy = MRI.getType(Idx);
 2749 
 2750  LLT SrcEltTy = SrcVecTy.getElementType();
 // CastTy may be a scalar; treat that as a single-element "vector".
 2751  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
 2752  unsigned OldNumElts = SrcVecTy.getNumElements();
 2753 
 2754  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
 2755  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
 2756 
 2757  const unsigned NewEltSize = NewEltTy.getSizeInBits();
 2758  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
 2759  if (NewNumElts > OldNumElts) {
 2760  // Decreasing the vector element size
 2761  //
 2762  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
 2763  // =>
 2764  // v4i32:castx = bitcast x:v2i64
 2765  //
 2766  // i64 = bitcast
 2767  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
 2768  // (i32 (extract_vector_elt castx, (2 * y + 1)))
 2769  //
 2770  if (NewNumElts % OldNumElts != 0)
 2771  return UnableToLegalize;
 2772 
 2773  // Type of the intermediate result vector.
 2774  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
 2775  LLT MidTy =
 2776  LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
 2777 
 2778  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
 2779 
 // Extract the NewEltsPerOldElt consecutive small elements that make up the
 // requested old element, then bitcast the rebuilt group to the old type.
 2780  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
 2781  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
 2782 
 2783  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
 2784  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
 2785  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
 2786  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
 2787  NewOps[I] = Elt.getReg(0);
 2788  }
 2789 
 2790  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
 2791  MIRBuilder.buildBitcast(Dst, NewVec);
 2792  MI.eraseFromParent();
 2793  return Legalized;
 2794  }
 2795 
 2796  if (NewNumElts < OldNumElts) {
 2797  if (NewEltSize % OldEltSize != 0)
 2798  return UnableToLegalize;
 2799 
 2800  // This only depends on powers of 2 because we use bit tricks to figure out
 2801  // the bit offset we need to shift to get the target element. A general
 2802  // expansion could emit division/multiply.
 2803  if (!isPowerOf2_32(NewEltSize / OldEltSize))
 2804  return UnableToLegalize;
 2805 
 2806  // Increasing the vector element size.
 2807  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
 2808  //
 2809  // =>
 2810  //
 2811  // %cast = G_BITCAST %vec
 2812  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
 2813  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
 2814  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
 2815  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
 2816  // %elt_bits = G_LSHR %wide_elt, %offset_bits
 2817  // %elt = G_TRUNC %elt_bits
 2818 
 2819  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
 2820  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
 2821 
 2822  // Divide to get the index in the wider element type.
 2823  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
 2824 
 2825  Register WideElt = CastVec;
 2826  if (CastTy.isVector()) {
 2827  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
 2828  ScaledIdx).getReg(0);
 2829  }
 2830 
 2831  // Compute the bit offset into the register of the target element.
 2833  MIRBuilder, Idx, NewEltSize, OldEltSize);
 2834 
 2835  // Shift the wide element to get the target element.
 2836  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
 2837  MIRBuilder.buildTrunc(Dst, ExtractedBits);
 2838  MI.eraseFromParent();
 2839  return Legalized;
 2840  }
 2841 
 // Same element count (and thus same element size): nothing useful to do.
 2842  return UnableToLegalize;
 2843 }
2844 
2845 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
 2846 /// TargetReg, while preserving other bits in \p TargetReg.
 2847 ///
 2848 /// (ZExt(InsertReg) << OffsetBits) |
 2849 /// (TargetReg & ~(LowBitsMask(size(InsertReg)) << OffsetBits))
 2851  Register TargetReg, Register InsertReg,
 2852  Register OffsetBits) {
 2853  LLT TargetTy = B.getMRI()->getType(TargetReg);
 2854  LLT InsertTy = B.getMRI()->getType(InsertReg);
 // Zero-extend the value so the bits above the insert width are known zero,
 // then move it into position.
 2855  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
 2856  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
 2857 
 2858  // Produce a bitmask of the value to insert
 2859  auto EltMask = B.buildConstant(
 2860  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
 2861  InsertTy.getSizeInBits()));
 2862  // Shift it into position
 2863  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
 2864  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
 2865 
 2866  // Clear out the bits in the wide element
 2867  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
 2868 
 2869  // The value to insert has all zeros already, so stick it into the masked
 2870  // wide element.
 2871  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
 2872 }
2872 
2873 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2874 /// is increasing the element size, perform the indexing in the target element
2875 /// type, and use bit operations to insert at the element position. This is
2876 /// intended for architectures that can dynamically index the register file and
2877 /// want to force indexing in the native register size.
// NOTE(review): the lines carrying the function header (presumably
// "LegalizerHelper::LegalizeResult LegalizerHelper::bitcastInsertVectorElt(
//      MachineInstr &MI, unsigned TypeIdx,") were lost in this rendering;
// only the trailing parameter remains below. Verify against upstream.
2880  LLT CastTy) {
// Only the destination vector type index is supported.
2881  if (TypeIdx != 0)
2882  return UnableToLegalize;
2883 
2884  Register Dst = MI.getOperand(0).getReg();
2885  Register SrcVec = MI.getOperand(1).getReg();
2886  Register Val = MI.getOperand(2).getReg();
2887  Register Idx = MI.getOperand(3).getReg();
2888 
2889  LLT VecTy = MRI.getType(Dst);
2890  LLT IdxTy = MRI.getType(Idx);
2891 
2892  LLT VecEltTy = VecTy.getElementType();
2893  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2894  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2895  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2896 
2897  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2898  unsigned OldNumElts = VecTy.getNumElements();
2899 
2900  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
// Only the "wider element" direction (fewer, larger elements) is handled.
2901  if (NewNumElts < OldNumElts) {
2902  if (NewEltSize % OldEltSize != 0)
2903  return UnableToLegalize;
2904 
2905  // This only depends on powers of 2 because we use bit tricks to figure out
2906  // the bit offset we need to shift to get the target element. A general
2907  // expansion could emit division/multiply.
2908  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2909  return UnableToLegalize;
2910 
2911  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2912  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2913 
2914  // Divide to get the index in the wider element type.
2915  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2916 
2917  Register ExtractedElt = CastVec;
2918  if (CastTy.isVector()) {
2919  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2920  ScaledIdx).getReg(0);
2921  }
2922 
2923  // Compute the bit offset into the register of the target element.
// NOTE(review): a line is missing from this rendering here — presumably
// "Register OffsetBits = getBitcastWiderVectorElementOffset(" whose argument
// list continues on the next line. Verify against the upstream file.
2925  MIRBuilder, Idx, NewEltSize, OldEltSize);
2926 
// Splice the new value's bits into the extracted wide element.
2927  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2928  Val, OffsetBits);
2929  if (CastTy.isVector()) {
2930  InsertedElt = MIRBuilder.buildInsertVectorElement(
2931  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2932  }
2933 
2934  MIRBuilder.buildBitcast(Dst, InsertedElt);
2935  MI.eraseFromParent();
2936  return Legalized;
2937  }
2938 
2939  return UnableToLegalize;
2940 }
2941 
// NOTE(review): the function header line (presumably
// "LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {")
// was lost in this rendering. Verify against the upstream file.
2943  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2944  Register DstReg = LoadMI.getDstReg();
2945  Register PtrReg = LoadMI.getPointerReg();
2946  LLT DstTy = MRI.getType(DstReg);
2947  MachineMemOperand &MMO = LoadMI.getMMO();
2948  LLT MemTy = MMO.getMemoryType();
// NOTE(review): a line is missing here — presumably
// "MachineFunction &MF = MIRBuilder.getMF();" (MF is used below).
2950 
2951  unsigned MemSizeInBits = MemTy.getSizeInBits();
2952  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2953 
// Case 1: the memory type is not a whole number of bytes (e.g. s20).
2954  if (MemSizeInBits != MemStoreSizeInBits) {
2955  if (MemTy.isVector())
2956  return UnableToLegalize;
2957 
2958  // Promote to a byte-sized load if not loading an integral number of
2959  // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2960  LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2961  MachineMemOperand *NewMMO =
2962  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2963 
2964  Register LoadReg = DstReg;
2965  LLT LoadTy = DstTy;
2966 
2967  // If this wasn't already an extending load, we need to widen the result
2968  // register to avoid creating a load with a narrower result than the source.
2969  if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
2970  LoadTy = WideMemTy;
2971  LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
2972  }
2973 
2974  if (isa<GSExtLoad>(LoadMI)) {
2975  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2976  MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
2977  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
2978  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2979  // The extra bits are guaranteed to be zero, since we stored them that
2980  // way. A zext load from Wide thus automatically gives zext from MemVT.
2981  MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
2982  } else {
2983  MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
2984  }
2985 
2986  if (DstTy != LoadTy)
2987  MIRBuilder.buildTrunc(DstReg, LoadReg);
2988 
2989  LoadMI.eraseFromParent();
2990  return Legalized;
2991  }
2992 
2993  // Big endian lowering not implemented.
// NOTE(review): the guard for this early return is missing from the
// rendering — presumably "if (MIRBuilder.getDataLayout().isBigEndian())";
// as rendered, the return below would make the rest unreachable. Verify.
2995  return UnableToLegalize;
2996 
2997  // This load needs splitting into power of 2 sized loads.
2998  //
2999  // Our strategy here is to generate anyextending loads for the smaller
3000  // types up to next power-2 result type, and then combine the two larger
3001  // result values together, before truncating back down to the non-pow-2
3002  // type.
3003  // E.g. v1 = i24 load =>
3004  // v2 = i32 zextload (2 byte)
3005  // v3 = i32 load (1 byte)
3006  // v4 = i32 shl v3, 16
3007  // v5 = i32 or v4, v2
3008  // v1 = i24 trunc v5
3009  // By doing this we generate the correct truncate which should get
3010  // combined away as an artifact with a matching extend.
3011 
3012  uint64_t LargeSplitSize, SmallSplitSize;
3013 
3014  if (!isPowerOf2_32(MemSizeInBits)) {
3015  // This load needs splitting into power of 2 sized loads.
3016  LargeSplitSize = PowerOf2Floor(MemSizeInBits);
3017  SmallSplitSize = MemSizeInBits - LargeSplitSize;
3018  } else {
3019  // This is already a power of 2, but we still need to split this in half.
3020  //
3021  // Assume we're being asked to decompose an unaligned load.
3022  // TODO: If this requires multiple splits, handle them all at once.
3023  auto &Ctx = MF.getFunction().getContext();
3024  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3025  return UnableToLegalize;
3026 
3027  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3028  }
3029 
3030  if (MemTy.isVector()) {
3031  // TODO: Handle vector extloads
3032  if (MemTy != DstTy)
3033  return UnableToLegalize;
3034 
3035  // TODO: We can do better than scalarizing the vector and at least split it
3036  // in half.
3037  return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3038  }
3039 
3040  MachineMemOperand *LargeMMO =
3041  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3042  MachineMemOperand *SmallMMO =
3043  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3044 
3045  LLT PtrTy = MRI.getType(PtrReg);
3046  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3047  LLT AnyExtTy = LLT::scalar(AnyExtSize);
// Low part: zextload so the high bits are known zero for the OR below.
3048  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3049  PtrReg, *LargeMMO);
3050 
3051  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3052  LargeSplitSize / 8);
3053  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3054  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
// High part: keeps the original (possibly extending) load opcode.
3055  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3056  SmallPtr, *SmallMMO);
3057 
3058  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3059  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3060 
3061  if (AnyExtTy == DstTy)
3062  MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3063  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3064  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3065  MIRBuilder.buildTrunc(DstReg, {Or});
3066  } else {
3067  assert(DstTy.isPointer() && "expected pointer");
3068  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3069 
3070  // FIXME: We currently consider this to be illegal for non-integral address
3071  // spaces, but we need still need a way to reinterpret the bits.
3072  MIRBuilder.buildIntToPtr(DstReg, Or);
3073  }
3074 
3075  LoadMI.eraseFromParent();
3076  return Legalized;
3077 }
3078 
// NOTE(review): the function header line (presumably
// "LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {")
// was lost in this rendering. Verify against the upstream file.
3080  // Lower a non-power of 2 store into multiple pow-2 stores.
3081  // E.g. split an i24 store into an i16 store + i8 store.
3082  // We do this by first extending the stored value to the next largest power
3083  // of 2 type, and then using truncating stores to store the components.
3084  // By doing this, likewise with G_LOAD, generate an extend that can be
3085  // artifact-combined away instead of leaving behind extracts.
3086  Register SrcReg = StoreMI.getValueReg();
3087  Register PtrReg = StoreMI.getPointerReg();
3088  LLT SrcTy = MRI.getType(SrcReg);
// NOTE(review): a line is missing here — presumably
// "MachineFunction &MF = MIRBuilder.getMF();" (MF is used below).
3090  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3091  LLT MemTy = MMO.getMemoryType();
3092 
3093  unsigned StoreWidth = MemTy.getSizeInBits();
3094  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3095 
// Case 1: the memory type is not a whole number of bytes (e.g. s1).
3096  if (StoreWidth != StoreSizeInBits) {
3097  if (SrcTy.isVector())
3098  return UnableToLegalize;
3099 
3100  // Promote to a byte-sized store with upper bits zero if not
3101  // storing an integral number of bytes. For example, promote
3102  // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3103  LLT WideTy = LLT::scalar(StoreSizeInBits);
3104 
3105  if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3106  // Avoid creating a store with a narrower source than result.
3107  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3108  SrcTy = WideTy;
3109  }
3110 
3111  auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3112 
3113  MachineMemOperand *NewMMO =
3114  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3115  MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3116  StoreMI.eraseFromParent();
3117  return Legalized;
3118  }
3119 
3120  if (MemTy.isVector()) {
3121  // TODO: Handle vector trunc stores
3122  if (MemTy != SrcTy)
3123  return UnableToLegalize;
3124 
3125  // TODO: We can do better than scalarizing the vector and at least split it
3126  // in half.
3127  return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3128  }
3129 
3130  unsigned MemSizeInBits = MemTy.getSizeInBits();
3131  uint64_t LargeSplitSize, SmallSplitSize;
3132 
3133  if (!isPowerOf2_32(MemSizeInBits)) {
3134  LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3135  SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3136  } else {
// A power-of-2 store only reaches here if the target rejected the access
// (e.g. unaligned); split it in half.
3137  auto &Ctx = MF.getFunction().getContext();
3138  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3139  return UnableToLegalize; // Don't know what we're being asked to do.
3140 
3141  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3142  }
3143 
3144  // Extend to the next pow-2. If this store was itself the result of lowering,
3145  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3146  // that's wider than the stored size.
3147  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3148  const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3149 
3150  if (SrcTy.isPointer()) {
3151  const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3152  SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3153  }
3154 
3155  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3156 
3157  // Obtain the smaller value by shifting away the larger value.
3158  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3159  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3160 
3161  // Generate the PtrAdd and truncating stores.
3162  LLT PtrTy = MRI.getType(PtrReg);
3163  auto OffsetCst = MIRBuilder.buildConstant(
3164  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3165  auto SmallPtr =
3166  MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3167 
3168  MachineMemOperand *LargeMMO =
3169  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3170  MachineMemOperand *SmallMMO =
3171  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3172  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3173  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3174  StoreMI.eraseFromParent();
3175  return Legalized;
3176 }
3177 
// Apply a bitcast legalization action: rewrite the given type index of MI to
// operate on CastTy, inserting bitcasts on the affected operands.
// NOTE(review): the return-type line (presumably
// "LegalizerHelper::LegalizeResult") was lost in this rendering, as were the
// "Observer.changingInstr(MI);" / "Observer.changedInstr(MI);" bookkeeping
// lines around each in-place mutation below. Verify against upstream.
3179 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3180  switch (MI.getOpcode()) {
3181  case TargetOpcode::G_LOAD: {
3182  if (TypeIdx != 0)
3183  return UnableToLegalize;
3184  MachineMemOperand &MMO = **MI.memoperands_begin();
3185 
3186  // Not sure how to interpret a bitcast of an extending load.
3187  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3188  return UnableToLegalize;
3189 
// NOTE(review): "Observer.changingInstr(MI);" presumably missing here.
3191  bitcastDst(MI, CastTy, 0);
3192  MMO.setType(CastTy);
// NOTE(review): "Observer.changedInstr(MI);" presumably missing here.
3194  return Legalized;
3195  }
3196  case TargetOpcode::G_STORE: {
3197  if (TypeIdx != 0)
3198  return UnableToLegalize;
3199 
3200  MachineMemOperand &MMO = **MI.memoperands_begin();
3201 
3202  // Not sure how to interpret a bitcast of a truncating store.
3203  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3204  return UnableToLegalize;
3205 
// NOTE(review): "Observer.changingInstr(MI);" presumably missing here.
3207  bitcastSrc(MI, CastTy, 0);
3208  MMO.setType(CastTy);
// NOTE(review): "Observer.changedInstr(MI);" presumably missing here.
3210  return Legalized;
3211  }
3212  case TargetOpcode::G_SELECT: {
3213  if (TypeIdx != 0)
3214  return UnableToLegalize;
3215 
3216  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3217  LLVM_DEBUG(
3218  dbgs() << "bitcast action not implemented for vector select\n");
3219  return UnableToLegalize;
3220  }
3221 
// NOTE(review): "Observer.changingInstr(MI);" presumably missing here.
3223  bitcastSrc(MI, CastTy, 2);
3224  bitcastSrc(MI, CastTy, 3);
3225  bitcastDst(MI, CastTy, 0);
// NOTE(review): "Observer.changedInstr(MI);" presumably missing here.
3227  return Legalized;
3228  }
3229  case TargetOpcode::G_AND:
3230  case TargetOpcode::G_OR:
3231  case TargetOpcode::G_XOR: {
// NOTE(review): "Observer.changingInstr(MI);" presumably missing here.
3233  bitcastSrc(MI, CastTy, 1);
3234  bitcastSrc(MI, CastTy, 2);
3235  bitcastDst(MI, CastTy, 0);
// NOTE(review): "Observer.changedInstr(MI);" presumably missing here.
3237  return Legalized;
3238  }
3239  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3240  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3241  case TargetOpcode::G_INSERT_VECTOR_ELT:
3242  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3243  default:
3244  return UnableToLegalize;
3245  }
3246 }
3247 
3248 // Legalize an instruction by changing the opcode in place.
3249 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
// NOTE(review): observer bookkeeping lines (presumably
// "Observer.changingInstr(MI);" before and "Observer.changedInstr(MI);"
// after the setDesc) were lost in this rendering. Verify against upstream.
3251  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3253 }
3254 
// Dispatch table for the "Lower" legalization action: expand MI into simpler
// legal operations. LowerHintTy is advisory; TypeIdx selects the type to act
// on where relevant.
// NOTE(review): the return-type line (presumably
// "LegalizerHelper::LegalizeResult") was lost in this rendering; several
// interior lines were dropped too — each gap is flagged below. Verify
// against the upstream file.
3256 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3257  using namespace TargetOpcode;
3258 
3259  switch(MI.getOpcode()) {
3260  default:
3261  return UnableToLegalize;
3262  case TargetOpcode::G_BITCAST:
3263  return lowerBitcast(MI);
3264  case TargetOpcode::G_SREM:
3265  case TargetOpcode::G_UREM: {
// rem(a, b) == a - (a / b) * b
3266  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3267  auto Quot =
3268  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3269  {MI.getOperand(1), MI.getOperand(2)});
3270 
3271  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3272  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3273  MI.eraseFromParent();
3274  return Legalized;
3275  }
3276  case TargetOpcode::G_SADDO:
3277  case TargetOpcode::G_SSUBO:
3278  return lowerSADDO_SSUBO(MI);
3279  case TargetOpcode::G_UMULH:
3280  case TargetOpcode::G_SMULH:
3281  return lowerSMULH_UMULH(MI);
3282  case TargetOpcode::G_SMULO:
3283  case TargetOpcode::G_UMULO: {
3284  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3285  // result.
3286  Register Res = MI.getOperand(0).getReg();
3287  Register Overflow = MI.getOperand(1).getReg();
3288  Register LHS = MI.getOperand(2).getReg();
3289  Register RHS = MI.getOperand(3).getReg();
3290  LLT Ty = MRI.getType(Res);
3291 
3292  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3293  ? TargetOpcode::G_SMULH
3294  : TargetOpcode::G_UMULH;
3295 
// NOTE(review): "Observer.changingInstr(MI);" appears to have been stripped
// from the line below in this rendering.
3296 
3297  const auto &TII = MIRBuilder.getTII();
// Reuse MI itself as the G_MUL producing the low half of the result.
3298  MI.setDesc(TII.get(TargetOpcode::G_MUL));
3299  MI.removeOperand(1);
// NOTE(review): "Observer.changedInstr(MI);" appears to have been stripped
// from the line below in this rendering.
3301 
3302  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3303  auto Zero = MIRBuilder.buildConstant(Ty, 0);
3304 
3305  // Move insert point forward so we can use the Res register if needed.
// NOTE(review): the setInsertPt call (presumably
// "MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());")
// is missing from this rendering here.
3307 
3308  // For *signed* multiply, overflow is detected by checking:
3309  // (hi != (lo >> bitwidth-1))
3310  if (Opcode == TargetOpcode::G_SMULH) {
3311  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3312  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3313  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3314  } else {
3315  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3316  }
3317  return Legalized;
3318  }
3319  case TargetOpcode::G_FNEG: {
3320  Register Res = MI.getOperand(0).getReg();
3321  LLT Ty = MRI.getType(Res);
3322 
3323  // TODO: Handle vector types once we are able to
3324  // represent them.
3325  if (Ty.isVector())
3326  return UnableToLegalize;
// fneg(x) == x ^ sign-bit mask.
3327  auto SignMask =
// NOTE(review): the continuation line (presumably
// "MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));")
// is missing from this rendering here.
3329  Register SubByReg = MI.getOperand(1).getReg();
3330  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3331  MI.eraseFromParent();
3332  return Legalized;
3333  }
3334  case TargetOpcode::G_FSUB: {
3335  Register Res = MI.getOperand(0).getReg();
3336  LLT Ty = MRI.getType(Res);
3337 
3338  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3339  // First, check if G_FNEG is marked as Lower. If so, we may
3340  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3341  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3342  return UnableToLegalize;
3343  Register LHS = MI.getOperand(1).getReg();
3344  Register RHS = MI.getOperand(2).getReg();
// NOTE(review): a line is missing here — presumably
// "Register Neg = MRI.createGenericVirtualRegister(Ty);".
3346  MIRBuilder.buildFNeg(Neg, RHS);
3347  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3348  MI.eraseFromParent();
3349  return Legalized;
3350  }
3351  case TargetOpcode::G_FMAD:
3352  return lowerFMad(MI);
3353  case TargetOpcode::G_FFLOOR:
3354  return lowerFFloor(MI);
3355  case TargetOpcode::G_INTRINSIC_ROUND:
3356  return lowerIntrinsicRound(MI);
3357  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3358  // Since round even is the assumed rounding mode for unconstrained FP
3359  // operations, rint and roundeven are the same operation.
3360  changeOpcode(MI, TargetOpcode::G_FRINT);
3361  return Legalized;
3362  }
3363  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3364  Register OldValRes = MI.getOperand(0).getReg();
3365  Register SuccessRes = MI.getOperand(1).getReg();
3366  Register Addr = MI.getOperand(2).getReg();
3367  Register CmpVal = MI.getOperand(3).getReg();
3368  Register NewVal = MI.getOperand(4).getReg();
3369  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3370  **MI.memoperands_begin());
// Success flag is recovered by comparing the loaded value to CmpVal.
3371  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3372  MI.eraseFromParent();
3373  return Legalized;
3374  }
3375  case TargetOpcode::G_LOAD:
3376  case TargetOpcode::G_SEXTLOAD:
3377  case TargetOpcode::G_ZEXTLOAD:
3378  return lowerLoad(cast<GAnyLoad>(MI));
3379  case TargetOpcode::G_STORE:
3380  return lowerStore(cast<GStore>(MI));
3381  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3382  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3383  case TargetOpcode::G_CTLZ:
3384  case TargetOpcode::G_CTTZ:
3385  case TargetOpcode::G_CTPOP:
3386  return lowerBitCount(MI);
3387  case G_UADDO: {
3388  Register Res = MI.getOperand(0).getReg();
3389  Register CarryOut = MI.getOperand(1).getReg();
3390  Register LHS = MI.getOperand(2).getReg();
3391  Register RHS = MI.getOperand(3).getReg();
3392 
// Unsigned add overflowed iff the result wrapped below an operand.
3393  MIRBuilder.buildAdd(Res, LHS, RHS);
3394  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3395 
3396  MI.eraseFromParent();
3397  return Legalized;
3398  }
3399  case G_UADDE: {
3400  Register Res = MI.getOperand(0).getReg();
3401  Register CarryOut = MI.getOperand(1).getReg();
3402  Register LHS = MI.getOperand(2).getReg();
3403  Register RHS = MI.getOperand(3).getReg();
3404  Register CarryIn = MI.getOperand(4).getReg();
3405  LLT Ty = MRI.getType(Res);
3406 
3407  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3408  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3409  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3410  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3411 
3412  MI.eraseFromParent();
3413  return Legalized;
3414  }
3415  case G_USUBO: {
3416  Register Res = MI.getOperand(0).getReg();
3417  Register BorrowOut = MI.getOperand(1).getReg();
3418  Register LHS = MI.getOperand(2).getReg();
3419  Register RHS = MI.getOperand(3).getReg();
3420 
3421  MIRBuilder.buildSub(Res, LHS, RHS);
// NOTE(review): the borrow computation is missing here — presumably
// "MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);".
3423 
3424  MI.eraseFromParent();
3425  return Legalized;
3426  }
3427  case G_USUBE: {
3428  Register Res = MI.getOperand(0).getReg();
3429  Register BorrowOut = MI.getOperand(1).getReg();
3430  Register LHS = MI.getOperand(2).getReg();
3431  Register RHS = MI.getOperand(3).getReg();
3432  Register BorrowIn = MI.getOperand(4).getReg();
3433  const LLT CondTy = MRI.getType(BorrowOut);
3434  const LLT Ty = MRI.getType(Res);
3435 
3436  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3437  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3438  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3439 
// Borrow-out: if LHS == RHS the incoming borrow propagates, otherwise it is
// simply LHS < RHS.
3440  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3441  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3442  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3443 
3444  MI.eraseFromParent();
3445  return Legalized;
3446  }
3447  case G_UITOFP:
3448  return lowerUITOFP(MI);
3449  case G_SITOFP:
3450  return lowerSITOFP(MI);
3451  case G_FPTOUI:
3452  return lowerFPTOUI(MI);
3453  case G_FPTOSI:
3454  return lowerFPTOSI(MI);
3455  case G_FPTRUNC:
3456  return lowerFPTRUNC(MI);
3457  case G_FPOWI:
3458  return lowerFPOWI(MI);
3459  case G_SMIN:
3460  case G_SMAX:
3461  case G_UMIN:
3462  case G_UMAX:
3463  return lowerMinMax(MI);
3464  case G_FCOPYSIGN:
3465  return lowerFCopySign(MI);
3466  case G_FMINNUM:
3467  case G_FMAXNUM:
3468  return lowerFMinNumMaxNum(MI);
3469  case G_MERGE_VALUES:
3470  return lowerMergeValues(MI);
3471  case G_UNMERGE_VALUES:
3472  return lowerUnmergeValues(MI);
3473  case TargetOpcode::G_SEXT_INREG: {
3474  assert(MI.getOperand(2).isImm() && "Expected immediate");
3475  int64_t SizeInBits = MI.getOperand(2).getImm();
3476 
3477  Register DstReg = MI.getOperand(0).getReg();
3478  Register SrcReg = MI.getOperand(1).getReg();
3479  LLT DstTy = MRI.getType(DstReg);
3480  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3481 
// sext_inreg == shl then ashr by (width - SizeInBits).
3482  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3483  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3484  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3485  MI.eraseFromParent();
3486  return Legalized;
3487  }
3488  case G_EXTRACT_VECTOR_ELT:
3489  case G_INSERT_VECTOR_ELT:
// NOTE(review): the shared return is missing here — presumably
// "return lowerExtractInsertVectorElt(MI);".
3491  case G_SHUFFLE_VECTOR:
3492  return lowerShuffleVector(MI);
3493  case G_DYN_STACKALLOC:
3494  return lowerDynStackAlloc(MI);
3495  case G_EXTRACT:
3496  return lowerExtract(MI);
3497  case G_INSERT:
3498  return lowerInsert(MI);
3499  case G_BSWAP:
3500  return lowerBswap(MI);
3501  case G_BITREVERSE:
3502  return lowerBitreverse(MI);
3503  case G_READ_REGISTER:
3504  case G_WRITE_REGISTER:
3505  return lowerReadWriteRegister(MI);
3506  case G_UADDSAT:
3507  case G_USUBSAT: {
3508  // Try to make a reasonable guess about which lowering strategy to use. The
3509  // target can override this with custom lowering and calling the
3510  // implementation functions.
3511  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3512  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3513  return lowerAddSubSatToMinMax(MI);
3514  return lowerAddSubSatToAddoSubo(MI);
3515  }
3516  case G_SADDSAT:
3517  case G_SSUBSAT: {
3518  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3519 
3520  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3521  // since it's a shorter expansion. However, we would need to figure out the
3522  // preferred boolean type for the carry out for the query.
3523  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3524  return lowerAddSubSatToMinMax(MI);
3525  return lowerAddSubSatToAddoSubo(MI);
3526  }
3527  case G_SSHLSAT:
3528  case G_USHLSAT:
3529  return lowerShlSat(MI);
3530  case G_ABS:
3531  return lowerAbsToAddXor(MI);
3532  case G_SELECT:
3533  return lowerSelect(MI);
3534  case G_SDIVREM:
3535  case G_UDIVREM:
3536  return lowerDIVREM(MI);
3537  case G_FSHL:
3538  case G_FSHR:
3539  return lowerFunnelShift(MI);
3540  case G_ROTL:
3541  case G_ROTR:
3542  return lowerRotate(MI);
3543  case G_MEMSET:
3544  case G_MEMCPY:
3545  case G_MEMMOVE:
3546  return lowerMemCpyFamily(MI);
3547  case G_MEMCPY_INLINE:
3548  return lowerMemcpyInline(MI);
// NOTE(review): the case label(s) are missing here — presumably the
// "GISEL_VECREDUCE_CASES_NONSEQ" macro expanding to the vector-reduction
// opcodes. Verify against upstream.
3550  return lowerVectorReduction(MI);
3551  }
3552 }
3553 
// Choose an alignment for a stack temporary of type Ty, at least MinAlign.
// NOTE(review): the header line (presumably
// "Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,") was lost in
// this rendering; only the trailing parameter remains below.
3555  Align MinAlign) const {
3556  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3557  // datalayout for the preferred alignment. Also there should be a target hook
3558  // for this to allow targets to reduce the alignment and ignore the
3559  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3560  // the type.
// NOTE(review): the return statement is missing from this rendering —
// presumably "return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())),
// MinAlign);". Verify against the upstream file.
3562 }
3563 
// Create a stack temporary of the given size/alignment and return a
// G_FRAME_INDEX pointing at it; PtrInfo is filled in for the fixed stack slot.
// NOTE(review): the header lines (presumably "MachineInstrBuilder
// LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,")
// were lost in this rendering; only the trailing parameter remains below.
3566  MachinePointerInfo &PtrInfo) {
// NOTE(review): lines defining MF and DL are missing here — presumably
// "MachineFunction &MF = MIRBuilder.getMF();" and
// "const DataLayout &DL = MIRBuilder.getDataLayout();".
3569  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3570 
3571  unsigned AddrSpace = DL.getAllocaAddrSpace();
3572  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3573 
3574  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3575  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3576 }
3577 
// Clamp a dynamic vector index register so a variable index cannot address
// past the end of the vector.
// NOTE(review): the header line (presumably
// "static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,")
// was lost in this rendering; only the trailing parameter remains below.
3579  LLT VecTy) {
3580  int64_t IdxVal;
// Constant indices are returned unchanged (assumed in range by the caller).
3581  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3582  return IdxReg;
3583 
3584  LLT IdxTy = B.getMRI()->getType(IdxReg);
3585  unsigned NElts = VecTy.getNumElements();
// Power-of-2 element counts can be clamped with a cheap AND mask...
3586  if (isPowerOf2_32(NElts)) {
3587  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3588  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3589  }
3590 
// ...otherwise clamp to the last valid element with an unsigned min.
3591  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3592  .getReg(0);
3593 }
3594 
// Compute a pointer to element Index of a stack-resident vector at VecPtr.
// NOTE(review): the header line (presumably "Register
// LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,")
// was lost in this rendering; only the trailing parameter remains below.
3596  Register Index) {
3597  LLT EltTy = VecTy.getElementType();
3598 
3599  // Calculate the element offset and add it to the pointer.
3600  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3601  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3602  "Converting bits to bytes lost precision");
3603 
// NOTE(review): a line is missing here — presumably the index clamp
// "Index = clampVectorIndex(MIRBuilder, Index, VecTy);". Verify upstream.
3605 
3606  LLT IdxTy = MRI.getType(Index);
// Byte offset = Index * element-size.
3607  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3608  MIRBuilder.buildConstant(IdxTy, EltSize));
3609 
3610  LLT PtrTy = MRI.getType(VecPtr);
3611  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3612 }
3613 
3614 #ifndef NDEBUG
3615 /// Check that all vector operands have same number of elements. Other operands
3616 /// should be listed in NonVecOp.
// NOTE(review): the header lines (presumably
// "static bool hasSameNumEltsOnAllVectorOperands(
//      GenericMachineInstr &MI, MachineRegisterInfo &MRI,") were lost in
// this rendering; only the trailing parameter remains below.
3619  std::initializer_list<unsigned> NonVecOpIndices) {
// Instructions with memory operands (loads/stores) are not handled here.
3620  if (MI.getNumMemOperands() != 0)
3621  return false;
3622 
// The first def establishes the reference element count.
3623  LLT VecTy = MRI.getType(MI.getReg(0));
3624  if (!VecTy.isVector())
3625  return false;
3626  unsigned NumElts = VecTy.getNumElements();
3627 
3628  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
3629  MachineOperand &Op = MI.getOperand(OpIdx);
// Non-register operands (immediates, predicates) must be whitelisted.
3630  if (!Op.isReg()) {
3631  if (!is_contained(NonVecOpIndices, OpIdx))
3632  return false;
3633  continue;
3634  }
3635 
3636  LLT Ty = MRI.getType(Op.getReg());
// Scalar register operands must likewise be whitelisted.
3637  if (!Ty.isVector()) {
3638  if (!is_contained(NonVecOpIndices, OpIdx))
3639  return false;
3640  continue;
3641  }
3642 
// Every vector operand must match the element count of operand 0.
3643  if (Ty.getNumElements() != NumElts)
3644  return false;
3645  }
3646 
3647  return true;
3648 }
3649 #endif
3650 
3651 /// Fill \p DstOps with DstOps that have same number of elements combined as
3652 /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
3653 /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
3654 /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
3655 static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
3656  unsigned NumElts) {
3657  LLT LeftoverTy;
3658  assert(Ty.isVector() && "Expected vector type");
3659  LLT EltTy = Ty.getElementType();
3660  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
3661  int NumParts, NumLeftover;
3662  std::tie(NumParts, NumLeftover) =
3663  getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
3664 
3665  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
3666  for (int i = 0; i < NumParts; ++i) {
3667  DstOps.push_back(NarrowTy);
3668  }
3669 
3670  if (LeftoverTy.isValid()) {
3671  assert(NumLeftover == 1 && "expected exactly one leftover");
3672  DstOps.push_back(LeftoverTy);
3673  }
3674 }
3675 
3676 /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
3677 /// made from \p Op depending on operand type.
3678 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
3679  MachineOperand &Op) {
3680  for (unsigned i = 0; i < N; ++i) {
3681  if (Op.isReg())
3682  Ops.push_back(Op.getReg());
3683  else if (Op.isImm())
3684  Ops.push_back(Op.getImm());
3685  else if (Op.isPredicate())
3686  Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
3687  else
3688  llvm_unreachable("Unsupported type");
3689  }
3690 }
3691 
3692 // Handle splitting vector operations which need to have the same number of
3693 // elements in each type index, but each type index may have a different element
3694 // type.
3695 //
3696 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3697 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3698 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3699 //
3700 // Also handles some irregular breakdown cases, e.g.
3701 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3702 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3703 // s64 = G_SHL s64, s32
// NOTE(review): the header lines (presumably
// "LegalizerHelper::LegalizeResult
// LegalizerHelper::fewerElementsVectorMultiEltType(") were lost in this
// rendering; only the trailing parameters remain below.
3706  GenericMachineInstr &MI, unsigned NumElts,
3707  std::initializer_list<unsigned> NonVecOpIndices) {
3708  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
3709  "Non-compatible opcode or not specified non-vector operands");
3710  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3711 
3712  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3713  unsigned NumDefs = MI.getNumDefs();
3714 
3715  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
3716  // Build instructions with DstOps to use instruction found by CSE directly.
3717  // CSE copies found instruction into given vreg when building with vreg dest.
3718  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
3719  // Output registers will be taken from created instructions.
3720  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
3721  for (unsigned i = 0; i < NumDefs; ++i) {
3722  makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
3723  }
3724 
3725  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
3726  // Operands listed in NonVecOpIndices will be used as is without splitting;
3727  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
3728  // scalar condition (op 1), immediate in sext_inreg (op 2).
3729  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
3730  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3731  ++UseIdx, ++UseNo) {
3732  if (is_contained(NonVecOpIndices, UseIdx)) {
// Non-vector operands are replicated once per output piece.
3733  broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
3734  MI.getOperand(UseIdx));
3735  } else {
3736  SmallVector<Register, 8> SplitPieces;
3737  extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
3738  for (auto Reg : SplitPieces)
3739  InputOpsPieces[UseNo].push_back(Reg);
3740  }
3741  }
3742 
3743  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3744 
3745  // Take i-th piece of each input operand split and build sub-vector/scalar
3746  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
3747  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3748  SmallVector<DstOp, 2> Defs;
3749  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3750  Defs.push_back(OutputOpsPieces[DstNo][i]);
3751 
// NOTE(review): a line is missing here — presumably
// "SmallVector<SrcOp, 4> Uses;" (Uses is populated just below).
3753  for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3754  Uses.push_back(InputOpsPieces[InputNo][i]);
3755 
3756  auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
3757  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3758  OutputRegs[DstNo].push_back(I.getReg(DstNo));
3759  }
3760 
3761  // Merge small outputs into MI's output for each def operand.
3762  if (NumLeftovers) {
3763  for (unsigned i = 0; i < NumDefs; ++i)
3764  mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
3765  } else {
3766  for (unsigned i = 0; i < NumDefs; ++i)
3767  MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
3768  }
3769 
3770  MI.eraseFromParent();
3771  return Legalized;
3772 }
3773 
3776  unsigned NumElts) {
  // Narrow a vector G_PHI: build one G_PHI per NumElts-wide sub-vector (plus
  // one leftover piece when OrigNumElts % NumElts != 0), splitting each
  // incoming value in its predecessor block, then recombine the small PHI
  // results into MI's original def register.
3777  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3778 
3779  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3780  unsigned NumDefs = MI.getNumDefs();
3781 
3782  SmallVector<DstOp, 8> OutputOpsPieces;
3783  SmallVector<Register, 8> OutputRegs;
3784  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
3785 
3786  // Instructions that perform register split will be inserted in basic block
3787  // where register is defined (basic block is in the next operand).
  // PHI uses come in (register, MBB) pairs, hence NumInputs / 2 buckets and
  // the UseIdx += 2 stride below.
3788  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
3789  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3790  UseIdx += 2, ++UseNo) {
3791  MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
3792  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3793  extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
3794  }
3795 
3796  // Build PHIs with fewer elements.
3797  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3798  MIRBuilder.setInsertPt(*MI.getParent(), MI);
3799  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3800  auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
3801  Phi.addDef(
3802  MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
3803  OutputRegs.push_back(Phi.getReg(0));
3804 
3805  for (unsigned j = 0; j < NumInputs / 2; ++j) {
3806  Phi.addUse(InputOpsPieces[j][i]);
  // Re-attach the incoming-MBB operand that pairs with this use.
3807  Phi.add(MI.getOperand(1 + j * 2 + 1));
3808  }
3809  }
3810 
3811  // Merge small outputs into MI's def.
3812  if (NumLeftovers) {
3813  mergeMixedSubvectors(MI.getReg(0), OutputRegs);
3814  } else {
3815  MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
3816  }
3817 
3818  MI.eraseFromParent();
3819  return Legalized;
3820 }
3821 
3824  unsigned TypeIdx,
3825  LLT NarrowTy) {
  // Narrow a G_UNMERGE_VALUES (TypeIdx 1) in two stages: first unmerge SrcTy
  // into register-sized NarrowTy pieces, then unmerge each piece into the
  // original DstTy defs. Only even breakdowns are handled here.
3826  const int NumDst = MI.getNumOperands() - 1;
3827  const Register SrcReg = MI.getOperand(NumDst).getReg();
3828  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3829  LLT SrcTy = MRI.getType(SrcReg);
3830 
3831  if (TypeIdx != 1 || NarrowTy == DstTy)
3832  return UnableToLegalize;
3833 
3834  // Requires compatible types. Otherwise SrcReg should have been defined by
3835  // merge-like instruction that would get artifact combined. Most likely
3836  // instruction that defines SrcReg has to perform more/fewer elements
3837  // legalization compatible with NarrowTy.
3838  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3839  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3840 
3841  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3842  (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
3843  return UnableToLegalize;
3844 
3845  // This is most likely DstTy (smaller than register size) packed in SrcTy
3846  // (larger than register size) and since unmerge was not combined it will be
3847  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
3848  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
3849 
3850  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
3851  //
3852  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
3853  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
3854  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
3855  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
3856  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3857  const int PartsPerUnmerge = NumDst / NumUnmerge;
3858 
  // Each second-stage unmerge reuses a contiguous run of MI's original def
  // registers, so no replacement of uses is needed afterwards.
3859  for (int I = 0; I != NumUnmerge; ++I) {
3860  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3861 
3862  for (int J = 0; J != PartsPerUnmerge; ++J)
3863  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3864  MIB.addUse(Unmerge.getReg(I));
3865  }
3866 
3867  MI.eraseFromParent();
3868  return Legalized;
3869 }
3870 
3873  LLT NarrowTy) {
  // Narrow a merge-like instruction (G_BUILD_VECTOR / G_CONCAT_VECTORS) by
  // first assembling NarrowTy-sized pieces from the sources, then merging
  // those pieces into the destination.
3874  Register DstReg = MI.getOperand(0).getReg();
3875  LLT DstTy = MRI.getType(DstReg);
3876  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3877  // Requires compatible types. Otherwise user of DstReg did not perform unmerge
3878  // that should have been artifact combined. Most likely instruction that uses
3879  // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
3880  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3881  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3882  if (NarrowTy == SrcTy)
3883  return UnableToLegalize;
3884 
3885  // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
3886  // is for old mir tests. Since the changes to more/fewer elements it should no
3887  // longer be possible to generate MIR like this when starting from llvm-ir
3888  // because LCMTy approach was replaced with merge/unmerge to vector elements.
3889  if (TypeIdx == 1) {
3890  assert(SrcTy.isVector() && "Expected vector types");
3891  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3892  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3893  (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
3894  return UnableToLegalize;
3895  // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
3896  //
3897  // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
3898  // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
3899  // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
3900  // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
3901  // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
3902  // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
3903 
  // Flatten every source vector into its scalar elements (Elts).
3905  LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
3906  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
3907  auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
3908  for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
3909  Elts.push_back(Unmerge.getReg(j));
3910  }
3911 
  // Regroup the flattened elements into NarrowTy-sized vectors.
3912  SmallVector<Register, 8> NarrowTyElts;
3913  unsigned NumNarrowTyElts = NarrowTy.getNumElements();
3914  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
3915  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
3916  ++i, Offset += NumNarrowTyElts) {
3917  ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
3918  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
3919  }
3920 
3921  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3922  MI.eraseFromParent();
3923  return Legalized;
3924  }
3925 
3926  assert(TypeIdx == 0 && "Bad type index");
3927  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
3928  (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
3929  return UnableToLegalize;
3930 
3931  // This is most likely SrcTy (smaller than register size) packed in DstTy
3932  // (larger than register size) and since merge was not combined it will be
3933  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
3934  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
3935 
3936  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
3937  //
3938  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
3939  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
3940  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
3941  SmallVector<Register, 8> NarrowTyElts;
3942  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3943  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
3944  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
3945  for (unsigned i = 0; i < NumParts; ++i) {
3946  SmallVector<Register, 8> Sources;
3947  for (unsigned j = 0; j < NumElts; ++j)
3948  Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
3949  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
3950  }
3951 
3952  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3953  MI.eraseFromParent();
3954  return Legalized;
3955 }
3956 
3959  unsigned TypeIdx,
3960  LLT NarrowVecTy) {
  // Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT when the element index
  // is a known constant: split the source vector into NarrowVecTy pieces and
  // perform the extract/insert on the single piece that holds the element.
3961  Register DstReg = MI.getOperand(0).getReg();
3962  Register SrcVec = MI.getOperand(1).getReg();
3963  Register InsertVal;
3964  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3965 
3966  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3967  if (IsInsert)
3968  InsertVal = MI.getOperand(2).getReg();
3969 
  // The index is the last operand for both opcodes.
3970  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3971 
3972  // TODO: Handle total scalarization case.
3973  if (!NarrowVecTy.isVector())
3974  return UnableToLegalize;
3975 
3976  LLT VecTy = MRI.getType(SrcVec);
3977 
3978  // If the index is a constant, we can really break this down as you would
3979  // expect, and index into the target size pieces.
3980  int64_t IdxVal;
3981  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
3982  if (MaybeCst) {
3983  IdxVal = MaybeCst->Value.getSExtValue();
3984  // Avoid out of bounds indexing the pieces.
3985  if (IdxVal >= VecTy.getNumElements()) {
3986  MIRBuilder.buildUndef(DstReg);
3987  MI.eraseFromParent();
3988  return Legalized;
3989  }
3990 
3991  SmallVector<Register, 8> VecParts;
3992  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3993 
3994  // Build a sequence of NarrowTy pieces in VecParts for this operand.
3995  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3996  TargetOpcode::G_ANYEXT);
3997 
3998  unsigned NewNumElts = NarrowVecTy.getNumElements();
3999 
  // PartIdx selects the piece; NewIdx is the element index within it.
4000  LLT IdxTy = MRI.getType(Idx);
4001  int64_t PartIdx = IdxVal / NewNumElts;
4002  auto NewIdx =
4003  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4004 
4005  if (IsInsert) {
4006  LLT PartTy = MRI.getType(VecParts[PartIdx]);
4007 
4008  // Use the adjusted index to insert into one of the subvectors.
4009  auto InsertPart = MIRBuilder.buildInsertVectorElement(
4010  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4011  VecParts[PartIdx] = InsertPart.getReg(0);
4012 
4013  // Recombine the inserted subvector with the others to reform the result
4014  // vector.
4015  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4016  } else {
4017  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4018  }
4019 
4020  MI.eraseFromParent();
4021  return Legalized;
4022  }
4023 
4024  // With a variable index, we can't perform the operation in a smaller type, so
4025  // we're forced to expand this.
4026  //
4027  // TODO: We could emit a chain of compare/select to figure out which piece to
4028  // index.
4030 }
4031 
4034  LLT NarrowTy) {
  // Break a non-atomic, non-extending load/store into NarrowTy-sized memory
  // accesses (plus a leftover piece for uneven breakdowns), each with its own
  // byte offset and narrowed MachineMemOperand.
4035  // FIXME: Don't know how to handle secondary types yet.
4036  if (TypeIdx != 0)
4037  return UnableToLegalize;
4038 
4039  // This implementation doesn't work for atomics. Give up instead of doing
4040  // something invalid.
4041  if (LdStMI.isAtomic())
4042  return UnableToLegalize;
4043 
4044  bool IsLoad = isa<GLoad>(LdStMI);
4045  Register ValReg = LdStMI.getReg(0);
4046  Register AddrReg = LdStMI.getPointerReg();
4047  LLT ValTy = MRI.getType(ValReg);
4048 
4049  // FIXME: Do we need a distinct NarrowMemory legalize action?
4050  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4051  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4052  return UnableToLegalize;
4053  }
4054 
  // For loads only the piece count is needed up front; for stores the value
  // register must also be split into the pieces to be written.
4055  int NumParts = -1;
4056  int NumLeftover = -1;
4057  LLT LeftoverTy;
4058  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4059  if (IsLoad) {
4060  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4061  } else {
4062  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4063  NarrowLeftoverRegs)) {
4064  NumParts = NarrowRegs.size();
4065  NumLeftover = NarrowLeftoverRegs.size();
4066  }
4067  }
4068 
4069  if (NumParts == -1)
4070  return UnableToLegalize;
4071 
4072  LLT PtrTy = MRI.getType(AddrReg);
4073  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4074 
4075  unsigned TotalSize = ValTy.getSizeInBits();
4076 
4077  // Split the load/store into PartTy sized pieces starting at Offset. If this
4078  // is a load, return the new registers in ValRegs. For a store, each element
4079  // of ValRegs should be PartTy. Returns the next offset that needs to be
4080  // handled.
4082  auto MMO = LdStMI.getMMO();
4083  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4084  unsigned NumParts, unsigned Offset) -> unsigned {
4086  unsigned PartSize = PartTy.getSizeInBits();
4087  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4088  ++Idx) {
4089  unsigned ByteOffset = Offset / 8;
4090  Register NewAddrReg;
4091 
4092  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4093 
4094  MachineMemOperand *NewMMO =
4095  MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4096 
4097  if (IsLoad) {
4098  Register Dst = MRI.createGenericVirtualRegister(PartTy);
4099  ValRegs.push_back(Dst);
4100  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4101  } else {
4102  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4103  }
  // Big-endian targets walk the offsets downward from the high end.
4104  Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4105  }
4106 
4107  return Offset;
4108  };
4109 
4110  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4111  unsigned HandledOffset =
4112  splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4113 
4114  // Handle the rest of the register if this isn't an even type breakdown.
4115  if (LeftoverTy.isValid())
4116  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4117 
4118  if (IsLoad) {
4119  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4120  LeftoverTy, NarrowLeftoverRegs);
4121  }
4122 
4123  LdStMI.eraseFromParent();
4124  return Legalized;
4125 }
4126 
4129  LLT NarrowTy) {
  // Top-level dispatch for the fewer-elements legalization action: routes
  // each supported opcode to the appropriate narrowing helper. NumElts is the
  // per-piece element count (1 when NarrowTy is a scalar, i.e. scalarizing).
4130  using namespace TargetOpcode;
4131  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4132  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4133 
4134  switch (MI.getOpcode()) {
  // Plain element-wise operations: every operand and def is a vector that can
  // be split uniformly.
4135  case G_IMPLICIT_DEF:
4136  case G_TRUNC:
4137  case G_AND:
4138  case G_OR:
4139  case G_XOR:
4140  case G_ADD:
4141  case G_SUB:
4142  case G_MUL:
4143  case G_PTR_ADD:
4144  case G_SMULH:
4145  case G_UMULH:
4146  case G_FADD:
4147  case G_FMUL:
4148  case G_FSUB:
4149  case G_FNEG:
4150  case G_FABS:
4151  case G_FCANONICALIZE:
4152  case G_FDIV:
4153  case G_FREM:
4154  case G_FMA:
4155  case G_FMAD:
4156  case G_FPOW:
4157  case G_FEXP:
4158  case G_FEXP2:
4159  case G_FLOG:
4160  case G_FLOG2:
4161  case G_FLOG10:
4162  case G_FNEARBYINT:
4163  case G_FCEIL:
4164  case G_FFLOOR:
4165  case G_FRINT:
4166  case G_INTRINSIC_ROUND:
4167  case G_INTRINSIC_ROUNDEVEN:
4168  case G_INTRINSIC_TRUNC:
4169  case G_FCOS:
4170  case G_FSIN:
4171  case G_FSQRT:
4172  case G_BSWAP:
4173  case G_BITREVERSE:
4174  case G_SDIV:
4175  case G_UDIV:
4176  case G_SREM:
4177  case G_UREM:
4178  case G_SDIVREM:
4179  case G_UDIVREM:
4180  case G_SMIN:
4181  case G_SMAX:
4182  case G_UMIN:
4183  case G_UMAX:
4184  case G_ABS:
4185  case G_FMINNUM:
4186  case G_FMAXNUM:
4187  case G_FMINNUM_IEEE:
4188  case G_FMAXNUM_IEEE:
4189  case G_FMINIMUM:
4190  case G_FMAXIMUM:
4191  case G_FSHL:
4192  case G_FSHR:
4193  case G_ROTL:
4194  case G_ROTR:
4195  case G_FREEZE:
4196  case G_SADDSAT:
4197  case G_SSUBSAT:
4198  case G_UADDSAT:
4199  case G_USUBSAT:
4200  case G_UMULO:
4201  case G_SMULO:
4202  case G_SHL:
4203  case G_LSHR:
4204  case G_ASHR:
4205  case G_SSHLSAT:
4206  case G_USHLSAT:
4207  case G_CTLZ:
4208  case G_CTLZ_ZERO_UNDEF:
4209  case G_CTTZ:
4210  case G_CTTZ_ZERO_UNDEF:
4211  case G_CTPOP:
4212  case G_FCOPYSIGN:
4213  case G_ZEXT:
4214  case G_SEXT:
4215  case G_ANYEXT:
4216  case G_FPEXT:
4217  case G_FPTRUNC:
4218  case G_SITOFP:
4219  case G_UITOFP:
4220  case G_FPTOSI:
4221  case G_FPTOUI:
4222  case G_INTTOPTR:
4223  case G_PTRTOINT:
4224  case G_ADDRSPACE_CAST:
4225  case G_UADDO:
4226  case G_USUBO:
4227  case G_UADDE:
4228  case G_USUBE:
4229  case G_SADDO:
4230  case G_SSUBO:
4231  case G_SADDE:
4232  case G_SSUBE:
4233  return fewerElementsVectorMultiEltType(GMI, NumElts);
  // Operations with a non-vector operand that is broadcast to every piece
  // rather than split (predicate, scalar condition, immediate).
4234  case G_ICMP:
4235  case G_FCMP:
4236  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4237  case G_SELECT:
4238  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4239  return fewerElementsVectorMultiEltType(GMI, NumElts);
4240  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4241  case G_PHI:
4242  return fewerElementsVectorPhi(GMI, NumElts);
4243  case G_UNMERGE_VALUES:
4244  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4245  case G_BUILD_VECTOR:
4246  assert(TypeIdx == 0 && "not a vector type index");
4247  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4248  case G_CONCAT_VECTORS:
4249  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4250  return UnableToLegalize;
4251  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4252  case G_EXTRACT_VECTOR_ELT:
4253  case G_INSERT_VECTOR_ELT:
4254  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4255  case G_LOAD:
4256  case G_STORE:
4257  return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4258  case G_SEXT_INREG:
4259  return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4261  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4262  case G_SHUFFLE_VECTOR:
4263  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4264  default:
4265  return UnableToLegalize;
4266  }
4267 }
4268 
4270  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  // Split a canonicalized G_SHUFFLE_VECTOR into two half-width shuffles (Lo
  // and Hi) over the four half-vectors of the two sources, falling back to a
  // G_BUILD_VECTOR of individually-extracted elements when a half would need
  // more than two of the four inputs. Mirrors SelectionDAG's shuffle split.
4271  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4272  if (TypeIdx != 0)
4273  return UnableToLegalize;
4274 
4275  Register DstReg = MI.getOperand(0).getReg();
4276  Register Src1Reg = MI.getOperand(1).getReg();
4277  Register Src2Reg = MI.getOperand(2).getReg();
4278  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4279  LLT DstTy = MRI.getType(DstReg);
4280  LLT Src1Ty = MRI.getType(Src1Reg);
4281  LLT Src2Ty = MRI.getType(Src2Reg);
4282  // The shuffle should be canonicalized by now.
4283  if (DstTy != Src1Ty)
4284  return UnableToLegalize;
4285  if (DstTy != Src2Ty)
4286  return UnableToLegalize;
4287 
4288  if (!isPowerOf2_32(DstTy.getNumElements()))
4289  return UnableToLegalize;
4290 
4291  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4292  // Further legalization attempts will be needed to do split further.
4293  NarrowTy =
4295  unsigned NewElts = NarrowTy.getNumElements();
4296 
4297  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4298  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4299  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4300  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4301  SplitSrc2Regs[1]};
4302 
4303  Register Hi, Lo;
4304 
4305  // If Lo or Hi uses elements from at most two of the four input vectors, then
4306  // express it as a vector shuffle of those two inputs. Otherwise extract the
4307  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4309  for (unsigned High = 0; High < 2; ++High) {
4310  Register &Output = High ? Hi : Lo;
4311 
4312  // Build a shuffle mask for the output, discovering on the fly which
4313  // input vectors to use as shuffle operands (recorded in InputUsed).
4314  // If building a suitable shuffle vector proves too hard, then bail
4315  // out with useBuildVector set.
4316  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4317  unsigned FirstMaskIdx = High * NewElts;
4318  bool UseBuildVector = false;
4319  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4320  // The mask element. This indexes into the input.
4321  int Idx = Mask[FirstMaskIdx + MaskOffset];
4322 
4323  // The input vector this mask element indexes into.
4324  unsigned Input = (unsigned)Idx / NewElts;
4325 
4326  if (Input >= array_lengthof(Inputs)) {
4327  // The mask element does not index into any input vector.
4328  Ops.push_back(-1);
4329  continue;
4330  }
4331 
4332  // Turn the index into an offset from the start of the input vector.
4333  Idx -= Input * NewElts;
4334 
4335  // Find or create a shuffle vector operand to hold this input.
4336  unsigned OpNo;
4337  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4338  if (InputUsed[OpNo] == Input) {
4339  // This input vector is already an operand.
4340  break;
4341  } else if (InputUsed[OpNo] == -1U) {
4342  // Create a new operand for this input vector.
4343  InputUsed[OpNo] = Input;
4344  break;
4345  }
4346  }
4347 
4348  if (OpNo >= array_lengthof(InputUsed)) {
4349  // More than two input vectors used! Give up on trying to create a
4350  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4351  UseBuildVector = true;
4352  break;
4353  }
4354 
4355  // Add the mask index for the new shuffle vector.
4356  Ops.push_back(Idx + OpNo * NewElts);
4357  }
4358 
4359  if (UseBuildVector) {
4360  LLT EltTy = NarrowTy.getElementType();
4362 
4363  // Extract the input elements by hand.
4364  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4365  // The mask element. This indexes into the input.
4366  int Idx = Mask[FirstMaskIdx + MaskOffset];
4367 
4368  // The input vector this mask element indexes into.
4369  unsigned Input = (unsigned)Idx / NewElts;
4370 
4371  if (Input >= array_lengthof(Inputs)) {
4372  // The mask element is "undef" or indexes off the end of the input.
4373  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4374  continue;
4375  }
4376 
4377  // Turn the index into an offset from the start of the input vector.
4378  Idx -= Input * NewElts;
4379 
4380  // Extract the vector element by hand.
4381  SVOps.push_back(MIRBuilder
4382  .buildExtractVectorElement(
4383  EltTy, Inputs[Input],
4385  .getReg(0));
4386  }
4387 
4388  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4389  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4390  } else if (InputUsed[0] == -1U) {
4391  // No input vectors were used! The result is undefined.
4392  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4393  } else {
4394  Register Op0 = Inputs[InputUsed[0]];
4395  // If only one input was used, use an undefined vector for the other.
4396  Register Op1 = InputUsed[1] == -1U
4397  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4398  : Inputs[InputUsed[1]];
4399  // At least one input vector was used. Create a new shuffle vector.
4400  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4401  }
4402 
  // Reuse the mask scratch buffer for the next (Hi) half.
4403  Ops.clear();
4404  }
4405 
4406  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4407  MI.eraseFromParent();
4408  return Legalized;
4409 }
4410 
4411 static unsigned getScalarOpcForReduction(unsigned Opc) {
4412  unsigned ScalarOpc;
4413  switch (Opc) {
4414  case TargetOpcode::G_VECREDUCE_FADD:
4415  ScalarOpc = TargetOpcode::G_FADD;
4416  break;
4417  case TargetOpcode::G_VECREDUCE_FMUL:
4418  ScalarOpc = TargetOpcode::G_FMUL;
4419  break;
4420  case TargetOpcode::G_VECREDUCE_FMAX:
4421  ScalarOpc = TargetOpcode::G_FMAXNUM;
4422  break;
4423  case TargetOpcode::G_VECREDUCE_FMIN:
4424  ScalarOpc = TargetOpcode::G_FMINNUM;
4425  break;
4426  case TargetOpcode::G_VECREDUCE_ADD:
4427  ScalarOpc = TargetOpcode::G_ADD;
4428  break;
4429  case TargetOpcode::G_VECREDUCE_MUL:
4430  ScalarOpc = TargetOpcode::G_MUL;
4431  break;
4432  case TargetOpcode::G_VECREDUCE_AND:
4433  ScalarOpc = TargetOpcode::G_AND;
4434  break;
4435  case TargetOpcode::G_VECREDUCE_OR:
4436  ScalarOpc = TargetOpcode::G_OR;
4437  break;
4438  case TargetOpcode::G_VECREDUCE_XOR:
4439  ScalarOpc = TargetOpcode::G_XOR;
4440  break;
4441  case TargetOpcode::G_VECREDUCE_SMAX:
4442  ScalarOpc = TargetOpcode::G_SMAX;
4443  break;
4444  case TargetOpcode::G_VECREDUCE_SMIN:
4445  ScalarOpc = TargetOpcode::G_SMIN;
4446  break;
4447  case TargetOpcode::G_VECREDUCE_UMAX:
4448  ScalarOpc = TargetOpcode::G_UMAX;
4449  break;
4450  case TargetOpcode::G_VECREDUCE_UMIN:
4451  ScalarOpc = TargetOpcode::G_UMIN;
4452  break;
4453  default:
4454  llvm_unreachable("Unhandled reduction");
4455  }
4456  return ScalarOpc;
4457 }
4458 
4460  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  // Narrow a non-sequential G_VECREDUCE_* by splitting the source into
  // NarrowTy pieces. Scalar NarrowTy scalarizes completely (tree of scalar
  // ops when the count is a power of 2, otherwise a sequential chain); vector
  // NarrowTy reduces each piece and combines the partial results.
4461  unsigned Opc = MI.getOpcode();
4462  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4463  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4464  "Sequential reductions not expected");
4465 
4466  if (TypeIdx != 1)
4467  return UnableToLegalize;
4468 
4469  // The semantics of the normal non-sequential reductions allow us to freely
4470  // re-associate the operation.
4471  Register SrcReg = MI.getOperand(1).getReg();
4472  LLT SrcTy = MRI.getType(SrcReg);
4473  Register DstReg = MI.getOperand(0).getReg();
4474  LLT DstTy = MRI.getType(DstReg);
4475 
4476  if (NarrowTy.isVector() &&
4477  (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4478  return UnableToLegalize;
4479 
4480  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
4481  SmallVector<Register> SplitSrcs;
4482  // If NarrowTy is a scalar then we're being asked to scalarize.
4483  const unsigned NumParts =
4484  NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4485  : SrcTy.getNumElements();
4486 
4487  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4488  if (NarrowTy.isScalar()) {
4489  if (DstTy != NarrowTy)
4490  return UnableToLegalize; // FIXME: handle implicit extensions.
4491 
4492  if (isPowerOf2_32(NumParts)) {
4493  // Generate a tree of scalar operations to reduce the critical path.
4494  SmallVector<Register> PartialResults;
4495  unsigned NumPartsLeft = NumParts;
4496  while (NumPartsLeft > 1) {
4497  for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4498  PartialResults.emplace_back(
4499  MIRBuilder
4500  .buildInstr(ScalarOpc, {NarrowTy},
4501  {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4502  .getReg(0));
4503  }
4504  SplitSrcs = PartialResults;
4505  PartialResults.clear();
4506  NumPartsLeft = SplitSrcs.size();
4507  }
4508  assert(SplitSrcs.size() == 1);
4509  MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4510  MI.eraseFromParent();
4511  return Legalized;
4512  }
4513  // If we can't generate a tree, then just do sequential operations.
4514  Register Acc = SplitSrcs[0];
4515  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4516  Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4517  .getReg(0);
4518  MIRBuilder.buildCopy(DstReg, Acc);
4519  MI.eraseFromParent();
4520  return Legalized;
4521  }
  // Vector NarrowTy: reduce each piece to a DstTy scalar first.
4522  SmallVector<Register> PartialReductions;
4523  for (unsigned Part = 0; Part < NumParts; ++Part) {
4524  PartialReductions.push_back(
4525  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4526  }
4527 
4528 
4529  // If the types involved are powers of 2, we can generate intermediate vector
4530  // ops, before generating a final reduction operation.
4531  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4532  isPowerOf2_32(NarrowTy.getNumElements())) {
4533  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4534  }
4535 
  // Otherwise fold the partial reductions together with scalar ops; the last
  // combine writes directly into DstReg.
4536  Register Acc = PartialReductions[0];
4537  for (unsigned Part = 1; Part < NumParts; ++Part) {
4538  if (Part == NumParts - 1) {
4539  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4540  {Acc, PartialReductions[Part]});
4541  } else {
4542  Acc = MIRBuilder
4543  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4544  .getReg(0);
4545  }
4546  }
4547  MI.eraseFromParent();
4548  return Legalized;
4549 }
4550 
4552 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4553  LLT SrcTy, LLT NarrowTy,
4554  unsigned ScalarOpc) {
  // Power-of-2 path: combine NarrowTy pieces pairwise with element-wise
  // vector ops until one NarrowTy value remains, then retarget MI's source
  // operand at it so MI becomes a single NarrowTy-wide reduction.
4555  SmallVector<Register> SplitSrcs;
4556  // Split the sources into NarrowTy size pieces.
4557  extractParts(SrcReg, NarrowTy,
4558  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4559  // We're going to do a tree reduction using vector operations until we have
4560  // one NarrowTy size value left.
4561  while (SplitSrcs.size() > 1) {
4562  SmallVector<Register> PartialRdxs;
4563  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4564  Register LHS = SplitSrcs[Idx];
4565  Register RHS = SplitSrcs[Idx + 1];
4566  // Create the intermediate vector op.
4567  Register Res =
4568  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4569  PartialRdxs.push_back(Res);
4570  }
4571  SplitSrcs = std::move(PartialRdxs);
4572  }
4573  // Finally generate the requested NarrowTy based reduction.
4575  MI.getOperand(1).setReg(SplitSrcs[0]);
4577  return Legalized;
4578 }
4579 
4582  const LLT HalfTy, const LLT AmtTy) {
  // Narrow a shift with a known-constant amount by splitting the source into
  // low/high HalfTy halves and computing each half directly. The four cases
  // per opcode: Amt > full width, Amt > half width, Amt == half width, and
  // the general Amt < half width combination of both halves.
4583 
4584  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4585  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4586  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4587 
  // Shift by zero is just a re-merge of the unchanged halves.
4588  if (Amt.isZero()) {
4589  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4590  MI.eraseFromParent();
4591  return Legalized;
4592  }
4593 
4594  LLT NVT = HalfTy;
4595  unsigned NVTBits = HalfTy.getSizeInBits();
4596  unsigned VTBits = 2 * NVTBits;
4597 
4598  SrcOp Lo(Register(0)), Hi(Register(0));
4599  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4600  if (Amt.ugt(VTBits)) {
4601  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4602  } else if (Amt.ugt(NVTBits)) {
4603  Lo = MIRBuilder.buildConstant(NVT, 0);
4604  Hi = MIRBuilder.buildShl(NVT, InL,
4605  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4606  } else if (Amt == NVTBits) {
4607  Lo = MIRBuilder.buildConstant(NVT, 0);
4608  Hi = InL;
4609  } else {
  // Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
4610  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4611  auto OrLHS =
4612  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4613  auto OrRHS = MIRBuilder.buildLShr(
4614  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4615  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4616  }
4617  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4618  if (Amt.ugt(VTBits)) {
4619  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4620  } else if (Amt.ugt(NVTBits)) {
4621  Lo = MIRBuilder.buildLShr(NVT, InH,
4622  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4623  Hi = MIRBuilder.buildConstant(NVT, 0);
4624  } else if (Amt == NVTBits) {
4625  Lo = InH;
4626  Hi = MIRBuilder.buildConstant(NVT, 0);
4627  } else {
  // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)); Hi = InH >> Amt.
4628  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4629 
4630  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4631  auto OrRHS = MIRBuilder.buildShl(
4632  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4633 
4634  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4635  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4636  }
4637  } else {
  // G_ASHR: like G_LSHR but out-of-range bits are filled with the sign,
  // i.e. InH arithmetically shifted by NVTBits - 1.
4638  if (Amt.ugt(VTBits)) {
4639  Hi = Lo = MIRBuilder.buildAShr(
4640  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4641  } else if (Amt.ugt(NVTBits)) {
4642  Lo = MIRBuilder.buildAShr(NVT, InH,
4643  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4644  Hi = MIRBuilder.buildAShr(NVT, InH,
4645  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4646  } else if (Amt == NVTBits) {
4647  Lo = InH;
4648  Hi = MIRBuilder.buildAShr(NVT, InH,
4649  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4650  } else {
4651  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4652 
4653  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4654  auto OrRHS = MIRBuilder.buildShl(
4655  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4656 
4657  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4658  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4659  }
4660  }
4661 
4662  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4663  MI.eraseFromParent();
4664 
4665  return Legalized;
4666 }
4667 
// TODO: Optimize if constant shift amount.
                                   LLT RequestedTy) {
  // Narrowing the shift-amount operand only (TypeIdx 1): truncate the amount
  // register (operand 2) in place; the shifted value keeps its type.
  if (TypeIdx == 1) {
    narrowScalarSrc(MI, RequestedTy, 2);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  // An odd bit width cannot be split into two equal halves.
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount admits a simpler expansion with no selects.
  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  // Split the source value into its low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize: distance a "long" shift reaches into the
  // other half. AmtLack = NewBitSize - Amt: bits the other half contributes
  // on a "short" shift. Each may be out of range for a half-width shift on
  // the path where it is unused; the selects below pick the valid one.
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: the shift stays within a single half (Amt < NewBitSize).
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  // IsZero guards Amt == 0, where AmtLack would equal NewBitSize — an
  // out-of-range half-width shift — so the input is selected unchanged.
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    // High half combines its own shifted bits with those carried out of the
    // low half.
    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    // Low half combines its own shifted bits with those carried out of the
    // high half.
    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      // G_ASHR fills the high half with copies of the sign bit.
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  // Reassemble the two half-width results into the full-width destination.
  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
4777 
4780  LLT MoreTy) {
4781  assert(TypeIdx == 0 && "Expecting only Idx 0");
4782 
4784  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {