1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/Support/Debug.h"
33 
34 #define DEBUG_TYPE "legalizer"
35 
36 using namespace llvm;
37 using namespace LegalizeActions;
38 using namespace MIPatternMatch;
39 
40 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
41 ///
42 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
43 /// with any leftover piece as type \p LeftoverTy
44 ///
45 /// Returns -1 in the first element of the pair if the breakdown is not
46 /// satisfiable.
47 static std::pair<int, int>
48 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
49  assert(!LeftoverTy.isValid() && "this is an out argument");
50 
51  unsigned Size = OrigTy.getSizeInBits();
52  unsigned NarrowSize = NarrowTy.getSizeInBits();
53  unsigned NumParts = Size / NarrowSize;
54  unsigned LeftoverSize = Size - NumParts * NarrowSize;
55  assert(Size > NarrowSize);
56 
57  if (LeftoverSize == 0)
58  return {NumParts, 0};
59 
60  if (NarrowTy.isVector()) {
61  unsigned EltSize = OrigTy.getScalarSizeInBits();
62  if (LeftoverSize % EltSize != 0)
63  return {-1, -1};
64  LeftoverTy = LLT::scalarOrVector(
65  ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
66  } else {
67  LeftoverTy = LLT::scalar(LeftoverSize);
68  }
69 
70  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
71  return std::make_pair(NumParts, NumLeftover);
72 }
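
// A couple of worked examples of the breakdown computed above (illustrative
// only):
//   getNarrowTypeBreakDown(s88, s64, LeftoverTy)
//     -> {1, 1} with LeftoverTy = s24 (one s64 piece plus one s24 piece).
//   getNarrowTypeBreakDown(<4 x s16>, <3 x s16>, LeftoverTy)
//     -> {1, 1} with LeftoverTy = s16 (one <3 x s16> piece; the 16 leftover
//        bits are a whole multiple of the 16-bit element, so they form a
//        single s16 leftover).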
73 
74 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
75 
76  if (!Ty.isScalar())
77  return nullptr;
78 
79  switch (Ty.getSizeInBits()) {
80  case 16:
81  return Type::getHalfTy(Ctx);
82  case 32:
83  return Type::getFloatTy(Ctx);
84  case 64:
85  return Type::getDoubleTy(Ctx);
86  case 80:
87  return Type::getX86_FP80Ty(Ctx);
88  case 128:
89  return Type::getFP128Ty(Ctx);
90  default:
91  return nullptr;
92  }
93 }
94 
95 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
96  GISelChangeObserver &Observer,
97  MachineIRBuilder &Builder)
98  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
99  LI(*MF.getSubtarget().getLegalizerInfo()),
100  TLI(*MF.getSubtarget().getTargetLowering()) { }
101 
102 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
103  GISelChangeObserver &Observer,
104  MachineIRBuilder &B)
105  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
106  TLI(*MF.getSubtarget().getTargetLowering()) { }
107 
108 LegalizerHelper::LegalizeResult
109 LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
110  LostDebugLocObserver &LocObserver) {
111  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
112 
114 
115  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
116  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
117  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
118  auto Step = LI.getAction(MI, MRI);
119  switch (Step.Action) {
120  case Legal:
121  LLVM_DEBUG(dbgs() << ".. Already legal\n");
122  return AlreadyLegal;
123  case Libcall:
124  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
125  return libcall(MI, LocObserver);
126  case NarrowScalar:
127  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
128  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
129  case WidenScalar:
130  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
131  return widenScalar(MI, Step.TypeIdx, Step.NewType);
132  case Bitcast:
133  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
134  return bitcast(MI, Step.TypeIdx, Step.NewType);
135  case Lower:
136  LLVM_DEBUG(dbgs() << ".. Lower\n");
137  return lower(MI, Step.TypeIdx, Step.NewType);
138  case FewerElements:
139  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
140  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
141  case MoreElements:
142  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
143  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
144  case Custom:
145  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
146  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
147  default:
148  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
149  return UnableToLegalize;
150  }
151 }
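
// Illustrative example of one step (the chosen action always comes from the
// target's LegalizerInfo, so the details are hypothetical): on a target whose
// widest legal scalar is s64,
//   %2:_(s128) = G_ADD %0:_(s128), %1:_(s128)
// would typically be reported as NarrowScalar with NewType = s64, so this
// function forwards it to narrowScalar(), which splits the operands into s64
// pieces, adds them with carry (G_UADDO / G_UADDE), re-merges the result, and
// returns Legalized.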
152 
153 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
154  SmallVectorImpl<Register> &VRegs) {
155  for (int i = 0; i < NumParts; ++i)
156  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
157  MIRBuilder.buildUnmerge(VRegs, Reg);
158 }
159 
160 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
161  LLT MainTy, LLT &LeftoverTy,
162  SmallVectorImpl<Register> &VRegs,
163  SmallVectorImpl<Register> &LeftoverRegs) {
164  assert(!LeftoverTy.isValid() && "this is an out argument");
165 
166  unsigned RegSize = RegTy.getSizeInBits();
167  unsigned MainSize = MainTy.getSizeInBits();
168  unsigned NumParts = RegSize / MainSize;
169  unsigned LeftoverSize = RegSize - NumParts * MainSize;
170 
171  // Use an unmerge when possible.
172  if (LeftoverSize == 0) {
173  for (unsigned I = 0; I < NumParts; ++I)
174  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
175  MIRBuilder.buildUnmerge(VRegs, Reg);
176  return true;
177  }
178 
179  // Perform irregular split. Leftover is last element of RegPieces.
180  if (MainTy.isVector()) {
181  SmallVector<Register, 8> RegPieces;
182  extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
183  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
184  VRegs.push_back(RegPieces[i]);
185  LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
186  LeftoverTy = MRI.getType(LeftoverRegs[0]);
187  return true;
188  }
189 
190  LeftoverTy = LLT::scalar(LeftoverSize);
191  // For irregular sizes, extract the individual parts.
192  for (unsigned I = 0; I != NumParts; ++I) {
193  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
194  VRegs.push_back(NewReg);
195  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
196  }
197 
198  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
199  Offset += LeftoverSize) {
200  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
201  LeftoverRegs.push_back(NewReg);
202  MIRBuilder.buildExtract(NewReg, Reg, Offset);
203  }
204 
205  return true;
206 }
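
// Illustrative example: splitting an s88 register with MainTy = s64 takes the
// irregular path above and produces
//   %p0:_(s64) = G_EXTRACT %src:_(s88), 0
//   %l0:_(s24) = G_EXTRACT %src:_(s88), 64
// leaving VRegs = {%p0}, LeftoverRegs = {%l0} and LeftoverTy = s24
// (%src, %p0 and %l0 are placeholder names).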
207 
208 void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
209  SmallVectorImpl<Register> &VRegs) {
210  LLT RegTy = MRI.getType(Reg);
211  assert(RegTy.isVector() && "Expected a vector type");
212 
213  LLT EltTy = RegTy.getElementType();
214  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
215  unsigned RegNumElts = RegTy.getNumElements();
216  unsigned LeftoverNumElts = RegNumElts % NumElts;
217  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
218 
219  // Perfect split without leftover
220  if (LeftoverNumElts == 0)
221  return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
222 
223  // Irregular split. Provide direct access to all elements for artifact
224  // combiner using unmerge to elements. Then build vectors with NumElts
225  // elements. The remaining element(s) are used to build the leftover vector.
226  SmallVector<Register, 8> Elts;
227  extractParts(Reg, EltTy, RegNumElts, Elts);
228 
229  unsigned Offset = 0;
230  // Requested sub-vectors of NarrowTy.
231  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
232  ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
233  VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
234  }
235 
236  // Leftover element(s).
237  if (LeftoverNumElts == 1) {
238  VRegs.push_back(Elts[Offset]);
239  } else {
240  LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
241  ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
242  VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
243  }
244 }
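
// Illustrative example: extractVectorParts(%v:_(<7 x s8>), 4, VRegs) unmerges
// %v into its 7 scalar elements and then rebuilds
//   %p0:_(<4 x s8>) = G_BUILD_VECTOR %e0, %e1, %e2, %e3   ; requested piece
//   %l0:_(<3 x s8>) = G_BUILD_VECTOR %e4, %e5, %e6        ; leftover piece
// so VRegs ends up as {%p0, %l0} (%v, %e*, %p0 and %l0 are placeholder names).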
245 
246 void LegalizerHelper::insertParts(Register DstReg,
247  LLT ResultTy, LLT PartTy,
248  ArrayRef<Register> PartRegs,
249  LLT LeftoverTy,
250  ArrayRef<Register> LeftoverRegs) {
251  if (!LeftoverTy.isValid()) {
252  assert(LeftoverRegs.empty());
253 
254  if (!ResultTy.isVector()) {
255  MIRBuilder.buildMerge(DstReg, PartRegs);
256  return;
257  }
258 
259  if (PartTy.isVector())
260  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
261  else
262  MIRBuilder.buildBuildVector(DstReg, PartRegs);
263  return;
264  }
265 
266  // Merge sub-vectors with different number of elements and insert into DstReg.
267  if (ResultTy.isVector()) {
268  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
269  SmallVector<Register, 8> AllRegs;
270  for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
271  AllRegs.push_back(Reg);
272  return mergeMixedSubvectors(DstReg, AllRegs);
273  }
274 
275  SmallVector<Register> GCDRegs;
276  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
277  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
278  extractGCDType(GCDRegs, GCDTy, PartReg);
279  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
280  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
281 }
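
// Illustrative example: re-assembling the s88 value split above
// (PartRegs = {s64 piece}, LeftoverRegs = {s24 piece}, LeftoverTy = s24) takes
// the final path: the common GCD type is s8, every piece is unmerged into s8
// chunks, buildLCMMergePieces() pads and merges those chunks up to an
// LCM-sized scalar, and buildWidenedRemergeToDst() truncates that back into
// the s88 destination.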
282 
283 void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
284  Register Reg) {
285  LLT Ty = MRI.getType(Reg);
286  SmallVector<Register, 8> RegElts;
287  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
288  Elts.append(RegElts);
289 }
290 
291 /// Merge \p PartRegs with different types into \p DstReg.
292 void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
293  ArrayRef<Register> PartRegs) {
294  SmallVector<Register, 8> AllElts;
295  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
296  appendVectorElts(AllElts, PartRegs[i]);
297 
298  Register Leftover = PartRegs[PartRegs.size() - 1];
299  if (MRI.getType(Leftover).isScalar())
300  AllElts.push_back(Leftover);
301  else
302  appendVectorElts(AllElts, Leftover);
303 
304  MIRBuilder.buildMerge(DstReg, AllElts);
305 }
306 
307 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
308 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
309  const MachineInstr &MI) {
310  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
311 
312  const int StartIdx = Regs.size();
313  const int NumResults = MI.getNumOperands() - 1;
314  Regs.resize(Regs.size() + NumResults);
315  for (int I = 0; I != NumResults; ++I)
316  Regs[StartIdx + I] = MI.getOperand(I).getReg();
317 }
318 
319 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
320  LLT GCDTy, Register SrcReg) {
321  LLT SrcTy = MRI.getType(SrcReg);
322  if (SrcTy == GCDTy) {
323  // If the source already evenly divides the result type, we don't need to do
324  // anything.
325  Parts.push_back(SrcReg);
326  } else {
327  // Need to split into common type sized pieces.
328  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
329  getUnmergeResults(Parts, *Unmerge);
330  }
331 }
332 
333 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
334  LLT NarrowTy, Register SrcReg) {
335  LLT SrcTy = MRI.getType(SrcReg);
336  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
337  extractGCDType(Parts, GCDTy, SrcReg);
338  return GCDTy;
339 }
340 
341 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
342  SmallVectorImpl<Register> &VRegs,
343  unsigned PadStrategy) {
344  LLT LCMTy = getLCMType(DstTy, NarrowTy);
345 
346  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
347  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
348  int NumOrigSrc = VRegs.size();
349 
350  Register PadReg;
351 
352  // Get a value we can use to pad the source value if the sources won't evenly
353  // cover the result type.
354  if (NumOrigSrc < NumParts * NumSubParts) {
355  if (PadStrategy == TargetOpcode::G_ZEXT)
356  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
357  else if (PadStrategy == TargetOpcode::G_ANYEXT)
358  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
359  else {
360  assert(PadStrategy == TargetOpcode::G_SEXT);
361 
362  // Shift the sign bit of the low register through the high register.
363  auto ShiftAmt =
364  MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
365  PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
366  }
367  }
368 
369  // Registers for the final merge to be produced.
370  SmallVector<Register, 4> Remerge(NumParts);
371 
372  // Registers needed for intermediate merges, which will be merged into a
373  // source for Remerge.
374  SmallVector<Register, 4> SubMerge(NumSubParts);
375 
376  // Once we've fully read off the end of the original source bits, we can reuse
377  // the same high bits for remaining padding elements.
378  Register AllPadReg;
379 
380  // Build merges to the LCM type to cover the original result type.
381  for (int I = 0; I != NumParts; ++I) {
382  bool AllMergePartsArePadding = true;
383 
384  // Build the requested merges to the requested type.
385  for (int J = 0; J != NumSubParts; ++J) {
386  int Idx = I * NumSubParts + J;
387  if (Idx >= NumOrigSrc) {
388  SubMerge[J] = PadReg;
389  continue;
390  }
391 
392  SubMerge[J] = VRegs[Idx];
393 
394  // There are meaningful bits here we can't reuse later.
395  AllMergePartsArePadding = false;
396  }
397 
398  // If we've filled up a complete piece with padding bits, we can directly
399  // emit the natural sized constant if applicable, rather than a merge of
400  // smaller constants.
401  if (AllMergePartsArePadding && !AllPadReg) {
402  if (PadStrategy == TargetOpcode::G_ANYEXT)
403  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
404  else if (PadStrategy == TargetOpcode::G_ZEXT)
405  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
406 
407  // If this is a sign extension, we can't materialize a trivial constant
408  // with the right type and have to produce a merge.
409  }
410 
411  if (AllPadReg) {
412  // Avoid creating additional instructions if we're just adding additional
413  // copies of padding bits.
414  Remerge[I] = AllPadReg;
415  continue;
416  }
417 
418  if (NumSubParts == 1)
419  Remerge[I] = SubMerge[0];
420  else
421  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
422 
423  // In the sign extend padding case, re-use the first all-signbit merge.
424  if (AllMergePartsArePadding && !AllPadReg)
425  AllPadReg = Remerge[I];
426  }
427 
428  VRegs = std::move(Remerge);
429  return LCMTy;
430 }
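
// Illustrative example of the padding strategies: re-merging the two s8 pieces
// of an s16 source toward an s24 narrow type (GCDTy = s8) leaves slots that
// must be padded. With PadStrategy == G_ANYEXT the pad is a G_IMPLICIT_DEF,
// with G_ZEXT it is the constant 0, and with G_SEXT it is the sign of the last
// real piece, produced by an arithmetic shift right by 7.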
431 
432 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
433  ArrayRef<Register> RemergeRegs) {
434  LLT DstTy = MRI.getType(DstReg);
435 
436  // Create the merge to the widened source, and extract the relevant bits into
437  // the result.
438 
439  if (DstTy == LCMTy) {
440  MIRBuilder.buildMerge(DstReg, RemergeRegs);
441  return;
442  }
443 
444  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
445  if (DstTy.isScalar() && LCMTy.isScalar()) {
446  MIRBuilder.buildTrunc(DstReg, Remerge);
447  return;
448  }
449 
450  if (LCMTy.isVector()) {
451  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
452  SmallVector<Register, 8> UnmergeDefs(NumDefs);
453  UnmergeDefs[0] = DstReg;
454  for (unsigned I = 1; I != NumDefs; ++I)
455  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
456 
457  MIRBuilder.buildUnmerge(UnmergeDefs,
458  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
459  return;
460  }
461 
462  llvm_unreachable("unhandled case");
463 }
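
// Illustrative example: with DstTy = s88 and LCMTy = s264 the pieces are first
// merged to the wide type and then truncated,
//   %wide:_(s264) = G_MERGE_VALUES %p0, ..., %pN
//   %dst:_(s88)   = G_TRUNC %wide
// whereas for a vector LCM type the excess is peeled off with
// G_UNMERGE_VALUES, whose first definition is the real destination register
// (%wide, %p*, %dst are placeholder names).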
464 
465 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
466 #define RTLIBCASE_INT(LibcallPrefix) \
467  do { \
468  switch (Size) { \
469  case 32: \
470  return RTLIB::LibcallPrefix##32; \
471  case 64: \
472  return RTLIB::LibcallPrefix##64; \
473  case 128: \
474  return RTLIB::LibcallPrefix##128; \
475  default: \
476  llvm_unreachable("unexpected size"); \
477  } \
478  } while (0)
479 
480 #define RTLIBCASE(LibcallPrefix) \
481  do { \
482  switch (Size) { \
483  case 32: \
484  return RTLIB::LibcallPrefix##32; \
485  case 64: \
486  return RTLIB::LibcallPrefix##64; \
487  case 80: \
488  return RTLIB::LibcallPrefix##80; \
489  case 128: \
490  return RTLIB::LibcallPrefix##128; \
491  default: \
492  llvm_unreachable("unexpected size"); \
493  } \
494  } while (0)
495 
496  switch (Opcode) {
497  case TargetOpcode::G_SDIV:
498  RTLIBCASE_INT(SDIV_I);
499  case TargetOpcode::G_UDIV:
500  RTLIBCASE_INT(UDIV_I);
501  case TargetOpcode::G_SREM:
502  RTLIBCASE_INT(SREM_I);
503  case TargetOpcode::G_UREM:
504  RTLIBCASE_INT(UREM_I);
505  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
506  RTLIBCASE_INT(CTLZ_I);
507  case TargetOpcode::G_FADD:
508  RTLIBCASE(ADD_F);
509  case TargetOpcode::G_FSUB:
510  RTLIBCASE(SUB_F);
511  case TargetOpcode::G_FMUL:
512  RTLIBCASE(MUL_F);
513  case TargetOpcode::G_FDIV:
514  RTLIBCASE(DIV_F);
515  case TargetOpcode::G_FEXP:
516  RTLIBCASE(EXP_F);
517  case TargetOpcode::G_FEXP2:
518  RTLIBCASE(EXP2_F);
519  case TargetOpcode::G_FREM:
520  RTLIBCASE(REM_F);
521  case TargetOpcode::G_FPOW:
522  RTLIBCASE(POW_F);
523  case TargetOpcode::G_FMA:
524  RTLIBCASE(FMA_F);
525  case TargetOpcode::G_FSIN:
526  RTLIBCASE(SIN_F);
527  case TargetOpcode::G_FCOS:
528  RTLIBCASE(COS_F);
529  case TargetOpcode::G_FLOG10:
530  RTLIBCASE(LOG10_F);
531  case TargetOpcode::G_FLOG:
532  RTLIBCASE(LOG_F);
533  case TargetOpcode::G_FLOG2:
534  RTLIBCASE(LOG2_F);
535  case TargetOpcode::G_FCEIL:
536  RTLIBCASE(CEIL_F);
537  case TargetOpcode::G_FFLOOR:
538  RTLIBCASE(FLOOR_F);
539  case TargetOpcode::G_FMINNUM:
540  RTLIBCASE(FMIN_F);
541  case TargetOpcode::G_FMAXNUM:
542  RTLIBCASE(FMAX_F);
543  case TargetOpcode::G_FSQRT:
544  RTLIBCASE(SQRT_F);
545  case TargetOpcode::G_FRINT:
546  RTLIBCASE(RINT_F);
547  case TargetOpcode::G_FNEARBYINT:
548  RTLIBCASE(NEARBYINT_F);
549  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
550  RTLIBCASE(ROUNDEVEN_F);
551  }
552  llvm_unreachable("Unknown libcall function");
553 }
554 
555 /// True if an instruction is in tail position in its caller. Intended for
556 /// legalizing libcalls as tail calls when possible.
557 static bool isLibCallInTailPosition(MachineInstr &MI,
558  const TargetInstrInfo &TII,
559  MachineRegisterInfo &MRI) {
560  MachineBasicBlock &MBB = *MI.getParent();
561  const Function &F = MBB.getParent()->getFunction();
562 
563  // Conservatively require the attributes of the call to match those of
564  // the return. Ignore NoAlias and NonNull because they don't affect the
565  // call sequence.
566  AttributeList CallerAttrs = F.getAttributes();
567  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
568  .removeAttribute(Attribute::NoAlias)
569  .removeAttribute(Attribute::NonNull)
570  .hasAttributes())
571  return false;
572 
573  // It's not safe to eliminate the sign / zero extension of the return value.
574  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
575  CallerAttrs.hasRetAttr(Attribute::SExt))
576  return false;
577 
578  // Only tail call if the following instruction is a standard return or if we
579  // have a `thisreturn` callee, and a sequence like:
580  //
581  // G_MEMCPY %0, %1, %2
582  // $x0 = COPY %0
583  // RET_ReallyLR implicit $x0
584  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
585  if (Next != MBB.instr_end() && Next->isCopy()) {
586  switch (MI.getOpcode()) {
587  default:
588  llvm_unreachable("unsupported opcode");
589  case TargetOpcode::G_BZERO:
590  return false;
591  case TargetOpcode::G_MEMCPY:
592  case TargetOpcode::G_MEMMOVE:
593  case TargetOpcode::G_MEMSET:
594  break;
595  }
596 
597  Register VReg = MI.getOperand(0).getReg();
598  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
599  return false;
600 
601  Register PReg = Next->getOperand(0).getReg();
602  if (!PReg.isPhysical())
603  return false;
604 
605  auto Ret = next_nodbg(Next, MBB.instr_end());
606  if (Ret == MBB.instr_end() || !Ret->isReturn())
607  return false;
608 
609  if (Ret->getNumImplicitOperands() != 1)
610  return false;
611 
612  if (PReg != Ret->getOperand(0).getReg())
613  return false;
614 
615  // Skip over the COPY that we just validated.
616  Next = Ret;
617  }
618 
619  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
620  return false;
621 
622  return true;
623 }
624 
625 LegalizerHelper::LegalizeResult
626 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
627  const CallLowering::ArgInfo &Result,
628  ArrayRef<CallLowering::ArgInfo> Args,
629  const CallingConv::ID CC) {
630  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
631 
632  CallLowering::CallLoweringInfo Info;
633  Info.CallConv = CC;
634  Info.Callee = MachineOperand::CreateES(Name);
635  Info.OrigRet = Result;
636  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
637  if (!CLI.lowerCall(MIRBuilder, Info))
638  return LegalizerHelper::UnableToLegalize;
639 
640  return LegalizerHelper::Legalized;
641 }
642 
643 LegalizerHelper::LegalizeResult
644 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
645  const CallLowering::ArgInfo &Result,
646  ArrayRef<CallLowering::ArgInfo> Args) {
647  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
648  const char *Name = TLI.getLibcallName(Libcall);
649  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
650  return createLibcall(MIRBuilder, Name, Result, Args, CC);
651 }
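
// Illustrative use (DstReg, LHSReg, RHSReg and Ctx are placeholder names):
// lowering a signed 64-bit remainder through the runtime,
//   createLibcall(MIRBuilder, RTLIB::SREM_I64,
//                 {DstReg, Type::getInt64Ty(Ctx), 0},
//                 {{LHSReg, Type::getInt64Ty(Ctx), 0},
//                  {RHSReg, Type::getInt64Ty(Ctx), 0}});
// emits a call to the target's SREM_I64 routine (commonly "__moddi3") and
// returns Legalized on success.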
652 
653 // Useful for libcalls where all operands have the same type.
654 static LegalizerHelper::LegalizeResult
655 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
656  Type *OpType) {
657  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
658 
659  // FIXME: What does the original arg index mean here?
660  SmallVector<CallLowering::ArgInfo, 3> Args;
661  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
662  Args.push_back({MO.getReg(), OpType, 0});
663  return createLibcall(MIRBuilder, Libcall,
664  {MI.getOperand(0).getReg(), OpType, 0}, Args);
665 }
666 
667 LegalizerHelper::LegalizeResult
668 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
669  MachineInstr &MI, LostDebugLocObserver &LocObserver) {
670  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
671 
673  // Add all the args, except for the last which is an imm denoting 'tail'.
674  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
675  Register Reg = MI.getOperand(i).getReg();
676 
677  // We need to derive an IR type for call lowering.
678  LLT OpLLT = MRI.getType(Reg);
679  Type *OpTy = nullptr;
680  if (OpLLT.isPointer())
681  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
682  else
683  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
684  Args.push_back({Reg, OpTy, 0});
685  }
686 
687  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
688  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
689  RTLIB::Libcall RTLibcall;
690  unsigned Opc = MI.getOpcode();
691  switch (Opc) {
692  case TargetOpcode::G_BZERO:
693  RTLibcall = RTLIB::BZERO;
694  break;
695  case TargetOpcode::G_MEMCPY:
696  RTLibcall = RTLIB::MEMCPY;
697  Args[0].Flags[0].setReturned();
698  break;
699  case TargetOpcode::G_MEMMOVE:
700  RTLibcall = RTLIB::MEMMOVE;
701  Args[0].Flags[0].setReturned();
702  break;
703  case TargetOpcode::G_MEMSET:
704  RTLibcall = RTLIB::MEMSET;
705  Args[0].Flags[0].setReturned();
706  break;
707  default:
708  llvm_unreachable("unsupported opcode");
709  }
710  const char *Name = TLI.getLibcallName(RTLibcall);
711 
712  // Unsupported libcall on the target.
713  if (!Name) {
714  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
715  << MIRBuilder.getTII().getName(Opc) << "\n");
716  return LegalizerHelper::UnableToLegalize;
717  }
718 
719  CallLowering::CallLoweringInfo Info;
720  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
721  Info.Callee = MachineOperand::CreateES(Name);
722  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
723  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
724  isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
725 
726  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
727  if (!CLI.lowerCall(MIRBuilder, Info))
728  return LegalizerHelper::UnableToLegalize;
729 
730  if (Info.LoweredTailCall) {
731  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
732 
733  // Check debug locations before removing the return.
734  LocObserver.checkpoint(true);
735 
736  // We must have a return following the call (or debug insts) to get past
737  // isLibCallInTailPosition.
738  do {
739  MachineInstr *Next = MI.getNextNode();
740  assert(Next &&
741  (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
742  "Expected instr following MI to be return or debug inst?");
743  // We lowered a tail call, so the call is now the return from the block.
744  // Delete the old return.
745  Next->eraseFromParent();
746  } while (MI.getNextNode());
747 
748  // We expect to lose the debug location from the return.
749  LocObserver.checkpoint(false);
750  }
751 
752  return LegalizerHelper::Legalized;
753 }
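
// Illustrative example: a tail-eligible copy such as
//   G_MEMCPY %dst(p0), %src(p0), %len(s64), 1   ; last operand marks 'tail'
// is lowered here to a call to the "memcpy" runtime routine taking
// (i8*, i8*, i64); if isLibCallInTailPosition() also agrees, the call is
// emitted as a tail call and the now-redundant return is deleted
// (%dst, %src and %len are placeholder names).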
754 
755 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
756  Type *FromType) {
757  auto ToMVT = MVT::getVT(ToType);
758  auto FromMVT = MVT::getVT(FromType);
759 
760  switch (Opcode) {
761  case TargetOpcode::G_FPEXT:
762  return RTLIB::getFPEXT(FromMVT, ToMVT);
763  case TargetOpcode::G_FPTRUNC:
764  return RTLIB::getFPROUND(FromMVT, ToMVT);
765  case TargetOpcode::G_FPTOSI:
766  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
767  case TargetOpcode::G_FPTOUI:
768  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
769  case TargetOpcode::G_SITOFP:
770  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
771  case TargetOpcode::G_UITOFP:
772  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
773  }
774  llvm_unreachable("Unsupported libcall function");
775 }
776 
777 static LegalizerHelper::LegalizeResult
778 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
779  Type *FromType) {
780  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
781  return createLibcall(MIRBuilder, Libcall,
782  {MI.getOperand(0).getReg(), ToType, 0},
783  {{MI.getOperand(1).getReg(), FromType, 0}});
784 }
785 
786 LegalizerHelper::LegalizeResult
787 LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
788  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
789  unsigned Size = LLTy.getSizeInBits();
790  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
791 
792  switch (MI.getOpcode()) {
793  default:
794  return UnableToLegalize;
795  case TargetOpcode::G_SDIV:
796  case TargetOpcode::G_UDIV:
797  case TargetOpcode::G_SREM:
798  case TargetOpcode::G_UREM:
799  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
800  Type *HLTy = IntegerType::get(Ctx, Size);
801  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
802  if (Status != Legalized)
803  return Status;
804  break;
805  }
806  case TargetOpcode::G_FADD:
807  case TargetOpcode::G_FSUB:
808  case TargetOpcode::G_FMUL:
809  case TargetOpcode::G_FDIV:
810  case TargetOpcode::G_FMA:
811  case TargetOpcode::G_FPOW:
812  case TargetOpcode::G_FREM:
813  case TargetOpcode::G_FCOS:
814  case TargetOpcode::G_FSIN:
815  case TargetOpcode::G_FLOG10:
816  case TargetOpcode::G_FLOG:
817  case TargetOpcode::G_FLOG2:
818  case TargetOpcode::G_FEXP:
819  case TargetOpcode::G_FEXP2:
820  case TargetOpcode::G_FCEIL:
821  case TargetOpcode::G_FFLOOR:
822  case TargetOpcode::G_FMINNUM:
823  case TargetOpcode::G_FMAXNUM:
824  case TargetOpcode::G_FSQRT:
825  case TargetOpcode::G_FRINT:
826  case TargetOpcode::G_FNEARBYINT:
827  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
828  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
829  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
830  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
831  return UnableToLegalize;
832  }
833  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
834  if (Status != Legalized)
835  return Status;
836  break;
837  }
838  case TargetOpcode::G_FPEXT:
839  case TargetOpcode::G_FPTRUNC: {
840  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
841  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
842  if (!FromTy || !ToTy)
843  return UnableToLegalize;
844  LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
845  if (Status != Legalized)
846  return Status;
847  break;
848  }
849  case TargetOpcode::G_FPTOSI:
850  case TargetOpcode::G_FPTOUI: {
851  // FIXME: Support other types
852  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
853  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
854  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
855  return UnableToLegalize;
856  LegalizeResult Status = conversionLibcall(
857  MI, MIRBuilder,
858  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
859  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
860  if (Status != Legalized)
861  return Status;
862  break;
863  }
864  case TargetOpcode::G_SITOFP:
865  case TargetOpcode::G_UITOFP: {
866  // FIXME: Support other types
867  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
868  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
869  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
870  return UnableToLegalize;
871  LegalizeResult Status = conversionLibcall(
872  MI, MIRBuilder,
873  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
874  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
875  if (Status != Legalized)
876  return Status;
877  break;
878  }
879  case TargetOpcode::G_BZERO:
880  case TargetOpcode::G_MEMCPY:
881  case TargetOpcode::G_MEMMOVE:
882  case TargetOpcode::G_MEMSET: {
883  LegalizeResult Result =
884  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
885  if (Result != Legalized)
886  return Result;
887  MI.eraseFromParent();
888  return Result;
889  }
890  }
891 
892  MI.eraseFromParent();
893  return Legalized;
894 }
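
// Illustrative example: on a target with no legal G_FREM,
//   %2:_(s64) = G_FREM %0:_(s64), %1:_(s64)
// reaches the floating-point case above, simpleLibcall() emits a call to
// fmod() whose result defines %2, and the original G_FREM is erased.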
895 
896 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
897  unsigned TypeIdx,
898  LLT NarrowTy) {
899  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
900  uint64_t NarrowSize = NarrowTy.getSizeInBits();
901 
902  switch (MI.getOpcode()) {
903  default:
904  return UnableToLegalize;
905  case TargetOpcode::G_IMPLICIT_DEF: {
906  Register DstReg = MI.getOperand(0).getReg();
907  LLT DstTy = MRI.getType(DstReg);
908 
909  // If SizeOp0 is not an exact multiple of NarrowSize, emit
910  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
911  // FIXME: Although this would also be legal for the general case, it causes
912  // a lot of regressions in the emitted code (superfluous COPYs, artifact
913  // combines not being hit). This seems to be a problem related to the
914  // artifact combiner.
915  if (SizeOp0 % NarrowSize != 0) {
916  LLT ImplicitTy = NarrowTy;
917  if (DstTy.isVector())
918  ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
919 
920  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
921  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
922 
923  MI.eraseFromParent();
924  return Legalized;
925  }
926 
927  int NumParts = SizeOp0 / NarrowSize;
928 
929  SmallVector<Register, 2> DstRegs;
930  for (int i = 0; i < NumParts; ++i)
931  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
932 
933  if (DstTy.isVector())
934  MIRBuilder.buildBuildVector(DstReg, DstRegs);
935  else
936  MIRBuilder.buildMerge(DstReg, DstRegs);
937  MI.eraseFromParent();
938  return Legalized;
939  }
940  case TargetOpcode::G_CONSTANT: {
941  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
942  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
943  unsigned TotalSize = Ty.getSizeInBits();
944  unsigned NarrowSize = NarrowTy.getSizeInBits();
945  int NumParts = TotalSize / NarrowSize;
946 
947  SmallVector<Register, 4> PartRegs;
948  for (int I = 0; I != NumParts; ++I) {
949  unsigned Offset = I * NarrowSize;
950  auto K = MIRBuilder.buildConstant(NarrowTy,
951  Val.lshr(Offset).trunc(NarrowSize));
952  PartRegs.push_back(K.getReg(0));
953  }
954 
955  LLT LeftoverTy;
956  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
957  SmallVector<Register, 1> LeftoverRegs;
958  if (LeftoverBits != 0) {
959  LeftoverTy = LLT::scalar(LeftoverBits);
960  auto K = MIRBuilder.buildConstant(
961  LeftoverTy,
962  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
963  LeftoverRegs.push_back(K.getReg(0));
964  }
965 
966  insertParts(MI.getOperand(0).getReg(),
967  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
968 
969  MI.eraseFromParent();
970  return Legalized;
971  }
972  case TargetOpcode::G_SEXT:
973  case TargetOpcode::G_ZEXT:
974  case TargetOpcode::G_ANYEXT:
975  return narrowScalarExt(MI, TypeIdx, NarrowTy);
976  case TargetOpcode::G_TRUNC: {
977  if (TypeIdx != 1)
978  return UnableToLegalize;
979 
980  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
981  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
982  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
983  return UnableToLegalize;
984  }
985 
986  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
987  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
988  MI.eraseFromParent();
989  return Legalized;
990  }
991 
992  case TargetOpcode::G_FREEZE: {
993  if (TypeIdx != 0)
994  return UnableToLegalize;
995 
996  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
997  // Should widen scalar first
998  if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
999  return UnableToLegalize;
1000 
1001  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1002  SmallVector<Register, 8> Parts;
1003  for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1004  Parts.push_back(
1005  MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1006  }
1007 
1008  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
1009  MI.eraseFromParent();
1010  return Legalized;
1011  }
1012  case TargetOpcode::G_ADD:
1013  case TargetOpcode::G_SUB:
1014  case TargetOpcode::G_SADDO:
1015  case TargetOpcode::G_SSUBO:
1016  case TargetOpcode::G_SADDE:
1017  case TargetOpcode::G_SSUBE:
1018  case TargetOpcode::G_UADDO:
1019  case TargetOpcode::G_USUBO:
1020  case TargetOpcode::G_UADDE:
1021  case TargetOpcode::G_USUBE:
1022  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1023  case TargetOpcode::G_MUL:
1024  case TargetOpcode::G_UMULH:
1025  return narrowScalarMul(MI, NarrowTy);
1026  case TargetOpcode::G_EXTRACT:
1027  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1028  case TargetOpcode::G_INSERT:
1029  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1030  case TargetOpcode::G_LOAD: {
1031  auto &LoadMI = cast<GLoad>(MI);
1032  Register DstReg = LoadMI.getDstReg();
1033  LLT DstTy = MRI.getType(DstReg);
1034  if (DstTy.isVector())
1035  return UnableToLegalize;
1036 
1037  if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1038  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1039  MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1040  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1041  LoadMI.eraseFromParent();
1042  return Legalized;
1043  }
1044 
1045  return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1046  }
1047  case TargetOpcode::G_ZEXTLOAD:
1048  case TargetOpcode::G_SEXTLOAD: {
1049  auto &LoadMI = cast<GExtLoad>(MI);
1050  Register DstReg = LoadMI.getDstReg();
1051  Register PtrReg = LoadMI.getPointerReg();
1052 
1053  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1054  auto &MMO = LoadMI.getMMO();
1055  unsigned MemSize = MMO.getSizeInBits();
1056 
1057  if (MemSize == NarrowSize) {
1058  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1059  } else if (MemSize < NarrowSize) {
1060  MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1061  } else if (MemSize > NarrowSize) {
1062  // FIXME: Need to split the load.
1063  return UnableToLegalize;
1064  }
1065 
1066  if (isa<GZExtLoad>(LoadMI))
1067  MIRBuilder.buildZExt(DstReg, TmpReg);
1068  else
1069  MIRBuilder.buildSExt(DstReg, TmpReg);
1070 
1071  LoadMI.eraseFromParent();
1072  return Legalized;
1073  }
1074  case TargetOpcode::G_STORE: {
1075  auto &StoreMI = cast<GStore>(MI);
1076 
1077  Register SrcReg = StoreMI.getValueReg();
1078  LLT SrcTy = MRI.getType(SrcReg);
1079  if (SrcTy.isVector())
1080  return UnableToLegalize;
1081 
1082  int NumParts = SizeOp0 / NarrowSize;
1083  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1084  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1085  if (SrcTy.isVector() && LeftoverBits != 0)
1086  return UnableToLegalize;
1087 
1088  if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1089  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1090  MIRBuilder.buildTrunc(TmpReg, SrcReg);
1091  MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1092  StoreMI.eraseFromParent();
1093  return Legalized;
1094  }
1095 
1096  return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1097  }
1098  case TargetOpcode::G_SELECT:
1099  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1100  case TargetOpcode::G_AND:
1101  case TargetOpcode::G_OR:
1102  case TargetOpcode::G_XOR: {
1103  // Legalize bitwise operation:
1104  // A = BinOp<Ty> B, C
1105  // into:
1106  // B1, ..., BN = G_UNMERGE_VALUES B
1107  // C1, ..., CN = G_UNMERGE_VALUES C
1108  // A1 = BinOp<Ty/N> B1, C1
1109  // ...
1110  // AN = BinOp<Ty/N> BN, CN
1111  // A = G_MERGE_VALUES A1, ..., AN
1112  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1113  }
1114  case TargetOpcode::G_SHL:
1115  case TargetOpcode::G_LSHR:
1116  case TargetOpcode::G_ASHR:
1117  return narrowScalarShift(MI, TypeIdx, NarrowTy);
1118  case TargetOpcode::G_CTLZ:
1119  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1120  case TargetOpcode::G_CTTZ:
1121  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1122  case TargetOpcode::G_CTPOP:
1123  if (TypeIdx == 1)
1124  switch (MI.getOpcode()) {
1125  case TargetOpcode::G_CTLZ:
1126  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1127  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1128  case TargetOpcode::G_CTTZ:
1129  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1130  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1131  case TargetOpcode::G_CTPOP:
1132  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1133  default:
1134  return UnableToLegalize;
1135  }
1136 
1137  Observer.changingInstr(MI);
1138  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1139  Observer.changedInstr(MI);
1140  return Legalized;
1141  case TargetOpcode::G_INTTOPTR:
1142  if (TypeIdx != 1)
1143  return UnableToLegalize;
1144 
1145  Observer.changingInstr(MI);
1146  narrowScalarSrc(MI, NarrowTy, 1);
1147  Observer.changedInstr(MI);
1148  return Legalized;
1149  case TargetOpcode::G_PTRTOINT:
1150  if (TypeIdx != 0)
1151  return UnableToLegalize;
1152 
1153  Observer.changingInstr(MI);
1154  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1155  Observer.changedInstr(MI);
1156  return Legalized;
1157  case TargetOpcode::G_PHI: {
1158  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1159  // NarrowSize.
1160  if (SizeOp0 % NarrowSize != 0)
1161  return UnableToLegalize;
1162 
1163  unsigned NumParts = SizeOp0 / NarrowSize;
1164  SmallVector<Register, 2> DstRegs(NumParts);
1165  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1166  Observer.changingInstr(MI);
1167  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1168  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1169  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1170  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1171  SrcRegs[i / 2]);
1172  }
1173  MachineBasicBlock &MBB = *MI.getParent();
1174  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1175  for (unsigned i = 0; i < NumParts; ++i) {
1176  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1177  MachineInstrBuilder MIB =
1178  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1179  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1180  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1181  }
1182  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1183  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1184  Observer.changedInstr(MI);
1185  MI.eraseFromParent();
1186  return Legalized;
1187  }
1188  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1189  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1190  if (TypeIdx != 2)
1191  return UnableToLegalize;
1192 
1193  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1194  Observer.changingInstr(MI);
1195  narrowScalarSrc(MI, NarrowTy, OpIdx);
1196  Observer.changedInstr(MI);
1197  return Legalized;
1198  }
1199  case TargetOpcode::G_ICMP: {
1200  Register LHS = MI.getOperand(2).getReg();
1201  LLT SrcTy = MRI.getType(LHS);
1202  uint64_t SrcSize = SrcTy.getSizeInBits();
1203  CmpInst::Predicate Pred =
1204  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1205 
1206  // TODO: Handle the non-equality case for weird sizes.
1207  if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1208  return UnableToLegalize;
1209 
1210  LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1211  SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1212  if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1213  LHSLeftoverRegs))
1214  return UnableToLegalize;
1215 
1216  LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1217  SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1218  if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1219  RHSPartRegs, RHSLeftoverRegs))
1220  return UnableToLegalize;
1221 
1222  // We now have the LHS and RHS of the compare split into narrow-type
1223  // registers, plus potentially some leftover type.
1224  Register Dst = MI.getOperand(0).getReg();
1225  LLT ResTy = MRI.getType(Dst);
1226  if (ICmpInst::isEquality(Pred)) {
1227  // For each part on the LHS and RHS, keep track of the result of XOR-ing
1228  // them together. For each equal part, the result should be all 0s. For
1229  // each non-equal part, we'll get at least one 1.
1230  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1231  SmallVector<Register, 4> Xors;
1232  for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1233  auto LHS = std::get<0>(LHSAndRHS);
1234  auto RHS = std::get<1>(LHSAndRHS);
1235  auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1236  Xors.push_back(Xor);
1237  }
1238 
1239  // Build a G_XOR for each leftover register. Each G_XOR must be widened
1240  // to the desired narrow type so that we can OR them together later.
1241  SmallVector<Register, 4> WidenedXors;
1242  for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1243  auto LHS = std::get<0>(LHSAndRHS);
1244  auto RHS = std::get<1>(LHSAndRHS);
1245  auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1246  LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1247  buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1248  /* PadStrategy = */ TargetOpcode::G_ZEXT);
1249  Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1250  }
1251 
1252  // Now, for each part we broke up, we know if they are equal/not equal
1253  // based off the G_XOR. We can OR these all together and compare against
1254  // 0 to get the result.
1255  assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1256  auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1257  for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1258  Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1259  MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1260  } else {
1261  // TODO: Handle non-power-of-two types.
1262  assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1263  assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1264  Register LHSL = LHSPartRegs[0];
1265  Register LHSH = LHSPartRegs[1];
1266  Register RHSL = RHSPartRegs[0];
1267  Register RHSH = RHSPartRegs[1];
1268  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1269  MachineInstrBuilder CmpHEQ =
1270  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1271  MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1272  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1273  MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1274  }
1275  MI.eraseFromParent();
1276  return Legalized;
1277  }
1278  case TargetOpcode::G_SEXT_INREG: {
1279  if (TypeIdx != 0)
1280  return UnableToLegalize;
1281 
1282  int64_t SizeInBits = MI.getOperand(2).getImm();
1283 
1284  // So long as the new type has more bits than the bits we're extending we
1285  // don't need to break it apart.
1286  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1287  Observer.changingInstr(MI);
1288  // We don't lose any non-extension bits by truncating the src and
1289  // sign-extending the dst.
1290  MachineOperand &MO1 = MI.getOperand(1);
1291  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1292  MO1.setReg(TruncMIB.getReg(0));
1293 
1294  MachineOperand &MO2 = MI.getOperand(0);
1295  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1296  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1297  MIRBuilder.buildSExt(MO2, DstExt);
1298  MO2.setReg(DstExt);
1299  Observer.changedInstr(MI);
1300  return Legalized;
1301  }
1302 
1303  // Break it apart. Components below the extension point are unmodified. The
1304  // component containing the extension point becomes a narrower SEXT_INREG.
1305  // Components above it are ashr'd from the component containing the
1306  // extension point.
1307  if (SizeOp0 % NarrowSize != 0)
1308  return UnableToLegalize;
1309  int NumParts = SizeOp0 / NarrowSize;
1310 
1311  // List the registers where the destination will be scattered.
1312  SmallVector<Register, 2> DstRegs;
1313  // List the registers where the source will be split.
1314  SmallVector<Register, 2> SrcRegs;
1315 
1316  // Create all the temporary registers.
1317  for (int i = 0; i < NumParts; ++i) {
1318  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1319 
1320  SrcRegs.push_back(SrcReg);
1321  }
1322 
1323  // Explode the big arguments into smaller chunks.
1324  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1325 
1326  Register AshrCstReg =
1327  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1328  .getReg(0);
1329  Register FullExtensionReg = 0;
1330  Register PartialExtensionReg = 0;
1331 
1332  // Do the operation on each small part.
1333  for (int i = 0; i < NumParts; ++i) {
1334  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1335  DstRegs.push_back(SrcRegs[i]);
1336  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1337  assert(PartialExtensionReg &&
1338  "Expected to visit partial extension before full");
1339  if (FullExtensionReg) {
1340  DstRegs.push_back(FullExtensionReg);
1341  continue;
1342  }
1343  DstRegs.push_back(
1344  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1345  .getReg(0));
1346  FullExtensionReg = DstRegs.back();
1347  } else {
1348  DstRegs.push_back(
1349  MIRBuilder
1350  .buildInstr(
1351  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1352  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1353  .getReg(0));
1354  PartialExtensionReg = DstRegs.back();
1355  }
1356  }
1357 
1358  // Gather the destination registers into the final destination.
1359  Register DstReg = MI.getOperand(0).getReg();
1360  MIRBuilder.buildMerge(DstReg, DstRegs);
1361  MI.eraseFromParent();
1362  return Legalized;
1363  }
1364  case TargetOpcode::G_BSWAP:
1365  case TargetOpcode::G_BITREVERSE: {
1366  if (SizeOp0 % NarrowSize != 0)
1367  return UnableToLegalize;
1368 
1369  Observer.changingInstr(MI);
1370  SmallVector<Register, 2> SrcRegs, DstRegs;
1371  unsigned NumParts = SizeOp0 / NarrowSize;
1372  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1373 
1374  for (unsigned i = 0; i < NumParts; ++i) {
1375  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1376  {SrcRegs[NumParts - 1 - i]});
1377  DstRegs.push_back(DstPart.getReg(0));
1378  }
1379 
1380  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1381 
1382  Observer.changedInstr(MI);
1383  MI.eraseFromParent();
1384  return Legalized;
1385  }
1386  case TargetOpcode::G_PTR_ADD:
1387  case TargetOpcode::G_PTRMASK: {
1388  if (TypeIdx != 1)
1389  return UnableToLegalize;
1390  Observer.changingInstr(MI);
1391  narrowScalarSrc(MI, NarrowTy, 2);
1392  Observer.changedInstr(MI);
1393  return Legalized;
1394  }
1395  case TargetOpcode::G_FPTOUI:
1396  case TargetOpcode::G_FPTOSI:
1397  return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1398  case TargetOpcode::G_FPEXT:
1399  if (TypeIdx != 0)
1400  return UnableToLegalize;
1401  Observer.changingInstr(MI);
1402  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1403  Observer.changedInstr(MI);
1404  return Legalized;
1405  }
1406 }
1407 
1408 Register LegalizerHelper::coerceToScalar(Register Val) {
1409  LLT Ty = MRI.getType(Val);
1410  if (Ty.isScalar())
1411  return Val;
1412 
1413  const DataLayout &DL = MIRBuilder.getDataLayout();
1414  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1415  if (Ty.isPointer()) {
1416  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1417  return Register();
1418  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1419  }
1420 
1421  Register NewVal = Val;
1422 
1423  assert(Ty.isVector());
1424  LLT EltTy = Ty.getElementType();
1425  if (EltTy.isPointer())
1426  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1427  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1428 }
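
// Illustrative examples: an s64 scalar is returned unchanged; a p0 pointer in
// an integral address space becomes
//   %int:_(s64) = G_PTRTOINT %ptr(p0)
// and a <2 x s32> vector becomes
//   %int:_(s64) = G_BITCAST %vec(<2 x s32>)
// (%int, %ptr and %vec are placeholder names; vectors of pointers go through
// G_PTRTOINT first).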
1429 
1430 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1431  unsigned OpIdx, unsigned ExtOpcode) {
1432  MachineOperand &MO = MI.getOperand(OpIdx);
1433  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1434  MO.setReg(ExtB.getReg(0));
1435 }
1436 
1437 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1438  unsigned OpIdx) {
1439  MachineOperand &MO = MI.getOperand(OpIdx);
1440  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1441  MO.setReg(ExtB.getReg(0));
1442 }
1443 
1444 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1445  unsigned OpIdx, unsigned TruncOpcode) {
1446  MachineOperand &MO = MI.getOperand(OpIdx);
1447  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1448  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1449  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1450  MO.setReg(DstExt);
1451 }
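
// Illustrative example of how the *Src/*Dst helpers combine: widening
//   %2:_(s16) = G_ADD %0:_(s16), %1:_(s16)
// to s32 with widenScalarSrc(..., G_ANYEXT) on both inputs and
// widenScalarDst(..., G_TRUNC) on the result yields
//   %3:_(s32) = G_ANYEXT %0(s16)
//   %4:_(s32) = G_ANYEXT %1(s16)
//   %5:_(s32) = G_ADD %3, %4
//   %2:_(s16) = G_TRUNC %5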
1452 
1453 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1454  unsigned OpIdx, unsigned ExtOpcode) {
1455  MachineOperand &MO = MI.getOperand(OpIdx);
1456  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1457  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1458  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1459  MO.setReg(DstTrunc);
1460 }
1461 
1462 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1463  unsigned OpIdx) {
1464  MachineOperand &MO = MI.getOperand(OpIdx);
1465  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1466  Register Dst = MO.getReg();
1467  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1468  MO.setReg(DstExt);
1469  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1470 }
1471 
1472 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1473  unsigned OpIdx) {
1474  MachineOperand &MO = MI.getOperand(OpIdx);
1477 }
1478 
1479 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1480  MachineOperand &Op = MI.getOperand(OpIdx);
1481  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1482 }
1483 
1484 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1485  MachineOperand &MO = MI.getOperand(OpIdx);
1486  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1487  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1488  MIRBuilder.buildBitcast(MO, CastDst);
1489  MO.setReg(CastDst);
1490 }
1491 
1492 LegalizerHelper::LegalizeResult
1493 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1494  LLT WideTy) {
1495  if (TypeIdx != 1)
1496  return UnableToLegalize;
1497 
1498  Register DstReg = MI.getOperand(0).getReg();
1499  LLT DstTy = MRI.getType(DstReg);
1500  if (DstTy.isVector())
1501  return UnableToLegalize;
1502 
1503  Register Src1 = MI.getOperand(1).getReg();
1504  LLT SrcTy = MRI.getType(Src1);
1505  const int DstSize = DstTy.getSizeInBits();
1506  const int SrcSize = SrcTy.getSizeInBits();
1507  const int WideSize = WideTy.getSizeInBits();
1508  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1509 
1510  unsigned NumOps = MI.getNumOperands();
1511  unsigned NumSrc = MI.getNumOperands() - 1;
1512  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1513 
1514  if (WideSize >= DstSize) {
1515  // Directly pack the bits in the target type.
1516  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1517 
1518  for (unsigned I = 2; I != NumOps; ++I) {
1519  const unsigned Offset = (I - 1) * PartSize;
1520 
1521  Register SrcReg = MI.getOperand(I).getReg();
1522  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1523 
1524  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1525 
1526  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1527  MRI.createGenericVirtualRegister(WideTy);
1528 
1529  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1530  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1531  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1532  ResultReg = NextResult;
1533  }
1534 
1535  if (WideSize > DstSize)
1536  MIRBuilder.buildTrunc(DstReg, ResultReg);
1537  else if (DstTy.isPointer())
1538  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1539 
1540  MI.eraseFromParent();
1541  return Legalized;
1542  }
1543 
1544  // Unmerge the original values to the GCD type, and recombine to the next
1545  // multiple greater than the original type.
1546  //
1547  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1548  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1549  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1550  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1551  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1552  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1553  // %12:_(s12) = G_MERGE_VALUES %10, %11
1554  //
1555  // Padding with undef if necessary:
1556  //
1557  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1558  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1559  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1560  // %7:_(s2) = G_IMPLICIT_DEF
1561  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1562  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1563  // %10:_(s12) = G_MERGE_VALUES %8, %9
1564 
1565  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1566  LLT GCDTy = LLT::scalar(GCD);
1567 
1569  SmallVector<Register, 8> NewMergeRegs;
1570  SmallVector<Register, 8> Unmerges;
1571  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1572 
1573  // Decompose the original operands if they don't evenly divide.
1574  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1575  Register SrcReg = MO.getReg();
1576  if (GCD == SrcSize) {
1577  Unmerges.push_back(SrcReg);
1578  } else {
1579  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1580  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1581  Unmerges.push_back(Unmerge.getReg(J));
1582  }
1583  }
1584 
1585  // Pad with undef to the next size that is a multiple of the requested size.
1586  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1587  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1588  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1589  Unmerges.push_back(UndefReg);
1590  }
1591 
1592  const int PartsPerGCD = WideSize / GCD;
1593 
1594  // Build merges of each piece.
1595  ArrayRef<Register> Slicer(Unmerges);
1596  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1597  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1598  NewMergeRegs.push_back(Merge.getReg(0));
1599  }
1600 
1601  // A truncate may be necessary if the requested type doesn't evenly divide the
1602  // original result type.
1603  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1604  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1605  } else {
1606  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1607  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1608  }
1609 
1610  MI.eraseFromParent();
1611  return Legalized;
1612 }
1613 
1614 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1615  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1616  LLT OrigTy = MRI.getType(OrigReg);
1617  LLT LCMTy = getLCMType(WideTy, OrigTy);
1618 
1619  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1620  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1621 
1622  Register UnmergeSrc = WideReg;
1623 
1624  // Create a merge to the LCM type, padding with undef
1625  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1626  // =>
1627  // %1:_(<4 x s32>) = G_FOO
1628  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1629  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1630  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1631  if (NumMergeParts > 1) {
1632  Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1633  SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1634  MergeParts[0] = WideReg;
1635  UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1636  }
1637 
1638  // Unmerge to the original register and pad with dead defs.
1639  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1640  UnmergeResults[0] = OrigReg;
1641  for (int I = 1; I != NumUnmergeParts; ++I)
1642  UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1643 
1644  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1645  return WideReg;
1646 }
1647 
1648 LegalizerHelper::LegalizeResult
1649 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1650  LLT WideTy) {
1651  if (TypeIdx != 0)
1652  return UnableToLegalize;
1653 
1654  int NumDst = MI.getNumOperands() - 1;
1655  Register SrcReg = MI.getOperand(NumDst).getReg();
1656  LLT SrcTy = MRI.getType(SrcReg);
1657  if (SrcTy.isVector())
1658  return UnableToLegalize;
1659 
1660  Register Dst0Reg = MI.getOperand(0).getReg();
1661  LLT DstTy = MRI.getType(Dst0Reg);
1662  if (!DstTy.isScalar())
1663  return UnableToLegalize;
1664 
1665  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1666  if (SrcTy.isPointer()) {
1667  const DataLayout &DL = MIRBuilder.getDataLayout();
1668  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1669  LLVM_DEBUG(
1670  dbgs() << "Not casting non-integral address space integer\n");
1671  return UnableToLegalize;
1672  }
1673 
1674  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1675  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1676  }
1677 
1678  // Widen SrcTy to WideTy. This does not affect the result, but since the
1679  // user requested this size, it is probably better handled than SrcTy and
1680  // should reduce the total number of legalization artifacts.
1681  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1682  SrcTy = WideTy;
1683  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1684  }
1685 
1686  // There's no unmerge type to target. Directly extract the bits from the
1687  // source type.
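  // For example (illustrative types): unmerging an s32 source into four s8
  // pieces with WideTy = s64 any-extends the source to s64 and produces piece
  // I as G_TRUNC(G_LSHR(%src, 8 * I)).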
1688  unsigned DstSize = DstTy.getSizeInBits();
1689 
1690  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1691  for (int I = 1; I != NumDst; ++I) {
1692  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1693  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1694  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1695  }
1696 
1697  MI.eraseFromParent();
1698  return Legalized;
1699  }
1700 
1701  // Extend the source to a wider type.
1702  LLT LCMTy = getLCMType(SrcTy, WideTy);
1703 
1704  Register WideSrc = SrcReg;
1705  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1706  // TODO: If this is an integral address space, cast to integer and anyext.
1707  if (SrcTy.isPointer()) {
1708  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1709  return UnableToLegalize;
1710  }
1711 
1712  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1713  }
1714 
1715  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1716 
1717  // Create a sequence of unmerges and merges to the original results. Since we
1718  // may have widened the source, we will need to pad the results with dead defs
1719  // to cover the source register.
1720  // e.g. widen s48 to s64:
1721  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1722  //
1723  // =>
1724  // %4:_(s192) = G_ANYEXT %0:_(s96)
1725  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1726  // ; unpack to GCD type, with extra dead defs
1727  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1728  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1729  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1730  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1731  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1732  const LLT GCDTy = getGCDType(WideTy, DstTy);
1733  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1734  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1735 
1736  // Directly unmerge to the destination without going through a GCD type
1737  // if possible
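  // For example (illustrative types): with DstTy = s16 and WideTy = s32, each
  // s32 piece of the widened source is unmerged directly into two s16
  // results, creating dead s16 defs for any pieces beyond the requested
  // destination count.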
1738  if (PartsPerRemerge == 1) {
1739  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1740 
1741  for (int I = 0; I != NumUnmerge; ++I) {
1742  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1743 
1744  for (int J = 0; J != PartsPerUnmerge; ++J) {
1745  int Idx = I * PartsPerUnmerge + J;
1746  if (Idx < NumDst)
1747  MIB.addDef(MI.getOperand(Idx).getReg());
1748  else {
1749  // Create dead def for excess components.
1750  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1751  }
1752  }
1753 
1754  MIB.addUse(Unmerge.getReg(I));
1755  }
1756  } else {
1758  for (int J = 0; J != NumUnmerge; ++J)
1759  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1760 
1761  SmallVector<Register, 8> RemergeParts;
1762  for (int I = 0; I != NumDst; ++I) {
1763  for (int J = 0; J < PartsPerRemerge; ++J) {
1764  const int Idx = I * PartsPerRemerge + J;
1765  RemergeParts.emplace_back(Parts[Idx]);
1766  }
1767 
1768  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1769  RemergeParts.clear();
1770  }
1771  }
1772 
1773  MI.eraseFromParent();
1774  return Legalized;
1775 }
1776 
1777 LegalizerHelper::LegalizeResult
1778 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1779  LLT WideTy) {
1780  Register DstReg = MI.getOperand(0).getReg();
1781  Register SrcReg = MI.getOperand(1).getReg();
1782  LLT SrcTy = MRI.getType(SrcReg);
1783 
1784  LLT DstTy = MRI.getType(DstReg);
1785  unsigned Offset = MI.getOperand(2).getImm();
1786 
1787  if (TypeIdx == 0) {
1788  if (SrcTy.isVector() || DstTy.isVector())
1789  return UnableToLegalize;
1790 
1791  SrcOp Src(SrcReg);
1792  if (SrcTy.isPointer()) {
1793  // Extracts from pointers can be handled only if they are really just
1794  // simple integers.
1796  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1797  return UnableToLegalize;
1798 
1799  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1800  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1801  SrcTy = SrcAsIntTy;
1802  }
1803 
1804  if (DstTy.isPointer())
1805  return UnableToLegalize;
1806 
1807  if (Offset == 0) {
1808  // Avoid a shift in the degenerate case.
1809  MIRBuilder.buildTrunc(DstReg,
1810  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1811  MI.eraseFromParent();
1812  return Legalized;
1813  }
1814 
1815  // Do a shift in the source type.
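  // For example (illustrative types): a G_EXTRACT of s16 at offset 16 from an
  // s64 source becomes G_TRUNC(G_LSHR(%src, 16)), with the shift done in s64
  // (or in WideTy if that is wider than the source).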
1816  LLT ShiftTy = SrcTy;
1817  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1818  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1819  ShiftTy = WideTy;
1820  }
1821 
1822  auto LShr = MIRBuilder.buildLShr(
1823  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1824  MIRBuilder.buildTrunc(DstReg, LShr);
1825  MI.eraseFromParent();
1826  return Legalized;
1827  }
1828 
1829  if (SrcTy.isScalar()) {
1831  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1833  return Legalized;
1834  }
1835 
1836  if (!SrcTy.isVector())
1837  return UnableToLegalize;
1838 
1839  if (DstTy != SrcTy.getElementType())
1840  return UnableToLegalize;
1841 
1842  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1843  return UnableToLegalize;
1844 
1846  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1847 
1848  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1849  Offset);
1850  widenScalarDst(MI, WideTy.getScalarType(), 0);
1852  return Legalized;
1853 }
1854 
1855 LegalizerHelper::LegalizeResult
1856 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1857  LLT WideTy) {
1858  if (TypeIdx != 0 || WideTy.isVector())
1859  return UnableToLegalize;
1861  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1862  widenScalarDst(MI, WideTy);
1864  return Legalized;
1865 }
1866 
1867 LegalizerHelper::LegalizeResult
1868 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1869  LLT WideTy) {
1870  if (TypeIdx == 1)
1871  return UnableToLegalize; // TODO
1872 
1873  unsigned Opcode;
1874  unsigned ExtOpcode;
1875  Optional<Register> CarryIn = None;
1876  switch (MI.getOpcode()) {
1877  default:
1878  llvm_unreachable("Unexpected opcode!");
1879  case TargetOpcode::G_SADDO:
1880  Opcode = TargetOpcode::G_ADD;
1881  ExtOpcode = TargetOpcode::G_SEXT;
1882  break;
1883  case TargetOpcode::G_SSUBO:
1884  Opcode = TargetOpcode::G_SUB;
1885  ExtOpcode = TargetOpcode::G_SEXT;
1886  break;
1887  case TargetOpcode::G_UADDO:
1888  Opcode = TargetOpcode::G_ADD;
1889  ExtOpcode = TargetOpcode::G_ZEXT;
1890  break;
1891  case TargetOpcode::G_USUBO:
1892  Opcode = TargetOpcode::G_SUB;
1893  ExtOpcode = TargetOpcode::G_ZEXT;
1894  break;
1895  case TargetOpcode::G_SADDE:
1896  Opcode = TargetOpcode::G_UADDE;
1897  ExtOpcode = TargetOpcode::G_SEXT;
1898  CarryIn = MI.getOperand(4).getReg();
1899  break;
1900  case TargetOpcode::G_SSUBE:
1901  Opcode = TargetOpcode::G_USUBE;
1902  ExtOpcode = TargetOpcode::G_SEXT;
1903  CarryIn = MI.getOperand(4).getReg();
1904  break;
1905  case TargetOpcode::G_UADDE:
1906  Opcode = TargetOpcode::G_UADDE;
1907  ExtOpcode = TargetOpcode::G_ZEXT;
1908  CarryIn = MI.getOperand(4).getReg();
1909  break;
1910  case TargetOpcode::G_USUBE:
1911  Opcode = TargetOpcode::G_USUBE;
1912  ExtOpcode = TargetOpcode::G_ZEXT;
1913  CarryIn = MI.getOperand(4).getReg();
1914  break;
1915  }
1916 
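  // A sketch of the expansion with illustrative types (s8 widened to s32,
  // assuming an s1 carry-out): G_UADDO becomes
  //   %lhs:_(s32) = G_ZEXT %a(s8)
  //   %rhs:_(s32) = G_ZEXT %b(s8)
  //   %sum:_(s32) = G_ADD %lhs, %rhs
  //   %lo:_(s8) = G_TRUNC %sum
  //   %ext:_(s32) = G_ZEXT %lo
  //   %carry:_(s1) = G_ICMP intpred(ne), %sum, %ext
  //   %res:_(s8) = G_TRUNC %sum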
1917  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1918  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1919  // Do the arithmetic in the larger type.
1920  Register NewOp;
1921  if (CarryIn) {
1922  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1923  NewOp = MIRBuilder
1924  .buildInstr(Opcode, {WideTy, CarryOutTy},
1925  {LHSExt, RHSExt, *CarryIn})
1926  .getReg(0);
1927  } else {
1928  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1929  }
1930  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1931  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1932  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1933  // There is no overflow if the ExtOp is the same as NewOp.
1934  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1935  // Now trunc the NewOp to the original result.
1936  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1937  MI.eraseFromParent();
1938  return Legalized;
1939 }
1940 
1941 LegalizerHelper::LegalizeResult
1942 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1943  LLT WideTy) {
1944  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1945  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1946  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1947  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1948  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1949  // We can convert this to:
1950  // 1. Any extend iN to iM
1951  // 2. SHL by M-N
1952  // 3. [US][ADD|SUB|SHL]SAT
1953  // 4. L/ASHR by M-N
1954  //
1955  // It may be more efficient to lower this to a min and a max operation in
1956  // the higher precision arithmetic if the promoted operation isn't legal,
1957  // but this decision is up to the target's lowering request.
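  // For example (illustrative types): G_SADDSAT on s8 promoted to s32
  // any-extends both operands, shifts each left by 24, performs an s32
  // G_SADDSAT, arithmetic-shifts the result right by 24, and truncates back
  // to s8.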
1958  Register DstReg = MI.getOperand(0).getReg();
1959 
1960  unsigned NewBits = WideTy.getScalarSizeInBits();
1961  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1962 
1963  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1964  // must not left shift the RHS to preserve the shift amount.
1965  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1966  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1967  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1968  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1969  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1970  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1971 
1972  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1973  {ShiftL, ShiftR}, MI.getFlags());
1974 
1975  // Use a shift that will preserve the number of sign bits when the trunc is
1976  // folded away.
1977  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1978  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1979 
1980  MIRBuilder.buildTrunc(DstReg, Result);
1981  MI.eraseFromParent();
1982  return Legalized;
1983 }
1984 
1985 LegalizerHelper::LegalizeResult
1986 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1987  LLT WideTy) {
1988  if (TypeIdx == 1)
1989  return UnableToLegalize;
1990 
1991  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1992  Register Result = MI.getOperand(0).getReg();
1993  Register OriginalOverflow = MI.getOperand(1).getReg();
1994  Register LHS = MI.getOperand(2).getReg();
1995  Register RHS = MI.getOperand(3).getReg();
1996  LLT SrcTy = MRI.getType(LHS);
1997  LLT OverflowTy = MRI.getType(OriginalOverflow);
1998  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1999 
2000  // To determine if the result overflowed in the larger type, we extend the
2001  // input to the larger type, do the multiply (checking if it overflows),
2002  // then also check the high bits of the result to see if overflow happened
2003  // there.
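  // For example (illustrative types): G_UMULO on s8 zero-extended to s16
  // cannot overflow in the wide multiply (16 >= 2 * 8), so the overflow flag
  // reduces to checking whether the s16 product differs from the
  // zero-extension of its own low byte, i.e. whether the high byte is
  // non-zero.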
2004  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2005  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2006  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2007 
2008  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
2009  {LeftOperand, RightOperand});
2010  auto Mul = Mulo->getOperand(0);
2011  MIRBuilder.buildTrunc(Result, Mul);
2012 
2013  MachineInstrBuilder ExtResult;
2014  // Overflow occurred if it occurred in the larger type, or if the high part
2015  // of the result does not zero/sign-extend the low part. Check this second
2016  // possibility first.
2017  if (IsSigned) {
2018  // For signed, overflow occurred when the high part does not sign-extend
2019  // the low part.
2020  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2021  } else {
2022  // Unsigned overflow occurred when the high part does not zero-extend the
2023  // low part.
2024  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2025  }
2026 
2027  // Multiplication cannot overflow if WideTy is at least twice the original
2028  // width, so we don't need to check the overflow result of the larger-type Mulo.
2029  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
2030  auto Overflow =
2031  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2032  // Finally check if the multiplication in the larger type itself overflowed.
2033  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2034  } else {
2035  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2036  }
2037  MI.eraseFromParent();
2038  return Legalized;
2039 }
2040 
2041 LegalizerHelper::LegalizeResult
2042 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2043  switch (MI.getOpcode()) {
2044  default:
2045  return UnableToLegalize;
2046  case TargetOpcode::G_ATOMICRMW_XCHG:
2047  case TargetOpcode::G_ATOMICRMW_ADD:
2048  case TargetOpcode::G_ATOMICRMW_SUB:
2049  case TargetOpcode::G_ATOMICRMW_AND:
2050  case TargetOpcode::G_ATOMICRMW_OR:
2051  case TargetOpcode::G_ATOMICRMW_XOR:
2052  case TargetOpcode::G_ATOMICRMW_MIN:
2053  case TargetOpcode::G_ATOMICRMW_MAX:
2054  case TargetOpcode::G_ATOMICRMW_UMIN:
2055  case TargetOpcode::G_ATOMICRMW_UMAX:
2056  assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2058  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2059  widenScalarDst(MI, WideTy, 0);
2061  return Legalized;
2062  case TargetOpcode::G_ATOMIC_CMPXCHG:
2063  assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2065  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2066  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2067  widenScalarDst(MI, WideTy, 0);
2069  return Legalized;
2070  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2071  if (TypeIdx == 0) {
2073  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2074  widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2075  widenScalarDst(MI, WideTy, 0);
2077  return Legalized;
2078  }
2079  assert(TypeIdx == 1 &&
2080  "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2082  widenScalarDst(MI, WideTy, 1);
2084  return Legalized;
2085  case TargetOpcode::G_EXTRACT:
2086  return widenScalarExtract(MI, TypeIdx, WideTy);
2087  case TargetOpcode::G_INSERT:
2088  return widenScalarInsert(MI, TypeIdx, WideTy);
2089  case TargetOpcode::G_MERGE_VALUES:
2090  return widenScalarMergeValues(MI, TypeIdx, WideTy);
2091  case TargetOpcode::G_UNMERGE_VALUES:
2092  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2093  case TargetOpcode::G_SADDO:
2094  case TargetOpcode::G_SSUBO:
2095  case TargetOpcode::G_UADDO:
2096  case TargetOpcode::G_USUBO:
2097  case TargetOpcode::G_SADDE:
2098  case TargetOpcode::G_SSUBE:
2099  case TargetOpcode::G_UADDE:
2100  case TargetOpcode::G_USUBE:
2101  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2102  case TargetOpcode::G_UMULO:
2103  case TargetOpcode::G_SMULO:
2104  return widenScalarMulo(MI, TypeIdx, WideTy);
2105  case TargetOpcode::G_SADDSAT:
2106  case TargetOpcode::G_SSUBSAT:
2107  case TargetOpcode::G_SSHLSAT:
2108  case TargetOpcode::G_UADDSAT:
2109  case TargetOpcode::G_USUBSAT:
2110  case TargetOpcode::G_USHLSAT:
2111  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2112  case TargetOpcode::G_CTTZ:
2113  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2114  case TargetOpcode::G_CTLZ:
2115  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2116  case TargetOpcode::G_CTPOP: {
2117  if (TypeIdx == 0) {
2119  widenScalarDst(MI, WideTy, 0);
2121  return Legalized;
2122  }
2123 
2124  Register SrcReg = MI.getOperand(1).getReg();
2125 
2126  // First extend the input.
2127  unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2128  MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2129  ? TargetOpcode::G_ANYEXT
2130  : TargetOpcode::G_ZEXT;
2131  auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2132  LLT CurTy = MRI.getType(SrcReg);
2133  unsigned NewOpc = MI.getOpcode();
2134  if (NewOpc == TargetOpcode::G_CTTZ) {
2135  // The count is the same in the larger type except if the original
2136  // value was zero. This can be handled by setting the bit just off
2137  // the top of the original type.
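  // For example (illustrative types): a G_CTTZ on s8 promoted to s32 ORs in
  // bit 8 of the widened source, so an all-zero s8 input yields 8 rather
  // than 32, matching the narrow semantics, and the operand is known
  // non-zero afterwards.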
2138  auto TopBit =
2139  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2140  MIBSrc = MIRBuilder.buildOr(
2141  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2142  // Now we know the operand is non-zero, use the more relaxed opcode.
2143  NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2144  }
2145 
2146  // Perform the operation at the larger size.
2147  auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2148  // This is already the correct result for CTPOP and CTTZs
2149  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2150  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2151  // The correct result is NewOp - (difference in width between WideTy and CurTy).
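  // e.g. a G_CTLZ of an s8 value computed on its s32 zero-extension counts 24
  // extra leading zeros, so subtract WideTy - CurTy = 24 bits.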
2152  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2153  MIBNewOp = MIRBuilder.buildSub(
2154  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2155  }
2156 
2157  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2158  MI.eraseFromParent();
2159  return Legalized;
2160  }
2161  case TargetOpcode::G_BSWAP: {
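  // Byte-swapping in the wider type leaves the original bytes in the high
  // part of the result, so the code below shifts right by the width
  // difference before truncating. e.g. an s16 bswap done as an s32 bswap is
  // followed by a logical shift right of 16 bits.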
2163  Register DstReg = MI.getOperand(0).getReg();
2164 
2165  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2166  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2167  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2168  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2169 
2170  MI.getOperand(0).setReg(DstExt);
2171 
2173 
2174  LLT Ty = MRI.getType(DstReg);
2175  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2176  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2177  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2178 
2179  MIRBuilder.buildTrunc(DstReg, ShrReg);
2181  return Legalized;
2182  }
2183  case TargetOpcode::G_BITREVERSE: {
2185 
2186  Register DstReg = MI.getOperand(0).getReg();
2187  LLT Ty = MRI.getType(DstReg);
2188  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2189 
2190  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2191  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2192  MI.getOperand(0).setReg(DstExt);
2194 
2195  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2196  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2197  MIRBuilder.buildTrunc(DstReg, Shift);
2199  return Legalized;
2200  }
2201  case TargetOpcode::G_FREEZE:
2203  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2204  widenScalarDst(MI, WideTy);
2206  return Legalized;
2207 
2208  case TargetOpcode::G_ABS:
2210  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2211  widenScalarDst(MI, WideTy);
2213  return Legalized;
2214 
2215  case TargetOpcode::G_ADD:
2216  case TargetOpcode::G_AND:
2217  case TargetOpcode::G_MUL:
2218  case TargetOpcode::G_OR:
2219  case TargetOpcode::G_XOR:
2220  case TargetOpcode::G_SUB:
2221  // Perform operation at larger width (any extension is fine here, high bits
2222  // don't affect the result) and then truncate the result back to the
2223  // original type.
2225  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2226  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2227  widenScalarDst(MI, WideTy);
2229  return Legalized;
2230 
2231  case TargetOpcode::G_SBFX:
2232  case TargetOpcode::G_UBFX:
2234 
2235  if (TypeIdx == 0) {
2236  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2237  widenScalarDst(MI, WideTy);
2238  } else {
2239  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2240  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2241  }
2242 
2244  return Legalized;
2245 
2246  case TargetOpcode::G_SHL:
2248 
2249  if (TypeIdx == 0) {
2250  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2251  widenScalarDst(MI, WideTy);
2252  } else {
2253  assert(TypeIdx == 1);
2254  // The "number of bits to shift" operand must preserve its value as an
2255  // unsigned integer:
2256  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2257  }
2258 
2260  return Legalized;
2261 
2262  case TargetOpcode::G_SDIV:
2263  case TargetOpcode::G_SREM:
2264  case TargetOpcode::G_SMIN:
2265  case TargetOpcode::G_SMAX:
2267  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2268  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2269  widenScalarDst(MI, WideTy);
2271  return Legalized;
2272 
2273  case TargetOpcode::G_SDIVREM:
2275  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2276  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2277  widenScalarDst(MI, WideTy);
2278  widenScalarDst(MI, WideTy, 1);
2280  return Legalized;
2281 
2282  case TargetOpcode::G_ASHR:
2283  case TargetOpcode::G_LSHR:
2285 
2286  if (TypeIdx == 0) {
2287  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2288  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2289 
2290  widenScalarSrc(MI, WideTy, 1, CvtOp);
2291  widenScalarDst(MI, WideTy);
2292  } else {
2293  assert(TypeIdx == 1);
2294  // The "number of bits to shift" operand must preserve its value as an
2295  // unsigned integer:
2296  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2297  }
2298 
2300  return Legalized;
2301  case TargetOpcode::G_UDIV:
2302  case TargetOpcode::G_UREM:
2303  case TargetOpcode::G_UMIN:
2304  case TargetOpcode::G_UMAX:
2306  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2307  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2308  widenScalarDst(MI, WideTy);
2310  return Legalized;
2311 
2312  case TargetOpcode::G_UDIVREM:
2314  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2315  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2316  widenScalarDst(MI, WideTy);
2317  widenScalarDst(MI, WideTy, 1);
2319  return Legalized;
2320 
2321  case TargetOpcode::G_SELECT:
2323  if (TypeIdx == 0) {
2324  // Perform operation at larger width (any extension is fine here, high
2325  // bits don't affect the result) and then truncate the result back to the
2326  // original type.
2327  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2328  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2329  widenScalarDst(MI, WideTy);
2330  } else {
2331  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2332  // Explicit extension is required here since high bits affect the result.
2333  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2334  }
2336  return Legalized;
2337 
2338  case TargetOpcode::G_FPTOSI:
2339  case TargetOpcode::G_FPTOUI:
2341 
2342  if (TypeIdx == 0)
2343  widenScalarDst(MI, WideTy);
2344  else
2345  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2346 
2348  return Legalized;
2349  case TargetOpcode::G_SITOFP:
2351 
2352  if (TypeIdx == 0)
2353  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2354  else
2355  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2356 
2358  return Legalized;
2359  case TargetOpcode::G_UITOFP:
2361 
2362  if (TypeIdx == 0)
2363  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2364  else
2365  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2366 
2368  return Legalized;
2369  case TargetOpcode::G_LOAD:
2370  case TargetOpcode::G_SEXTLOAD:
2371  case TargetOpcode::G_ZEXTLOAD:
2373  widenScalarDst(MI, WideTy);
2375  return Legalized;
2376 
2377  case TargetOpcode::G_STORE: {
2378  if (TypeIdx != 0)
2379  return UnableToLegalize;
2380 
2381  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2382  if (!Ty.isScalar())
2383  return UnableToLegalize;
2384 
2386 
2387  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2388  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2389  widenScalarSrc(MI, WideTy, 0, ExtType);
2390 
2392  return Legalized;
2393  }
2394  case TargetOpcode::G_CONSTANT: {
2395  MachineOperand &SrcMO = MI.getOperand(1);
2396  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2397  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2398  MRI.getType(MI.getOperand(0).getReg()));
2399  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2400  ExtOpc == TargetOpcode::G_ANYEXT) &&
2401  "Illegal Extend");
2402  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2403  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2404  ? SrcVal.sext(WideTy.getSizeInBits())
2405  : SrcVal.zext(WideTy.getSizeInBits());
2407  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2408 
2409  widenScalarDst(MI, WideTy);
2411  return Legalized;
2412  }
2413  case TargetOpcode::G_FCONSTANT: {
2414  MachineOperand &SrcMO = MI.getOperand(1);
2415  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2416  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2417  bool LosesInfo;
2418  switch (WideTy.getSizeInBits()) {
2419  case 32:
2420  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2421  &LosesInfo);
2422  break;
2423  case 64:
2424  Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2425  &LosesInfo);
2426  break;
2427  default:
2428  return UnableToLegalize;
2429  }
2430 
2431  assert(!LosesInfo && "extend should always be lossless");
2432 
2434  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2435 
2436  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2438  return Legalized;
2439  }
2440  case TargetOpcode::G_IMPLICIT_DEF: {
2442  widenScalarDst(MI, WideTy);
2444  return Legalized;
2445  }
2446  case TargetOpcode::G_BRCOND:
2448  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2450  return Legalized;
2451 
2452  case TargetOpcode::G_FCMP:
2454  if (TypeIdx == 0)
2455  widenScalarDst(MI, WideTy);
2456  else {
2457  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2458  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2459  }
2461  return Legalized;
2462 
2463  case TargetOpcode::G_ICMP:
2465  if (TypeIdx == 0)
2466  widenScalarDst(MI, WideTy);
2467  else {
2468  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2469  MI.getOperand(1).getPredicate()))
2470  ? TargetOpcode::G_SEXT
2471  : TargetOpcode::G_ZEXT;
2472  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2473  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2474  }
2476  return Legalized;
2477 
2478  case TargetOpcode::G_PTR_ADD:
2479  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2481  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2483  return Legalized;
2484 
2485  case TargetOpcode::G_PHI: {
2486  assert(TypeIdx == 0 && "Expecting only Idx 0");
2487 
2489  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2490  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2491  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2492  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2493  }
2494 
2495  MachineBasicBlock &MBB = *MI.getParent();
2497  widenScalarDst(MI, WideTy);
2499  return Legalized;
2500  }
2501  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2502  if (TypeIdx == 0) {
2503  Register VecReg = MI.getOperand(1).getReg();
2504  LLT VecTy = MRI.getType(VecReg);
2506 
2507  widenScalarSrc(
2508  MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2509  TargetOpcode::G_ANYEXT);
2510 
2511  widenScalarDst(MI, WideTy, 0);
2513  return Legalized;
2514  }
2515 
2516  if (TypeIdx != 2)
2517  return UnableToLegalize;
2519  // TODO: Probably should be zext
2520  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2522  return Legalized;
2523  }
2524  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2525  if (TypeIdx == 1) {
2527 
2528  Register VecReg = MI.getOperand(1).getReg();
2529  LLT VecTy = MRI.getType(VecReg);
2530  LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2531 
2532  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2533  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2534  widenScalarDst(MI, WideVecTy, 0);
2536  return Legalized;
2537  }
2538 
2539  if (TypeIdx == 2) {
2541  // TODO: Probably should be zext
2542  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2544  return Legalized;
2545  }
2546 
2547  return UnableToLegalize;
2548  }
2549  case TargetOpcode::G_FADD:
2550  case TargetOpcode::G_FMUL:
2551  case TargetOpcode::G_FSUB:
2552  case TargetOpcode::G_FMA:
2553  case TargetOpcode::G_FMAD:
2554  case TargetOpcode::G_FNEG:
2555  case TargetOpcode::G_FABS:
2556  case TargetOpcode::G_FCANONICALIZE:
2557  case TargetOpcode::G_FMINNUM:
2558  case TargetOpcode::G_FMAXNUM:
2559  case TargetOpcode::G_FMINNUM_IEEE:
2560  case TargetOpcode::G_FMAXNUM_IEEE:
2561  case TargetOpcode::G_FMINIMUM:
2562  case TargetOpcode::G_FMAXIMUM:
2563  case TargetOpcode::G_FDIV:
2564  case TargetOpcode::G_FREM:
2565  case TargetOpcode::G_FCEIL:
2566  case TargetOpcode::G_FFLOOR:
2567  case TargetOpcode::G_FCOS:
2568  case TargetOpcode::G_FSIN:
2569  case TargetOpcode::G_FLOG10:
2570  case TargetOpcode::G_FLOG:
2571  case TargetOpcode::G_FLOG2:
2572  case TargetOpcode::G_FRINT:
2573  case TargetOpcode::G_FNEARBYINT:
2574  case TargetOpcode::G_FSQRT:
2575  case TargetOpcode::G_FEXP:
2576  case TargetOpcode::G_FEXP2:
2577  case TargetOpcode::G_FPOW:
2578  case TargetOpcode::G_INTRINSIC_TRUNC:
2579  case TargetOpcode::G_INTRINSIC_ROUND:
2580  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2581  assert(TypeIdx == 0);
2583 
2584  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2585  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2586 
2587  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2589  return Legalized;
2590  case TargetOpcode::G_FPOWI: {
2591  if (TypeIdx != 0)
2592  return UnableToLegalize;
2594  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2595  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2597  return Legalized;
2598  }
2599  case TargetOpcode::G_INTTOPTR:
2600  if (TypeIdx != 1)
2601  return UnableToLegalize;
2602 
2604  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2606  return Legalized;
2607  case TargetOpcode::G_PTRTOINT:
2608  if (TypeIdx != 0)
2609  return UnableToLegalize;
2610 
2612  widenScalarDst(MI, WideTy, 0);
2614  return Legalized;
2615  case TargetOpcode::G_BUILD_VECTOR: {
2617 
2618  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2619  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2620  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2621 
2622  // Avoid changing the result vector type if the source element type was
2623  // requested.
2624  if (TypeIdx == 1) {
2625  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2626  } else {
2627  widenScalarDst(MI, WideTy, 0);
2628  }
2629 
2631  return Legalized;
2632  }
2633  case TargetOpcode::G_SEXT_INREG:
2634  if (TypeIdx != 0)
2635  return UnableToLegalize;
2636 
2638  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2639  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2641  return Legalized;
2642  case TargetOpcode::G_PTRMASK: {
2643  if (TypeIdx != 1)
2644  return UnableToLegalize;
2646  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2648  return Legalized;
2649  }
2650  }
2651 }
2652 
2653 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2654  MachineIRBuilder &B, Register Src, LLT Ty) {
2655  auto Unmerge = B.buildUnmerge(Ty, Src);
2656  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2657  Pieces.push_back(Unmerge.getReg(I));
2658 }
2659 
2660 LegalizerHelper::LegalizeResult
2661 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2662  Register Dst = MI.getOperand(0).getReg();
2663  Register Src = MI.getOperand(1).getReg();
2664  LLT DstTy = MRI.getType(Dst);
2665  LLT SrcTy = MRI.getType(Src);
2666 
2667  if (SrcTy.isVector()) {
2668  LLT SrcEltTy = SrcTy.getElementType();
2669  SmallVector<Register, 8> SrcRegs;
2670 
2671  if (DstTy.isVector()) {
2672  int NumDstElt = DstTy.getNumElements();
2673  int NumSrcElt = SrcTy.getNumElements();
2674 
2675  LLT DstEltTy = DstTy.getElementType();
2676  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2677  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2678 
2679  // If there's an element size mismatch, insert intermediate casts to match
2680  // the result element type.
2681  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2682  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2683  //
2684  // =>
2685  //
2686  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2687  // %4:_(<2 x s8>) = G_BITCAST %2
2688  // %5:_(<2 x s8>) = G_BITCAST %3
2689  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2690  DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2691  SrcPartTy = SrcEltTy;
2692  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2693  //
2694  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2695  //
2696  // =>
2697  //
2698  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2699  // %4:_(s16) = G_BITCAST %2
2700  // %5:_(s16) = G_BITCAST %3
2701  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2702  SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2703  DstCastTy = DstEltTy;
2704  }
2705 
2706  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2707  for (Register &SrcReg : SrcRegs)
2708  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2709  } else
2710  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2711 
2712  MIRBuilder.buildMerge(Dst, SrcRegs);
2713  MI.eraseFromParent();
2714  return Legalized;
2715  }
2716 
2717  if (DstTy.isVector()) {
2718  SmallVector<Register, 8> SrcRegs;
2719  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2720  MIRBuilder.buildMerge(Dst, SrcRegs);
2721  MI.eraseFromParent();
2722  return Legalized;
2723  }
2724 
2725  return UnableToLegalize;
2726 }
2727 
2728 /// Figure out the bit offset into a register when coercing a vector index for
2729 /// the wide element type. This is only for the case of promoting a vector to
2730 /// one with larger elements.
2731 ///
2732 ///
2733 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2734 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
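///
/// For example (illustrative types): indexing a <8 x s8> viewed as <2 x s32>,
/// the element ratio is 4, so for %idx = 5 the sub-element index is 5 & 3 = 1
/// and the bit offset within the wide s32 element is 1 << 3 = 8.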
2735 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2736  Register Idx,
2737  unsigned NewEltSize,
2738  unsigned OldEltSize) {
2739  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2740  LLT IdxTy = B.getMRI()->getType(Idx);
2741 
2742  // Now figure out the amount we need to shift to get the target bits.
2743  auto OffsetMask = B.buildConstant(
2744  IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2745  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2746  return B.buildShl(IdxTy, OffsetIdx,
2747  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2748 }
2749 
2750 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2751 /// is casting to a vector with a smaller element size, perform multiple element
2752 /// extracts and merge the results. If this is coercing to a vector with larger
2753 /// elements, index the bitcasted vector and extract the target element with bit
2754 /// operations. This is intended to force the indexing in the native register
2755 /// size for architectures that can dynamically index the register file.
2756 LegalizerHelper::LegalizeResult
2757 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2758  LLT CastTy) {
2759  if (TypeIdx != 1)
2760  return UnableToLegalize;
2761 
2762  Register Dst = MI.getOperand(0).getReg();
2763  Register SrcVec = MI.getOperand(1).getReg();
2764  Register Idx = MI.getOperand(2).getReg();
2765  LLT SrcVecTy = MRI.getType(SrcVec);
2766  LLT IdxTy = MRI.getType(Idx);
2767 
2768  LLT SrcEltTy = SrcVecTy.getElementType();
2769  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2770  unsigned OldNumElts = SrcVecTy.getNumElements();
2771 
2772  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2773  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2774 
2775  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2776  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2777  if (NewNumElts > OldNumElts) {
2778  // Decreasing the vector element size
2779  //
2780  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2781  // =>
2782  // v4i32:castx = bitcast x:v2i64
2783  //
2784  // i64 = bitcast
2785  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2786  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2787  //
2788  if (NewNumElts % OldNumElts != 0)
2789  return UnableToLegalize;
2790 
2791  // Type of the intermediate result vector.
2792  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2793  LLT MidTy =
2794  LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2795 
2796  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2797 
2798  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2799  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2800 
2801  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2802  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2803  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2804  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2805  NewOps[I] = Elt.getReg(0);
2806  }
2807 
2808  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2809  MIRBuilder.buildBitcast(Dst, NewVec);
2810  MI.eraseFromParent();
2811  return Legalized;
2812  }
2813 
2814  if (NewNumElts < OldNumElts) {
2815  if (NewEltSize % OldEltSize != 0)
2816  return UnableToLegalize;
2817 
2818  // This only depends on powers of 2 because we use bit tricks to figure out
2819  // the bit offset we need to shift to get the target element. A general
2820  // expansion could emit division/multiply.
2821  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2822  return UnableToLegalize;
2823 
2824  // Increasing the vector element size.
2825  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2826  //
2827  // =>
2828  //
2829  // %cast = G_BITCAST %vec
2830  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2831  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2832  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2833  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2834  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2835  // %elt = G_TRUNC %elt_bits
2836 
2837  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2838  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2839 
2840  // Divide to get the index in the wider element type.
2841  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2842 
2843  Register WideElt = CastVec;
2844  if (CastTy.isVector()) {
2845  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2846  ScaledIdx).getReg(0);
2847  }
2848 
2849  // Compute the bit offset into the register of the target element.
2850  Register OffsetBits = getBitcastWiderVectorElementOffset(
2851  MIRBuilder, Idx, NewEltSize, OldEltSize);
2852 
2853  // Shift the wide element to get the target element.
2854  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2855  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2856  MI.eraseFromParent();
2857  return Legalized;
2858  }
2859 
2860  return UnableToLegalize;
2861 }
2862 
2863 /// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
2864 /// OffsetBits, while preserving the other bits in \p TargetReg.
2865 ///
2866 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
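///
/// For example (illustrative widths): inserting an s8 value at bit offset 16
/// of an s32 target uses the mask 0xff << 16, giving
/// (TargetReg & ~(0xff << 16)) | (zext(InsertReg) << 16).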
2867 static Register buildBitFieldInsert(MachineIRBuilder &B,
2868  Register TargetReg, Register InsertReg,
2869  Register OffsetBits) {
2870  LLT TargetTy = B.getMRI()->getType(TargetReg);
2871  LLT InsertTy = B.getMRI()->getType(InsertReg);
2872  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2873  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2874 
2875  // Produce a bitmask of the value to insert
2876  auto EltMask = B.buildConstant(
2877  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2878  InsertTy.getSizeInBits()));
2879  // Shift it into position
2880  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2881  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2882 
2883  // Clear out the bits in the wide element
2884  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2885 
2886  // The zero-extended value to insert already has zeros outside its field, so
2887  // OR it into the masked wide element.
2888  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2889 }
2890 
2891 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2892 /// is increasing the element size, perform the indexing in the target element
2893 /// type, and use bit operations to insert at the element position. This is
2894 /// intended for architectures that can dynamically index the register file and
2895 /// want to force indexing in the native register size.
2896 LegalizerHelper::LegalizeResult
2897 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
2898  LLT CastTy) {
2899  if (TypeIdx != 0)
2900  return UnableToLegalize;
2901 
2902  Register Dst = MI.getOperand(0).getReg();
2903  Register SrcVec = MI.getOperand(1).getReg();
2904  Register Val = MI.getOperand(2).getReg();
2905  Register Idx = MI.getOperand(3).getReg();
2906 
2907  LLT VecTy = MRI.getType(Dst);
2908  LLT IdxTy = MRI.getType(Idx);
2909 
2910  LLT VecEltTy = VecTy.getElementType();
2911  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2912  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2913  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2914 
2915  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2916  unsigned OldNumElts = VecTy.getNumElements();
2917 
2918  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2919  if (NewNumElts < OldNumElts) {
2920  if (NewEltSize % OldEltSize != 0)
2921  return UnableToLegalize;
2922 
2923  // This only depends on powers of 2 because we use bit tricks to figure out
2924  // the bit offset we need to shift to get the target element. A general
2925  // expansion could emit division/multiply.
2926  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2927  return UnableToLegalize;
2928 
2929  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2930  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2931 
2932  // Divide to get the index in the wider element type.
2933  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2934 
2935  Register ExtractedElt = CastVec;
2936  if (CastTy.isVector()) {
2937  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2938  ScaledIdx).getReg(0);
2939  }
2940 
2941  // Compute the bit offset into the register of the target element.
2942  Register OffsetBits = getBitcastWiderVectorElementOffset(
2943  MIRBuilder, Idx, NewEltSize, OldEltSize);
2944 
2945  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2946  Val, OffsetBits);
2947  if (CastTy.isVector()) {
2948  InsertedElt = MIRBuilder.buildInsertVectorElement(
2949  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2950  }
2951 
2952  MIRBuilder.buildBitcast(Dst, InsertedElt);
2953  MI.eraseFromParent();
2954  return Legalized;
2955  }
2956 
2957  return UnableToLegalize;
2958 }
2959 
2960 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
2961  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2962  Register DstReg = LoadMI.getDstReg();
2963  Register PtrReg = LoadMI.getPointerReg();
2964  LLT DstTy = MRI.getType(DstReg);
2965  MachineMemOperand &MMO = LoadMI.getMMO();
2966  LLT MemTy = MMO.getMemoryType();
2967  MachineFunction &MF = MIRBuilder.getMF();
2968 
2969  unsigned MemSizeInBits = MemTy.getSizeInBits();
2970  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2971 
2972  if (MemSizeInBits != MemStoreSizeInBits) {
2973  if (MemTy.isVector())
2974  return UnableToLegalize;
2975 
2976  // Promote to a byte-sized load if not loading an integral number of
2977  // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2978  LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2979  MachineMemOperand *NewMMO =
2980  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2981 
2982  Register LoadReg = DstReg;
2983  LLT LoadTy = DstTy;
2984 
2985  // If this wasn't already an extending load, we need to widen the result
2986  // register to avoid creating a load with a narrower result than the source.
2987  if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
2988  LoadTy = WideMemTy;
2989  LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
2990  }
2991 
2992  if (isa<GSExtLoad>(LoadMI)) {
2993  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2994  MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
2995  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
2996  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2997  // The extra bits are guaranteed to be zero, since we stored them that
2998  // way. A zext load from Wide thus automatically gives zext from MemVT.
2999  MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3000  } else {
3001  MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3002  }
3003 
3004  if (DstTy != LoadTy)
3005  MIRBuilder.buildTrunc(DstReg, LoadReg);
3006 
3007  LoadMI.eraseFromParent();
3008  return Legalized;
3009  }
3010 
3011  // Big endian lowering not implemented.
3012  if (MIRBuilder.getDataLayout().isBigEndian())
3013  return UnableToLegalize;
3014 
3015  // This load needs splitting into power of 2 sized loads.
3016  //
3017  // Our strategy here is to generate anyextending loads for the smaller
3018  // types up to next power-2 result type, and then combine the two larger
3019  // result values together, before truncating back down to the non-pow-2
3020  // type.
3021  // E.g. v1 = i24 load =>
3022  // v2 = i32 zextload (2 byte)
3023  // v3 = i32 load (1 byte)
3024  // v4 = i32 shl v3, 16
3025  // v5 = i32 or v4, v2
3026  // v1 = i24 trunc v5
3027  // By doing this we generate the correct truncate which should get
3028  // combined away as an artifact with a matching extend.
3029 
3030  uint64_t LargeSplitSize, SmallSplitSize;
3031 
3032  if (!isPowerOf2_32(MemSizeInBits)) {
3033  // This load needs splitting into power of 2 sized loads.
3034  LargeSplitSize = PowerOf2Floor(MemSizeInBits);
3035  SmallSplitSize = MemSizeInBits - LargeSplitSize;
3036  } else {
3037  // This is already a power of 2, but we still need to split this in half.
3038  //
3039  // Assume we're being asked to decompose an unaligned load.
3040  // TODO: If this requires multiple splits, handle them all at once.
3041  auto &Ctx = MF.getFunction().getContext();
3042  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3043  return UnableToLegalize;
3044 
3045  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3046  }
3047 
3048  if (MemTy.isVector()) {
3049  // TODO: Handle vector extloads
3050  if (MemTy != DstTy)
3051  return UnableToLegalize;
3052 
3053  // TODO: We can do better than scalarizing the vector and at least split it
3054  // in half.
3055  return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3056  }
3057 
3058  MachineMemOperand *LargeMMO =
3059  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3060  MachineMemOperand *SmallMMO =
3061  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3062 
3063  LLT PtrTy = MRI.getType(PtrReg);
3064  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3065  LLT AnyExtTy = LLT::scalar(AnyExtSize);
3066  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3067  PtrReg, *LargeMMO);
3068 
3069  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3070  LargeSplitSize / 8);
3071  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3072  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3073  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3074  SmallPtr, *SmallMMO);
3075 
3076  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3077  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3078 
3079  if (AnyExtTy == DstTy)
3080  MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3081  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3082  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3083  MIRBuilder.buildTrunc(DstReg, {Or});
3084  } else {
3085  assert(DstTy.isPointer() && "expected pointer");
3086  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3087 
3088  // FIXME: We currently consider this to be illegal for non-integral address
3089  // spaces, but we still need a way to reinterpret the bits.
3090  MIRBuilder.buildIntToPtr(DstReg, Or);
3091  }
3092 
3093  LoadMI.eraseFromParent();
3094  return Legalized;
3095 }
3096 
3097 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3098  // Lower a non-power of 2 store into multiple pow-2 stores.
3099  // E.g. split an i24 store into an i16 store + i8 store.
3100  // We do this by first extending the stored value to the next largest power
3101  // of 2 type, and then using truncating stores to store the components.
3102  // By doing this, likewise with G_LOAD, generate an extend that can be
3103  // artifact-combined away instead of leaving behind extracts.
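  // A sketch of the common case with illustrative types: an s24 G_STORE
  // becomes
  //   %ext:_(s32) = G_ANYEXT %val(s24)
  //   G_STORE %ext, %ptr           ; 2-byte truncating store
  //   %hi:_(s32) = G_LSHR %ext, 16
  //   G_STORE %hi, %ptr + 2        ; 1-byte truncating store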
3104  Register SrcReg = StoreMI.getValueReg();
3105  Register PtrReg = StoreMI.getPointerReg();
3106  LLT SrcTy = MRI.getType(SrcReg);
3107  MachineFunction &MF = MIRBuilder.getMF();
3108  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3109  LLT MemTy = MMO.getMemoryType();
3110 
3111  unsigned StoreWidth = MemTy.getSizeInBits();
3112  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3113 
3114  if (StoreWidth != StoreSizeInBits) {
3115  if (SrcTy.isVector())
3116  return UnableToLegalize;
3117 
3118  // Promote to a byte-sized store with upper bits zero if not
3119  // storing an integral number of bytes. For example, promote
3120  // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3121  LLT WideTy = LLT::scalar(StoreSizeInBits);
3122 
3123  if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3124  // Avoid creating a store with a narrower source than result.
3125  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3126  SrcTy = WideTy;
3127  }
3128 
3129  auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3130 
3131  MachineMemOperand *NewMMO =
3132  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3133  MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3134  StoreMI.eraseFromParent();
3135  return Legalized;
3136  }
3137 
3138  if (MemTy.isVector()) {
3139  // TODO: Handle vector trunc stores
3140  if (MemTy != SrcTy)
3141  return UnableToLegalize;
3142 
3143  // TODO: We can do better than scalarizing the vector and at least split it
3144  // in half.
3145  return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3146  }
3147 
3148  unsigned MemSizeInBits = MemTy.getSizeInBits();
3149  uint64_t LargeSplitSize, SmallSplitSize;
3150 
3151  if (!isPowerOf2_32(MemSizeInBits)) {
3152  LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3153  SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3154  } else {
3155  auto &Ctx = MF.getFunction().getContext();
3156  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3157  return UnableToLegalize; // Don't know what we're being asked to do.
3158 
3159  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3160  }
3161 
3162  // Extend to the next pow-2. If this store was itself the result of lowering,
3163  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3164  // that's wider than the stored size.
3165  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3166  const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3167 
3168  if (SrcTy.isPointer()) {
3169  const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3170  SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3171  }
3172 
3173  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3174 
3175  // Obtain the smaller value by shifting away the larger value.
3176  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3177  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3178 
3179  // Generate the PtrAdd and truncating stores.
3180  LLT PtrTy = MRI.getType(PtrReg);
3181  auto OffsetCst = MIRBuilder.buildConstant(
3182  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3183  auto SmallPtr =
3184  MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3185 
3186  MachineMemOperand *LargeMMO =
3187  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3188  MachineMemOperand *SmallMMO =
3189  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3190  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3191  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3192  StoreMI.eraseFromParent();
3193  return Legalized;
3194 }
3195 
3196 LegalizerHelper::LegalizeResult
3197 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3198  switch (MI.getOpcode()) {
3199  case TargetOpcode::G_LOAD: {
3200  if (TypeIdx != 0)
3201  return UnableToLegalize;
3202  MachineMemOperand &MMO = **MI.memoperands_begin();
3203 
3204  // Not sure how to interpret a bitcast of an extending load.
3205  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3206  return UnableToLegalize;
3207 
3209  bitcastDst(MI, CastTy, 0);
3210  MMO.setType(CastTy);
3212  return Legalized;
3213  }
3214  case TargetOpcode::G_STORE: {
3215  if (TypeIdx != 0)
3216  return UnableToLegalize;
3217 
3218  MachineMemOperand &MMO = **MI.memoperands_begin();
3219 
3220  // Not sure how to interpret a bitcast of a truncating store.
3221  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3222  return UnableToLegalize;
3223 
3225  bitcastSrc(MI, CastTy, 0);
3226  MMO.setType(CastTy);
3228  return Legalized;
3229  }
3230  case TargetOpcode::G_SELECT: {
3231  if (TypeIdx != 0)
3232  return UnableToLegalize;
3233 
3234  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3235  LLVM_DEBUG(
3236  dbgs() << "bitcast action not implemented for vector select\n");
3237  return UnableToLegalize;
3238  }
3239 
3241  bitcastSrc(MI, CastTy, 2);
3242  bitcastSrc(MI, CastTy, 3);
3243  bitcastDst(MI, CastTy, 0);
3245  return Legalized;
3246  }
3247  case TargetOpcode::G_AND:
3248  case TargetOpcode::G_OR:
3249  case TargetOpcode::G_XOR: {
3251  bitcastSrc(MI, CastTy, 1);
3252  bitcastSrc(MI, CastTy, 2);
3253  bitcastDst(MI, CastTy, 0);
3255  return Legalized;
3256  }
3257  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3258  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3259  case TargetOpcode::G_INSERT_VECTOR_ELT:
3260  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3261  default:
3262  return UnableToLegalize;
3263  }
3264 }
3265 
3266 // Legalize an instruction by changing the opcode in place.
3267 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3269  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3271 }
3272 
3273 LegalizerHelper::LegalizeResult
3274 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3275  using namespace TargetOpcode;
3276 
3277  switch(MI.getOpcode()) {
3278  default:
3279  return UnableToLegalize;
3280  case TargetOpcode::G_BITCAST:
3281  return lowerBitcast(MI);
3282  case TargetOpcode::G_SREM:
3283  case TargetOpcode::G_UREM: {
3284  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3285  auto Quot =
3286  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3287  {MI.getOperand(1), MI.getOperand(2)});
3288 
3289  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3290  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3291  MI.eraseFromParent();
3292  return Legalized;
3293  }
3294  case TargetOpcode::G_SADDO:
3295  case TargetOpcode::G_SSUBO:
3296  return lowerSADDO_SSUBO(MI);
3297  case TargetOpcode::G_UMULH:
3298  case TargetOpcode::G_SMULH:
3299  return lowerSMULH_UMULH(MI);
3300  case TargetOpcode::G_SMULO:
3301  case TargetOpcode::G_UMULO: {
3302  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3303  // result.
3304  Register Res = MI.getOperand(0).getReg();
3305  Register Overflow = MI.getOperand(1).getReg();
3306  Register LHS = MI.getOperand(2).getReg();
3307  Register RHS = MI.getOperand(3).getReg();
3308  LLT Ty = MRI.getType(Res);
3309 
3310  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3311  ? TargetOpcode::G_SMULH
3312  : TargetOpcode::G_UMULH;
3313 
3315  const auto &TII = MIRBuilder.getTII();
3316  MI.setDesc(TII.get(TargetOpcode::G_MUL));
3317  MI.RemoveOperand(1);
3319 
3320  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3321  auto Zero = MIRBuilder.buildConstant(Ty, 0);
3322 
3323  // Move insert point forward so we can use the Res register if needed.
3325 
3326  // For *signed* multiply, overflow is detected by checking:
3327  // (hi != (lo >> bitwidth-1))
3328  if (Opcode == TargetOpcode::G_SMULH) {
3329  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3330  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3331  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3332  } else {
3333  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3334  }
3335  return Legalized;
3336  }
3337  case TargetOpcode::G_FNEG: {
3338  Register Res = MI.getOperand(0).getReg();
3339  LLT Ty = MRI.getType(Res);
3340 
3341  // TODO: Handle vector types once we are able to
3342  // represent them.
3343  if (Ty.isVector())
3344  return UnableToLegalize;
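  // Lower FNEG to a bitwise XOR with the sign-bit mask of the type, which
  // flips only the sign bit of the operand.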
3345  auto SignMask =
3346  MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3347  Register SubByReg = MI.getOperand(1).getReg();
3348  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3349  MI.eraseFromParent();
3350  return Legalized;
3351  }
3352  case TargetOpcode::G_FSUB: {
3353  Register Res = MI.getOperand(0).getReg();
3354  LLT Ty = MRI.getType(Res);
3355 
3356  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3357  // First, check if G_FNEG is marked as Lower. If so, we may
3358  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3359  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3360  return UnableToLegalize;
3361  Register LHS = MI.getOperand(1).getReg();
3362  Register RHS = MI.getOperand(2).getReg();
3363  Register Neg = MRI.createGenericVirtualRegister(Ty);
3364  MIRBuilder.buildFNeg(Neg, RHS);
3365  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3366  MI.eraseFromParent();
3367  return Legalized;
3368  }
3369  case TargetOpcode::G_FMAD:
3370  return lowerFMad(MI);
3371  case TargetOpcode::G_FFLOOR:
3372  return lowerFFloor(MI);
3373  case TargetOpcode::G_INTRINSIC_ROUND:
3374  return lowerIntrinsicRound(MI);
3375  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3376  // Since round even is the assumed rounding mode for unconstrained FP
3377  // operations, rint and roundeven are the same operation.
3378  changeOpcode(MI, TargetOpcode::G_FRINT);
3379  return Legalized;
3380  }
3381  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
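  // Expand to a plain G_ATOMIC_CMPXCHG plus an explicit compare of the loaded
  // value against the expected value to produce the success flag.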
3382  Register OldValRes = MI.getOperand(0).getReg();
3383  Register SuccessRes = MI.getOperand(1).getReg();
3384  Register Addr = MI.getOperand(2).getReg();
3385  Register CmpVal = MI.getOperand(3).getReg();
3386  Register NewVal = MI.getOperand(4).getReg();
3387  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3388  **MI.memoperands_begin());
3389  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3390  MI.eraseFromParent();
3391  return Legalized;
3392  }
3393  case TargetOpcode::G_LOAD:
3394  case TargetOpcode::G_SEXTLOAD:
3395  case TargetOpcode::G_ZEXTLOAD:
3396  return lowerLoad(cast<GAnyLoad>(MI));
3397  case TargetOpcode::G_STORE:
3398  return lowerStore(cast<GStore>(MI));
3399  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3400  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3401  case TargetOpcode::G_CTLZ:
3402  case TargetOpcode::G_CTTZ:
3403  case TargetOpcode::G_CTPOP:
3404  return lowerBitCount(MI);
3405  case G_UADDO: {
3406  Register Res = MI.getOperand(0).getReg();
3407  Register CarryOut = MI.getOperand(1).getReg();
3408  Register LHS = MI.getOperand(2).getReg();
3409  Register RHS = MI.getOperand(3).getReg();
3410 
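  // The add wraps on overflow, so the carry out is set iff the result is
  // (unsigned) less than RHS.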
3411  MIRBuilder.buildAdd(Res, LHS, RHS);
3412  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3413 
3414  MI.eraseFromParent();
3415  return Legalized;
3416  }
3417  case G_UADDE: {
3418  Register Res = MI.getOperand(0).getReg();
3419  Register CarryOut = MI.getOperand(1).getReg();
3420  Register LHS = MI.getOperand(2).getReg();
3421  Register RHS = MI.getOperand(3).getReg();
3422  Register CarryIn = MI.getOperand(4).getReg();
3423  LLT Ty = MRI.getType(Res);
3424 
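  // Res = LHS + RHS + zext(CarryIn); the carry out is set when the final
  // result is (unsigned) less than LHS.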
3425  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3426  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3427  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3428  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3429 
3430  MI.eraseFromParent();
3431  return Legalized;
3432  }
3433  case G_USUBO: {
3434  Register Res = MI.getOperand(0).getReg();
3435  Register BorrowOut = MI.getOperand(1).getReg();
3436  Register LHS = MI.getOperand(2).getReg();
3437  Register RHS = MI.getOperand(3).getReg();
3438 
3439  MIRBuilder.buildSub(Res, LHS, RHS);
3440  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3441 
3442  MI.eraseFromParent();
3443  return Legalized;
3444  }
3445  case G_USUBE: {
3446  Register Res = MI.getOperand(0).getReg();
3447  Register BorrowOut = MI.getOperand(1).getReg();
3448  Register LHS = MI.getOperand(2).getReg();
3449  Register RHS = MI.getOperand(3).getReg();
3450  Register BorrowIn = MI.getOperand(4).getReg();
3451  const LLT CondTy = MRI.getType(BorrowOut);
3452  const LLT Ty = MRI.getType(Res);
3453 
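  // Res = LHS - RHS - zext(BorrowIn); the borrow out is the incoming borrow
  // when LHS == RHS, and otherwise LHS <u RHS.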
3454  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3455  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3456  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3457 
3458  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3459  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3460  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3461 
3462  MI.eraseFromParent();
3463  return Legalized;
3464  }
3465  case G_UITOFP:
3466  return lowerUITOFP(MI);
3467  case G_SITOFP:
3468  return lowerSITOFP(MI);
3469  case G_FPTOUI:
3470  return lowerFPTOUI(MI);
3471  case G_FPTOSI:
3472  return lowerFPTOSI(MI);
3473  case G_FPTRUNC:
3474  return lowerFPTRUNC(MI);
3475  case G_FPOWI:
3476  return lowerFPOWI(MI);
3477  case G_SMIN:
3478  case G_SMAX:
3479  case G_UMIN:
3480  case G_UMAX:
3481  return lowerMinMax(MI);
3482  case G_FCOPYSIGN:
3483  return lowerFCopySign(MI);
3484  case G_FMINNUM:
3485  case G_FMAXNUM:
3486  return lowerFMinNumMaxNum(MI);
3487  case G_MERGE_VALUES:
3488  return lowerMergeValues(MI);
3489  case G_UNMERGE_VALUES:
3490  return lowerUnmergeValues(MI);
3491  case TargetOpcode::G_SEXT_INREG: {
3492  assert(MI.getOperand(2).isImm() && "Expected immediate");
3493  int64_t SizeInBits = MI.getOperand(2).getImm();
3494 
3495  Register DstReg = MI.getOperand(0).getReg();
3496  Register SrcReg = MI.getOperand(1).getReg();
3497  LLT DstTy = MRI.getType(DstReg);
3498  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3499 
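  // Shift the value left so the sign bit of the field becomes the MSB, then
  // arithmetic-shift it back down to sign-extend in register.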
3500  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3501  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3502  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3503  MI.eraseFromParent();
3504  return Legalized;
3505  }
3506  case G_EXTRACT_VECTOR_ELT:
3507  case G_INSERT_VECTOR_ELT:
3508  return lowerExtractInsertVectorElt(MI);
3509  case G_SHUFFLE_VECTOR:
3510  return lowerShuffleVector(MI);
3511  case G_DYN_STACKALLOC:
3512  return lowerDynStackAlloc(MI);
3513  case G_EXTRACT:
3514  return lowerExtract(MI);
3515  case G_INSERT:
3516  return lowerInsert(MI);
3517  case G_BSWAP:
3518  return lowerBswap(MI);
3519  case G_BITREVERSE:
3520  return lowerBitreverse(MI);
3521  case G_READ_REGISTER:
3522  case G_WRITE_REGISTER:
3523  return lowerReadWriteRegister(MI);
3524  case G_UADDSAT:
3525  case G_USUBSAT: {
3526  // Try to make a reasonable guess about which lowering strategy to use. The
3527  // target can override this by requesting custom lowering and calling the
3528  // implementation functions directly.
3529  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3530  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3531  return lowerAddSubSatToMinMax(MI);
3532  return lowerAddSubSatToAddoSubo(MI);
3533  }
3534  case G_SADDSAT:
3535  case G_SSUBSAT: {
3536  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3537 
3538  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3539  // since it's a shorter expansion. However, we would need to figure out the
3540  // preferred boolean type for the carry out for the query.
3541  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3542  return lowerAddSubSatToMinMax(MI);
3543  return lowerAddSubSatToAddoSubo(MI);
3544  }
3545  case G_SSHLSAT:
3546  case G_USHLSAT:
3547  return lowerShlSat(MI);
3548  case G_ABS:
3549  return lowerAbsToAddXor(MI);
3550  case G_SELECT:
3551  return lowerSelect(MI);
3552  case G_SDIVREM:
3553  case G_UDIVREM:
3554  return lowerDIVREM(MI);
3555  case G_FSHL:
3556  case G_FSHR:
3557  return lowerFunnelShift(MI);
3558  case G_ROTL:
3559  case G_ROTR:
3560  return lowerRotate(MI);
3561  case G_MEMSET:
3562  case G_MEMCPY:
3563  case G_MEMMOVE:
3564  return lowerMemCpyFamily(MI);
3565  case G_MEMCPY_INLINE:
3566  return lowerMemcpyInline(MI);
3567  GISEL_VECREDUCE_CASES_NONSEQ
3568  return lowerVectorReduction(MI);
3569  }
3570 }
3571 
3573  Align MinAlign) const {
3574  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3575  // datalayout for the preferred alignment. Also there should be a target hook
3576  // for this to allow targets to reduce the alignment and ignore the
3577  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3578  // the type.
3580 }
3581 
3584  MachinePointerInfo &PtrInfo) {
3585  MachineFunction &MF = MIRBuilder.getMF();
3586  const DataLayout &DL = MIRBuilder.getDataLayout();
3587  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3588 
3589  unsigned AddrSpace = DL.getAllocaAddrSpace();
3590  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3591 
3592  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3593  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3594 }
3595 
3596 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3597  LLT VecTy) {
3598  int64_t IdxVal;
3599  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3600  return IdxReg;
3601 
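  // Clamp the index to a valid element number: a power-of-two element count
  // can be clamped with a cheap mask, otherwise use an unsigned min against
  // the last valid index.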
3602  LLT IdxTy = B.getMRI()->getType(IdxReg);
3603  unsigned NElts = VecTy.getNumElements();
3604  if (isPowerOf2_32(NElts)) {
3605  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3606  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3607  }
3608 
3609  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3610  .getReg(0);
3611 }
3612 
3613 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3614  Register Index) {
3615  LLT EltTy = VecTy.getElementType();
3616 
3617  // Calculate the element offset and add it to the pointer.
3618  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3619  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3620  "Converting bits to bytes lost precision");
3621 
3622  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3623 
3624  LLT IdxTy = MRI.getType(Index);
3625  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3626  MIRBuilder.buildConstant(IdxTy, EltSize));
3627 
3628  LLT PtrTy = MRI.getType(VecPtr);
3629  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3630 }
3631 
3632 #ifndef NDEBUG
3633 /// Check that all vector operands have the same number of elements. Other
3634 /// operands should be listed in \p NonVecOpIndices.
3635 static bool hasSameNumEltsOnAllVectorOperands(
3636  GenericMachineInstr &MI, MachineRegisterInfo &MRI,
3637  std::initializer_list<unsigned> NonVecOpIndices) {
3638  if (MI.getNumMemOperands() != 0)
3639  return false;
3640 
3641  LLT VecTy = MRI.getType(MI.getReg(0));
3642  if (!VecTy.isVector())
3643  return false;
3644  unsigned NumElts = VecTy.getNumElements();
3645 
3646  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
3647  MachineOperand &Op = MI.getOperand(OpIdx);
3648  if (!Op.isReg()) {
3649  if (!is_contained(NonVecOpIndices, OpIdx))
3650  return false;
3651  continue;
3652  }
3653 
3654  LLT Ty = MRI.getType(Op.getReg());
3655  if (!Ty.isVector()) {
3656  if (!is_contained(NonVecOpIndices, OpIdx))
3657  return false;
3658  continue;
3659  }
3660 
3661  if (Ty.getNumElements() != NumElts)
3662  return false;
3663  }
3664 
3665  return true;
3666 }
3667 #endif
3668 
3669 /// Fill \p DstOps with DstOps that together have the same number of elements
3670 /// as \p Ty. Each DstOp is either a scalar when \p NumElts = 1 or a vector
3671 /// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
3672 /// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
3673 static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
3674  unsigned NumElts) {
3675  LLT LeftoverTy;
3676  assert(Ty.isVector() && "Expected vector type");
3677  LLT EltTy = Ty.getElementType();
3678  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
3679  int NumParts, NumLeftover;
3680  std::tie(NumParts, NumLeftover) =
3681  getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
3682 
3683  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
3684  for (int i = 0; i < NumParts; ++i) {
3685  DstOps.push_back(NarrowTy);
3686  }
3687 
3688  if (LeftoverTy.isValid()) {
3689  assert(NumLeftover == 1 && "expected exactly one leftover");
3690  DstOps.push_back(LeftoverTy);
3691  }
3692 }
3693 
3694 /// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
3695 /// created from \p Op depending on the operand type.
3696 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
3697  MachineOperand &Op) {
3698  for (unsigned i = 0; i < N; ++i) {
3699  if (Op.isReg())
3700  Ops.push_back(Op.getReg());
3701  else if (Op.isImm())
3702  Ops.push_back(Op.getImm());
3703  else if (Op.isPredicate())
3704  Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
3705  else
3706  llvm_unreachable("Unsupported type");
3707  }
3708 }
3709 
3710 // Handle splitting vector operations which need to have the same number of
3711 // elements in each type index, but each type index may have a different element
3712 // type.
3713 //
3714 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3715 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3716 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3717 //
3718 // Also handles some irregular breakdown cases, e.g.
3719 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3720 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3721 // s64 = G_SHL s64, s32
3722 LegalizerHelper::LegalizeResult
3723 LegalizerHelper::fewerElementsVectorMultiEltType(
3724  GenericMachineInstr &MI, unsigned NumElts,
3725  std::initializer_list<unsigned> NonVecOpIndices) {
3726  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
3727  "Non-compatible opcode or not specified non-vector operands");
3728  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3729 
3730  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3731  unsigned NumDefs = MI.getNumDefs();
3732 
3733  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
3734  // Build instructions with these DstOps so an instruction found by CSE is used
3735  // directly; CSE copies the found instruction into the given vreg destination.
3736  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
3737  // Output registers will be taken from created instructions.
3738  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
3739  for (unsigned i = 0; i < NumDefs; ++i) {
3740  makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
3741  }
3742 
3743  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
3744  // Operands listed in NonVecOpIndices will be used as is without splitting;
3745  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
3746  // scalar condition (op 1), immediate in sext_inreg (op 2).
3747  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
3748  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3749  ++UseIdx, ++UseNo) {
3750  if (is_contained(NonVecOpIndices, UseIdx)) {
3751  broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
3752  MI.getOperand(UseIdx));
3753  } else {
3754  SmallVector<Register, 8> SplitPieces;
3755  extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
3756  for (auto Reg : SplitPieces)
3757  InputOpsPieces[UseNo].push_back(Reg);
3758  }
3759  }
3760 
3761  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3762 
3763  // Take i-th piece of each input operand split and build sub-vector/scalar
3764  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
3765  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3766  SmallVector<DstOp, 2> Defs;
3767  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3768  Defs.push_back(OutputOpsPieces[DstNo][i]);
3769 
3770  SmallVector<SrcOp, 3> Uses;
3771  for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3772  Uses.push_back(InputOpsPieces[InputNo][i]);
3773 
3774  auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
3775  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3776  OutputRegs[DstNo].push_back(I.getReg(DstNo));
3777  }
3778 
3779  // Merge small outputs into MI's output for each def operand.
3780  if (NumLeftovers) {
3781  for (unsigned i = 0; i < NumDefs; ++i)
3782  mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
3783  } else {
3784  for (unsigned i = 0; i < NumDefs; ++i)
3785  MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
3786  }
3787 
3788  MI.eraseFromParent();
3789  return Legalized;
3790 }
3791 
3792 LegalizerHelper::LegalizeResult
3793 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
3794  unsigned NumElts) {
3795  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3796 
3797  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3798  unsigned NumDefs = MI.getNumDefs();
3799 
3800  SmallVector<DstOp, 8> OutputOpsPieces;
3801  SmallVector<Register, 8> OutputRegs;
3802  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
3803 
3804  // Instructions that perform the register split will be inserted in the basic
3805  // block where the register is defined (the basic block is the next operand).
3806  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
3807  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3808  UseIdx += 2, ++UseNo) {
3809  MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
3810  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3811  extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
3812  }
3813 
3814  // Build PHIs with fewer elements.
3815  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3816  MIRBuilder.setInsertPt(*MI.getParent(), MI);
3817  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3818  auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
3819  Phi.addDef(
3820  MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
3821  OutputRegs.push_back(Phi.getReg(0));
3822 
3823  for (unsigned j = 0; j < NumInputs / 2; ++j) {
3824  Phi.addUse(InputOpsPieces[j][i]);
3825  Phi.add(MI.getOperand(1 + j * 2 + 1));
3826  }
3827  }
3828 
3829  // Merge small outputs into MI's def.
3830  if (NumLeftovers) {
3831  mergeMixedSubvectors(MI.getReg(0), OutputRegs);
3832  } else {
3833  MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
3834  }
3835 
3836  MI.eraseFromParent();
3837  return Legalized;
3838 }
3839 
3840 LegalizerHelper::LegalizeResult
3841 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3842  unsigned TypeIdx,
3843  LLT NarrowTy) {
3844  const int NumDst = MI.getNumOperands() - 1;
3845  const Register SrcReg = MI.getOperand(NumDst).getReg();
3846  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3847  LLT SrcTy = MRI.getType(SrcReg);
3848 
3849  if (TypeIdx != 1 || NarrowTy == DstTy)
3850  return UnableToLegalize;
3851 
3852  // Requires compatible types. Otherwise SrcReg should have been defined by a
3853  // merge-like instruction that would get artifact-combined. Most likely the
3854  // instruction that defines SrcReg has to perform a more/fewer elements
3855  // legalization compatible with NarrowTy.
3856  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3857  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3858 
3859  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3860  (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
3861  return UnableToLegalize;
3862 
3863  // This is most likely DstTy (smaller than register size) packed in SrcTy
3864  // (larger than register size), and since the unmerge was not combined it will
3865  // be lowered to bit sequence extracts from a register. Unpack SrcTy to NarrowTy
3866  // (register size) pieces first, then unpack each NarrowTy piece to DstTy.
3867 
3868  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
3869  //
3870  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
3871  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
3872  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
3873  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
3874  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3875  const int PartsPerUnmerge = NumDst / NumUnmerge;
3876 
3877  for (int I = 0; I != NumUnmerge; ++I) {
3878  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3879 
3880  for (int J = 0; J != PartsPerUnmerge; ++J)
3881  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3882  MIB.addUse(Unmerge.getReg(I));
3883  }
3884 
3885  MI.eraseFromParent();
3886  return Legalized;
3887 }
3888 
3889 LegalizerHelper::LegalizeResult
3890 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3891  LLT NarrowTy) {
3892  Register DstReg = MI.getOperand(0).getReg();
3893  LLT DstTy = MRI.getType(DstReg);
3894  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3895  // Requires compatible types. Otherwise the user of DstReg did not perform an
3896  // unmerge that should have been artifact-combined. Most likely the instruction
3897  // that uses DstReg has to do a more/fewer elements legalization compatible with NarrowTy.
3898  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3899  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3900  if (NarrowTy == SrcTy)
3901  return UnableToLegalize;
3902 
3903  // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
3904  // use is for old mir tests. Since the changes to the more/fewer-elements
3905  // legalization, it should no longer be possible to generate MIR like this when
3906  // starting from llvm-ir because the LCMTy approach was replaced with merge/unmerge to vector elements.
3907  if (TypeIdx == 1) {
3908  assert(SrcTy.isVector() && "Expected vector types");
3909  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3910  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3911  (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
3912  return UnableToLegalize;
3913  // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
3914  //
3915  // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
3916  // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
3917  // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
3918  // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
3919  // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
3920  // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
3921 
3922  SmallVector<Register, 8> Elts;
3923  LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
3924  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
3925  auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
3926  for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
3927  Elts.push_back(Unmerge.getReg(j));
3928  }
3929 
3930  SmallVector<Register, 8> NarrowTyElts;
3931  unsigned NumNarrowTyElts = NarrowTy.getNumElements();
3932  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
3933  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
3934  ++i, Offset += NumNarrowTyElts) {
3935  ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
3936  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
3937  }
3938 
3939  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3940  MI.eraseFromParent();
3941  return Legalized;
3942  }
3943 
3944  assert(TypeIdx == 0 && "Bad type index");
3945  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
3946  (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
3947  return UnableToLegalize;
3948 
3949  // This is most likely SrcTy (smaller than register size) packed in DstTy
3950  // (larger than register size), and since the merge was not combined it will be
3951  // lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
3952  // (register size) pieces first, then merge each NarrowTy piece into DstTy.
3953 
3954  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
3955  //
3956  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
3957  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
3958  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
3959  SmallVector<Register, 8> NarrowTyElts;
3960  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3961  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
3962  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
3963  for (unsigned i = 0; i < NumParts; ++i) {
3964  SmallVector<Register, 8> Sources;
3965  for (unsigned j = 0; j < NumElts; ++j)
3966  Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
3967  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
3968  }
3969 
3970  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3971  MI.eraseFromParent();
3972  return Legalized;
3973 }
3974 
3975 LegalizerHelper::LegalizeResult
3976 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3977  unsigned TypeIdx,
3978  LLT NarrowVecTy) {
3979  Register DstReg = MI.getOperand(0).getReg();
3980  Register SrcVec = MI.getOperand(1).getReg();
3981  Register InsertVal;
3982  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3983 
3984  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3985  if (IsInsert)
3986  InsertVal = MI.getOperand(2).getReg();
3987 
3988  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3989 
3990  // TODO: Handle total scalarization case.
3991  if (!NarrowVecTy.isVector())
3992  return UnableToLegalize;
3993 
3994  LLT VecTy = MRI.getType(SrcVec);
3995 
3996  // If the index is a constant, we can really break this down as you would
3997  // expect, and index into the target size pieces.
3998  int64_t IdxVal;
3999  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4000  if (MaybeCst) {
4001  IdxVal = MaybeCst->Value.getSExtValue();
4002  // Avoid out of bounds indexing the pieces.
4003  if (IdxVal >= VecTy.getNumElements()) {
4004  MIRBuilder.buildUndef(DstReg);
4005  MI.eraseFromParent();
4006  return Legalized;
4007  }
4008 
4009  SmallVector<Register, 8> VecParts;
4010  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4011 
4012  // Build a sequence of NarrowTy pieces in VecParts for this operand.
4013  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4014  TargetOpcode::G_ANYEXT);
4015 
4016  unsigned NewNumElts = NarrowVecTy.getNumElements();
4017 
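  // Work out which NarrowVecTy piece the constant index falls into and rebase
  // the index to be relative to that piece.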
4018  LLT IdxTy = MRI.getType(Idx);
4019  int64_t PartIdx = IdxVal / NewNumElts;
4020  auto NewIdx =
4021  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4022 
4023  if (IsInsert) {
4024  LLT PartTy = MRI.getType(VecParts[PartIdx]);
4025 
4026  // Use the adjusted index to insert into one of the subvectors.
4027  auto InsertPart = MIRBuilder.buildInsertVectorElement(
4028  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4029  VecParts[PartIdx] = InsertPart.getReg(0);
4030 
4031  // Recombine the inserted subvector with the others to reform the result
4032  // vector.
4033  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4034  } else {
4035  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4036  }
4037 
4038  MI.eraseFromParent();
4039  return Legalized;
4040  }
4041 
4042  // With a variable index, we can't perform the operation in a smaller type, so
4043  // we're forced to expand this.
4044  //
4045  // TODO: We could emit a chain of compare/select to figure out which piece to
4046  // index.
4047  return lowerExtractInsertVectorElt(MI);
4048 }
4049 
4050 LegalizerHelper::LegalizeResult
4051 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4052  LLT NarrowTy) {
4053  // FIXME: Don't know how to handle secondary types yet.
4054  if (TypeIdx != 0)
4055  return UnableToLegalize;
4056 
4057  // This implementation doesn't work for atomics. Give up instead of doing
4058  // something invalid.
4059  if (LdStMI.isAtomic())
4060  return UnableToLegalize;
4061 
4062  bool IsLoad = isa<GLoad>(LdStMI);
4063  Register ValReg = LdStMI.getReg(0);
4064  Register AddrReg = LdStMI.getPointerReg();
4065  LLT ValTy = MRI.getType(ValReg);
4066 
4067  // FIXME: Do we need a distinct NarrowMemory legalize action?
4068  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4069  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4070  return UnableToLegalize;
4071  }
4072 
4073  int NumParts = -1;
4074  int NumLeftover = -1;
4075  LLT LeftoverTy;
4076  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4077  if (IsLoad) {
4078  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4079  } else {
4080  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4081  NarrowLeftoverRegs)) {
4082  NumParts = NarrowRegs.size();
4083  NumLeftover = NarrowLeftoverRegs.size();
4084  }
4085  }
4086 
4087  if (NumParts == -1)
4088  return UnableToLegalize;
4089 
4090  LLT PtrTy = MRI.getType(AddrReg);
4091  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4092 
4093  unsigned TotalSize = ValTy.getSizeInBits();
4094 
4095  // Split the load/store into PartTy-sized pieces starting at Offset. If this
4096  // is a load, return the new registers in ValRegs. For a store, each element
4097  // of ValRegs should have type PartTy. Returns the next offset that needs to
4098  // be handled.
4099  auto MMO = LdStMI.getMMO();
4100  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4101  unsigned Offset) -> unsigned {
4102  MachineFunction &MF = MIRBuilder.getMF();
4103  unsigned PartSize = PartTy.getSizeInBits();
4104  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4105  Offset += PartSize, ++Idx) {
4106  unsigned ByteOffset = Offset / 8;
4107  Register NewAddrReg;
4108 
4109  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4110 
4111  MachineMemOperand *NewMMO =
4112  MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4113 
4114  if (IsLoad) {
4115  Register Dst = MRI.createGenericVirtualRegister(PartTy);
4116  ValRegs.push_back(Dst);
4117  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4118  } else {
4119  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4120  }
4121  }
4122 
4123  return Offset;
4124  };
4125 
4126  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
4127 
4128  // Handle the rest of the register if this isn't an even type breakdown.
4129  if (LeftoverTy.isValid())
4130  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
4131 
4132  if (IsLoad) {
4133  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4134  LeftoverTy, NarrowLeftoverRegs);
4135  }
4136 
4137  LdStMI.eraseFromParent();
4138  return Legalized;
4139 }
4140 
4141 LegalizerHelper::LegalizeResult
4142 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4143  LLT NarrowTy) {
4144  using namespace TargetOpcode;
4145  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4146  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4147 
4148  switch (MI.getOpcode()) {
4149  case G_IMPLICIT_DEF:
4150  case G_TRUNC:
4151  case G_AND:
4152  case G_OR:
4153  case G_XOR:
4154  case G_ADD:
4155  case G_SUB:
4156  case G_MUL:
4157  case G_PTR_ADD:
4158  case G_SMULH:
4159  case G_UMULH:
4160  case G_FADD:
4161  case G_FMUL:
4162  case G_FSUB:
4163  case G_FNEG:
4164  case G_FABS:
4165  case G_FCANONICALIZE:
4166  case G_FDIV:
4167  case G_FREM:
4168  case G_FMA:
4169  case G_FMAD:
4170  case G_FPOW:
4171  case G_FEXP:
4172  case G_FEXP2:
4173  case G_FLOG:
4174  case G_FLOG2:
4175  case G_FLOG10:
4176  case G_FNEARBYINT:
4177  case G_FCEIL:
4178  case G_FFLOOR:
4179  case G_FRINT:
4180  case G_INTRINSIC_ROUND:
4181  case G_INTRINSIC_ROUNDEVEN:
4182  case G_INTRINSIC_TRUNC:
4183  case G_FCOS:
4184  case G_FSIN:
4185  case G_FSQRT:
4186  case G_BSWAP:
4187  case G_BITREVERSE:
4188  case G_SDIV:
4189  case G_UDIV:
4190  case G_SREM:
4191  case G_UREM:
4192  case G_SDIVREM:
4193  case G_UDIVREM:
4194  case G_SMIN:
4195  case G_SMAX:
4196  case G_UMIN:
4197  case G_UMAX:
4198  case G_ABS:
4199  case G_FMINNUM:
4200  case G_FMAXNUM:
4201  case G_FMINNUM_IEEE:
4202  case G_FMAXNUM_IEEE:
4203  case G_FMINIMUM:
4204  case G_FMAXIMUM:
4205  case G_FSHL:
4206  case G_FSHR:
4207  case G_ROTL:
4208  case G_ROTR:
4209  case G_FREEZE:
4210  case G_SADDSAT:
4211  case G_SSUBSAT:
4212  case G_UADDSAT:
4213  case G_USUBSAT:
4214  case G_UMULO:
4215  case G_SMULO:
4216  case G_SHL:
4217  case G_LSHR:
4218  case G_ASHR:
4219  case G_SSHLSAT:
4220  case G_USHLSAT:
4221  case G_CTLZ:
4222  case G_CTLZ_ZERO_UNDEF:
4223  case G_CTTZ:
4224  case G_CTTZ_ZERO_UNDEF:
4225  case G_CTPOP:
4226  case G_FCOPYSIGN:
4227  case G_ZEXT:
4228  case G_SEXT:
4229  case G_ANYEXT:
4230  case G_FPEXT:
4231  case G_FPTRUNC:
4232  case G_SITOFP:
4233  case G_UITOFP:
4234  case G_FPTOSI:
4235  case G_FPTOUI:
4236  case G_INTTOPTR:
4237  case G_PTRTOINT:
4238  case G_ADDRSPACE_CAST:
4239  return fewerElementsVectorMultiEltType(GMI, NumElts);
4240  case G_ICMP:
4241  case G_FCMP:
4242  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4243  case G_SELECT:
4244  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4245  return fewerElementsVectorMultiEltType(GMI, NumElts);
4246  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4247  case G_PHI:
4248  return fewerElementsVectorPhi(GMI, NumElts);
4249  case G_UNMERGE_VALUES:
4250  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4251  case G_BUILD_VECTOR:
4252  assert(TypeIdx == 0 && "not a vector type index");
4253  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4254  case G_CONCAT_VECTORS:
4255  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4256  return UnableToLegalize;
4257  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4258  case G_EXTRACT_VECTOR_ELT:
4259  case G_INSERT_VECTOR_ELT:
4260  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4261  case G_LOAD:
4262  case G_STORE:
4263  return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4264  case G_SEXT_INREG:
4265  return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4266  GISEL_VECREDUCE_CASES_NONSEQ
4267  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4268  case G_SHUFFLE_VECTOR:
4269  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4270  default:
4271  return UnableToLegalize;
4272  }
4273 }
4274 
4275 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4276  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4277  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4278  if (TypeIdx != 0)
4279  return UnableToLegalize;
4280 
4281  Register DstReg = MI.getOperand(0).getReg();
4282  Register Src1Reg = MI.getOperand(1).getReg();
4283  Register Src2Reg = MI.getOperand(2).getReg();
4284  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4285  LLT DstTy = MRI.getType(DstReg);
4286  LLT Src1Ty = MRI.getType(Src1Reg);
4287  LLT Src2Ty = MRI.getType(Src2Reg);
4288  // The shuffle should be canonicalized by now.
4289  if (DstTy != Src1Ty)
4290  return UnableToLegalize;
4291  if (DstTy != Src2Ty)
4292  return UnableToLegalize;
4293 
4294  if (!isPowerOf2_32(DstTy.getNumElements()))
4295  return UnableToLegalize;
4296 
4297  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4298  // Further legalization attempts will be needed to split further.
4299  NarrowTy =
4301  unsigned NewElts = NarrowTy.getNumElements();
4302 
4303  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4304  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4305  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4306  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4307  SplitSrc2Regs[1]};
4308 
4309  Register Hi, Lo;
4310 
4311  // If Lo or Hi uses elements from at most two of the four input vectors, then
4312  // express it as a vector shuffle of those two inputs. Otherwise extract the
4313  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4314  SmallVector<int, 16> Ops;
4315  for (unsigned High = 0; High < 2; ++High) {
4316  Register &Output = High ? Hi : Lo;
4317 
4318  // Build a shuffle mask for the output, discovering on the fly which
4319  // input vectors to use as shuffle operands (recorded in InputUsed).
4320  // If building a suitable shuffle vector proves too hard, then bail
4321  // out with useBuildVector set.
4322  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4323  unsigned FirstMaskIdx = High * NewElts;
4324  bool UseBuildVector = false;
4325  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4326  // The mask element. This indexes into the input.
4327  int Idx = Mask[FirstMaskIdx + MaskOffset];
4328 
4329  // The input vector this mask element indexes into.
4330  unsigned Input = (unsigned)Idx / NewElts;
4331 
4332  if (Input >= array_lengthof(Inputs)) {
4333  // The mask element does not index into any input vector.
4334  Ops.push_back(-1);
4335  continue;
4336  }
4337 
4338  // Turn the index into an offset from the start of the input vector.
4339  Idx -= Input * NewElts;
4340 
4341  // Find or create a shuffle vector operand to hold this input.
4342  unsigned OpNo;
4343  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4344  if (InputUsed[OpNo] == Input) {
4345  // This input vector is already an operand.
4346  break;
4347  } else if (InputUsed[OpNo] == -1U) {
4348  // Create a new operand for this input vector.
4349  InputUsed[OpNo] = Input;
4350  break;
4351  }
4352  }
4353 
4354  if (OpNo >= array_lengthof(InputUsed)) {
4355  // More than two input vectors used! Give up on trying to create a
4356  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4357  UseBuildVector = true;
4358  break;
4359  }
4360 
4361  // Add the mask index for the new shuffle vector.
4362  Ops.push_back(Idx + OpNo * NewElts);
4363  }
4364 
4365  if (UseBuildVector) {
4366  LLT EltTy = NarrowTy.getElementType();
4367  SmallVector<Register, 16> SVOps;
4368 
4369  // Extract the input elements by hand.
4370  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4371  // The mask element. This indexes into the input.
4372  int Idx = Mask[FirstMaskIdx + MaskOffset];
4373 
4374  // The input vector this mask element indexes into.
4375  unsigned Input = (unsigned)Idx / NewElts;
4376 
4377  if (Input >= array_lengthof(Inputs)) {
4378  // The mask element is "undef" or indexes off the end of the input.
4379  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4380  continue;
4381  }
4382 
4383  // Turn the index into an offset from the start of the input vector.
4384  Idx -= Input * NewElts;
4385 
4386  // Extract the vector element by hand.
4387  SVOps.push_back(MIRBuilder
4388  .buildExtractVectorElement(
4389  EltTy, Inputs[Input],
4391  .getReg(0));
4392  }
4393 
4394  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4395  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4396  } else if (InputUsed[0] == -1U) {
4397  // No input vectors were used! The result is undefined.
4398  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4399  } else {
4400  Register Op0 = Inputs[InputUsed[0]];
4401  // If only one input was used, use an undefined vector for the other.
4402  Register Op1 = InputUsed[1] == -1U
4403  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4404  : Inputs[InputUsed[1]];
4405  // At least one input vector was used. Create a new shuffle vector.
4406  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4407  }
4408 
4409  Ops.clear();
4410  }
4411 
4412  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4413  MI.eraseFromParent();
4414  return Legalized;
4415 }
4416 
4417 static unsigned getScalarOpcForReduction(unsigned Opc) {
4418  unsigned ScalarOpc;
4419  switch (Opc) {
4420  case TargetOpcode::G_VECREDUCE_FADD:
4421  ScalarOpc = TargetOpcode::G_FADD;
4422  break;
4423  case TargetOpcode::G_VECREDUCE_FMUL:
4424  ScalarOpc = TargetOpcode::G_FMUL;
4425  break;
4426  case TargetOpcode::G_VECREDUCE_FMAX:
4427  ScalarOpc = TargetOpcode::G_FMAXNUM;
4428  break;
4429  case TargetOpcode::G_VECREDUCE_FMIN:
4430  ScalarOpc = TargetOpcode::G_FMINNUM;
4431  break;
4432  case TargetOpcode::G_VECREDUCE_ADD:
4433  ScalarOpc = TargetOpcode::G_ADD;
4434  break;
4435  case TargetOpcode::G_VECREDUCE_MUL:
4436  ScalarOpc = TargetOpcode::G_MUL;
4437  break;
4438  case TargetOpcode::G_VECREDUCE_AND:
4439  ScalarOpc = TargetOpcode::G_AND;
4440  break;
4441  case TargetOpcode::G_VECREDUCE_OR:
4442  ScalarOpc = TargetOpcode::G_OR;
4443  break;
4444  case TargetOpcode::G_VECREDUCE_XOR:
4445  ScalarOpc = TargetOpcode::G_XOR;
4446  break;
4447  case TargetOpcode::G_VECREDUCE_SMAX:
4448  ScalarOpc = TargetOpcode::G_SMAX;
4449  break;
4450  case TargetOpcode::G_VECREDUCE_SMIN:
4451  ScalarOpc = TargetOpcode::G_SMIN;
4452  break;
4453  case TargetOpcode::G_VECREDUCE_UMAX:
4454  ScalarOpc = TargetOpcode::G_UMAX;
4455  break;
4456  case TargetOpcode::G_VECREDUCE_UMIN:
4457  ScalarOpc = TargetOpcode::G_UMIN;
4458  break;
4459  default:
4460  llvm_unreachable("Unhandled reduction");
4461  }
4462  return ScalarOpc;
4463 }
4464 
4465 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4466  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4467  unsigned Opc = MI.getOpcode();
4468  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4469  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4470  "Sequential reductions not expected");
4471 
4472  if (TypeIdx != 1)
4473  return UnableToLegalize;
4474 
4475  // The semantics of the normal non-sequential reductions allow us to freely
4476  // re-associate the operation.
4477  Register SrcReg = MI.getOperand(1).getReg();
4478  LLT SrcTy = MRI.getType(SrcReg);
4479  Register DstReg = MI.getOperand(0).getReg();
4480  LLT DstTy = MRI.getType(DstReg);
4481 
4482  if (NarrowTy.isVector() &&
4483  (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4484  return UnableToLegalize;
4485 
4486  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
4487  SmallVector<Register> SplitSrcs;
4488  // If NarrowTy is a scalar then we're being asked to scalarize.
4489  const unsigned NumParts =
4490  NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4491  : SrcTy.getNumElements();
4492 
4493  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4494  if (NarrowTy.isScalar()) {
4495  if (DstTy != NarrowTy)
4496  return UnableToLegalize; // FIXME: handle implicit extensions.
4497 
4498  if (isPowerOf2_32(NumParts)) {
4499  // Generate a tree of scalar operations to reduce the critical path.
4500  SmallVector<Register> PartialResults;
4501  unsigned NumPartsLeft = NumParts;
4502  while (NumPartsLeft > 1) {
4503  for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4504  PartialResults.emplace_back(
4505  MIRBuilder
4506  .buildInstr(ScalarOpc, {NarrowTy},
4507  {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4508  .getReg(0));
4509  }
4510  SplitSrcs = PartialResults;
4511  PartialResults.clear();
4512  NumPartsLeft = SplitSrcs.size();
4513  }
4514  assert(SplitSrcs.size() == 1);
4515  MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4516  MI.eraseFromParent();
4517  return Legalized;
4518  }
4519  // If we can't generate a tree, then just do sequential operations.
4520  Register Acc = SplitSrcs[0];
4521  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4522  Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4523  .getReg(0);
4524  MIRBuilder.buildCopy(DstReg, Acc);
4525  MI.eraseFromParent();
4526  return Legalized;
4527  }
4528  SmallVector<Register> PartialReductions;
4529  for (unsigned Part = 0; Part < NumParts; ++Part) {
4530  PartialReductions.push_back(
4531  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4532  }
4533 
4534 
4535  // If the types involved are powers of 2, we can generate intermediate vector
4536  // ops, before generating a final reduction operation.
4537  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4538  isPowerOf2_32(NarrowTy.getNumElements())) {
4539  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4540  }
4541 
4542  Register Acc = PartialReductions[0];
4543  for (unsigned Part = 1; Part < NumParts; ++Part) {
4544  if (Part == NumParts - 1) {
4545  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4546  {Acc, PartialReductions[Part]});
4547  } else {
4548  Acc = MIRBuilder
4549  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4550  .getReg(0);
4551  }
4552  }
4553  MI.eraseFromParent();
4554  return Legalized;
4555 }
4556 
4557 LegalizerHelper::LegalizeResult
4558 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4559  LLT SrcTy, LLT NarrowTy,
4560  unsigned ScalarOpc) {
4561  SmallVector<Register> SplitSrcs;
4562  // Split the sources into NarrowTy size pieces.
4563  extractParts(SrcReg, NarrowTy,
4564  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4565  // We're going to do a tree reduction using vector operations until we have
4566  // one NarrowTy size value left.
4567  while (SplitSrcs.size() > 1) {
4568  SmallVector<Register> PartialRdxs;
4569  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4570  Register LHS = SplitSrcs[Idx];
4571  Register RHS = SplitSrcs[Idx + 1];
4572  // Create the intermediate vector op.
4573  Register Res =
4574  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4575  PartialRdxs.push_back(Res);
4576  }
4577  SplitSrcs = std::move(PartialRdxs);
4578  }
4579  // Finally generate the requested NarrowTy based reduction.
4580  Observer.changingInstr(MI);
4581  MI.getOperand(1).setReg(SplitSrcs[0]);
4582  Observer.changedInstr(MI);
4583  return Legalized;
4584 }
4585 
4586 LegalizerHelper::LegalizeResult
4587 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4588  const LLT HalfTy, const LLT AmtTy) {
4589 
4590  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4591  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4592  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4593 
4594  if (Amt.isZero()) {
4595  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4596  MI.eraseFromParent();
4597  return Legalized;
4598  }
4599 
4600  LLT NVT = HalfTy;
4601  unsigned NVTBits = HalfTy.getSizeInBits();
4602  unsigned VTBits = 2 * NVTBits;
4603 
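  // Depending on whether the constant amount covers the whole value, more than
  // one half, exactly one half, or less than that, each result half is a
  // constant, a copy of an input half, a single shift, or an or of two shifts.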
4604  SrcOp Lo(Register(0)), Hi(Register(0));
4605  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4606  if (Amt.ugt(VTBits)) {
4607  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4608  } else if (Amt.ugt(NVTBits)) {
4609  Lo = MIRBuilder.buildConstant(NVT, 0);
4610  Hi = MIRBuilder.buildShl(NVT, InL,
4611  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4612  } else if (Amt == NVTBits) {
4613  Lo = MIRBuilder.buildConstant(NVT, 0);
4614  Hi = InL;
4615  } else {
4616  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4617  auto OrLHS =
4618  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4619  auto OrRHS = MIRBuilder.buildLShr(
4620  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4621  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4622  }
4623  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4624  if (Amt.ugt(VTBits)) {
4625  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4626  } else if (Amt.ugt(NVTBits)) {
4627  Lo = MIRBuilder.buildLShr(NVT, InH,
4628  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4629  Hi = MIRBuilder.buildConstant(NVT, 0);
4630  } else if (Amt == NVTBits) {
4631  Lo = InH;
4632  Hi = MIRBuilder.buildConstant(NVT, 0);
4633  } else {
4634  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4635 
4636  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4637  auto OrRHS = MIRBuilder.buildShl(
4638  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4639 
4640  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4641  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4642  }
4643  } else {
4644  if (Amt.ugt(VTBits)) {
4645  Hi = Lo = MIRBuilder.buildAShr(
4646  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4647  } else if (Amt.ugt(NVTBits)) {
4648  Lo = MIRBuilder.buildAShr(NVT, InH,
4649  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4650  Hi = MIRBuilder.buildAShr(NVT, InH,
4651  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4652  } else if (Amt == NVTBits) {
4653  Lo = InH;
4654  Hi = MIRBuilder.buildAShr(NVT, InH,
4655  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4656  } else {
4657  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4658 
4659  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4660  auto OrRHS = MIRBuilder.buildShl(
4661  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4662 
4663  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4664  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4665  }
4666  }
4667 
4668  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4669  MI.eraseFromParent();
4670 
4671  return Legalized;
4672 }
4673 
4674 // TODO: Optimize if constant shift amount.
4675 LegalizerHelper::LegalizeResult
4676 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4677  LLT RequestedTy) {
4678  if (TypeIdx == 1) {
4679  Observer.changingInstr(MI);
4680  narrowScalarSrc(MI, RequestedTy, 2);
4681  Observer.changedInstr(MI);
4682  return Legalized;
4683  }
4684 
4685  Register DstReg = MI.getOperand(0).getReg();
4686  LLT DstTy = MRI.getType(DstReg);
4687  if (DstTy.isVector())
4688  return UnableToLegalize;
4689 
4690  Register Amt = MI.getOperand(2).getReg();
4691  LLT ShiftAmtTy = MRI.getType(Amt);
4692  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4693  if (DstEltSize % 2 != 0)
4694  return UnableToLegalize;
4695 
4696  // Ignore the input type. We can only go to exactly half the size of the
4697  // input. If that isn't small enough, the resulting pieces will be further
4698  // legalized.
4699  const unsigned NewBitSize = DstEltSize / 2;
4700  const LLT HalfTy = LLT::scalar(NewBitSize);
4701  const LLT CondTy = LLT::scalar(1);
4702 
4703  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
4704  return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
4705  ShiftAmtTy);
4706  }
4707 
4708  // TODO: Expand with known bits.
4709 
4710  // Handle the fully general expansion by an unknown amount.
4711  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4712 
4713  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4714  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4715  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4716 
4717  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4718  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4719 
4720  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4721  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4722  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
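  // IsShort selects between the 'amount < half width' and 'amount >= half
  // width' expansions below; IsZero passes the original half through when the
  // amount is zero, where the short path would shift by the full half width.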
4723 
4724  Register ResultRegs[2];
4725  switch (MI.getOpcode()) {
4726  case TargetOpcode::G_SHL: {
4727  // Short: ShAmt < NewBitSize
4728  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4729 
4730  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4731  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4732  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4733 
4734  // Long: ShAmt >= NewBitSize
4735  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4736  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4737 
4738  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4739  auto Hi = MIRBuilder.buildSelect(
4740  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4741 
4742  ResultRegs[0] = Lo.getReg(0);
4743  ResultRegs[1] = Hi.getReg(0);
4744  break;
4745  }
4746  case TargetOpcode::G_LSHR:
4747  case TargetOpcode::G_ASHR: {
4748  // Short: ShAmt < NewBitSize
4749  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4750 
4751  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4752  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4753  auto LoS = MIRBuilder.buildOr(HalfTy, LoO