1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
25 #include "llvm/Support/Debug.h"
28 
29 #define DEBUG_TYPE "legalizer"
30 
31 using namespace llvm;
32 using namespace LegalizeActions;
33 using namespace MIPatternMatch;
34 
35 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
36 ///
37 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
38 /// with any leftover piece as type \p LeftoverTy
39 ///
40 /// Returns -1 in the first element of the pair if the breakdown is not
41 /// satisfiable.
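/// For example (illustrative, not from the original source): breaking
/// OrigTy = s71 into NarrowTy = s32 pieces returns {2, 1} with LeftoverTy set
/// to s7, and breaking <3 x s32> into <2 x s32> returns {1, 1} with
/// LeftoverTy set to s32.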
42 static std::pair<int, int>
43 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
44  assert(!LeftoverTy.isValid() && "this is an out argument");
45 
46  unsigned Size = OrigTy.getSizeInBits();
47  unsigned NarrowSize = NarrowTy.getSizeInBits();
48  unsigned NumParts = Size / NarrowSize;
49  unsigned LeftoverSize = Size - NumParts * NarrowSize;
50  assert(Size > NarrowSize);
51 
52  if (LeftoverSize == 0)
53  return {NumParts, 0};
54 
55  if (NarrowTy.isVector()) {
56  unsigned EltSize = OrigTy.getScalarSizeInBits();
57  if (LeftoverSize % EltSize != 0)
58  return {-1, -1};
59  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
60  } else {
61  LeftoverTy = LLT::scalar(LeftoverSize);
62  }
63 
64  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
65  return std::make_pair(NumParts, NumLeftover);
66 }
67 
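/// Return the IR floating-point type matching the width of the scalar \p Ty
/// (half, float, double, x86_fp80 or fp128), or null if \p Ty is not a scalar
/// of one of those widths.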
68 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
69 
70  if (!Ty.isScalar())
71  return nullptr;
72 
73  switch (Ty.getSizeInBits()) {
74  case 16:
75  return Type::getHalfTy(Ctx);
76  case 32:
77  return Type::getFloatTy(Ctx);
78  case 64:
79  return Type::getDoubleTy(Ctx);
80  case 80:
81  return Type::getX86_FP80Ty(Ctx);
82  case 128:
83  return Type::getFP128Ty(Ctx);
84  default:
85  return nullptr;
86  }
87 }
88 
89 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
90                                  GISelChangeObserver &Observer,
91                                  MachineIRBuilder &Builder)
92  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
93  LI(*MF.getSubtarget().getLegalizerInfo()),
94  TLI(*MF.getSubtarget().getTargetLowering()) { }
95 
96 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
97                                  GISelChangeObserver &Observer,
98                                  MachineIRBuilder &B)
99  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
100  TLI(*MF.getSubtarget().getTargetLowering()) { }
101 
102 LegalizerHelper::LegalizeResult
103 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
104   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
105 
106   MIRBuilder.setInstrAndDebugLoc(MI);
107 
108  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
109  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
110  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
111  auto Step = LI.getAction(MI, MRI);
112  switch (Step.Action) {
113  case Legal:
114  LLVM_DEBUG(dbgs() << ".. Already legal\n");
115  return AlreadyLegal;
116  case Libcall:
117  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
118  return libcall(MI);
119  case NarrowScalar:
120  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
121  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
122  case WidenScalar:
123  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
124  return widenScalar(MI, Step.TypeIdx, Step.NewType);
125  case Bitcast:
126  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
127  return bitcast(MI, Step.TypeIdx, Step.NewType);
128  case Lower:
129  LLVM_DEBUG(dbgs() << ".. Lower\n");
130  return lower(MI, Step.TypeIdx, Step.NewType);
131  case FewerElements:
132  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
133  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
134  case MoreElements:
135  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
136  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
137  case Custom:
138  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
139  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
140  default:
141  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
142  return UnableToLegalize;
143  }
144 }
145 
146 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
147  SmallVectorImpl<Register> &VRegs) {
148  for (int i = 0; i < NumParts; ++i)
149     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
150   MIRBuilder.buildUnmerge(VRegs, Reg);
151 }
152 
153 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
154  LLT MainTy, LLT &LeftoverTy,
155                                    SmallVectorImpl<Register> &VRegs,
156                                    SmallVectorImpl<Register> &LeftoverRegs) {
157  assert(!LeftoverTy.isValid() && "this is an out argument");
158 
159  unsigned RegSize = RegTy.getSizeInBits();
160  unsigned MainSize = MainTy.getSizeInBits();
161  unsigned NumParts = RegSize / MainSize;
162  unsigned LeftoverSize = RegSize - NumParts * MainSize;
163 
164  // Use an unmerge when possible.
165  if (LeftoverSize == 0) {
166  for (unsigned I = 0; I < NumParts; ++I)
167  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
168  MIRBuilder.buildUnmerge(VRegs, Reg);
169  return true;
170  }
171 
172  if (MainTy.isVector()) {
173  unsigned EltSize = MainTy.getScalarSizeInBits();
174  if (LeftoverSize % EltSize != 0)
175  return false;
176  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
177  } else {
178  LeftoverTy = LLT::scalar(LeftoverSize);
179  }
180 
181  // For irregular sizes, extract the individual parts.
182  for (unsigned I = 0; I != NumParts; ++I) {
183  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
184  VRegs.push_back(NewReg);
185  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
186  }
187 
188  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
189  Offset += LeftoverSize) {
190  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
191  LeftoverRegs.push_back(NewReg);
192  MIRBuilder.buildExtract(NewReg, Reg, Offset);
193  }
194 
195  return true;
196 }
197 
198 void LegalizerHelper::insertParts(Register DstReg,
199  LLT ResultTy, LLT PartTy,
200  ArrayRef<Register> PartRegs,
201  LLT LeftoverTy,
202  ArrayRef<Register> LeftoverRegs) {
203  if (!LeftoverTy.isValid()) {
204  assert(LeftoverRegs.empty());
205 
206  if (!ResultTy.isVector()) {
207  MIRBuilder.buildMerge(DstReg, PartRegs);
208  return;
209  }
210 
211  if (PartTy.isVector())
212  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
213  else
214  MIRBuilder.buildBuildVector(DstReg, PartRegs);
215  return;
216  }
217 
218  unsigned PartSize = PartTy.getSizeInBits();
219  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
220 
221  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
222  MIRBuilder.buildUndef(CurResultReg);
223 
224  unsigned Offset = 0;
225  for (Register PartReg : PartRegs) {
226  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
227  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
228  CurResultReg = NewResultReg;
229  Offset += PartSize;
230  }
231 
232  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
233  // Use the original output register for the final insert to avoid a copy.
234  Register NewResultReg = (I + 1 == E) ?
235  DstReg : MRI.createGenericVirtualRegister(ResultTy);
236 
237  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
238  CurResultReg = NewResultReg;
239  Offset += LeftoverPartSize;
240  }
241 }
242 
243 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
244 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
245                               const MachineInstr &MI) {
246  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
247 
248  const int StartIdx = Regs.size();
249  const int NumResults = MI.getNumOperands() - 1;
250  Regs.resize(Regs.size() + NumResults);
251  for (int I = 0; I != NumResults; ++I)
252  Regs[StartIdx + I] = MI.getOperand(I).getReg();
253 }
254 
255 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
256  LLT GCDTy, Register SrcReg) {
257  LLT SrcTy = MRI.getType(SrcReg);
258  if (SrcTy == GCDTy) {
259  // If the source already evenly divides the result type, we don't need to do
260  // anything.
261  Parts.push_back(SrcReg);
262  } else {
263  // Need to split into common type sized pieces.
264  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
265  getUnmergeResults(Parts, *Unmerge);
266  }
267 }
268 
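/// Compute the GCD type of \p SrcTy, \p NarrowTy and \p DstTy, split
/// \p SrcReg into pieces of that type, append them to \p Parts, and return
/// the GCD type that was used.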
269 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
270  LLT NarrowTy, Register SrcReg) {
271  LLT SrcTy = MRI.getType(SrcReg);
272  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
273  extractGCDType(Parts, GCDTy, SrcReg);
274  return GCDTy;
275 }
276 
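/// Merge the \p GCDTy-sized pieces in \p VRegs into \p NarrowTy-sized pieces
/// covering the least common multiple type of \p DstTy and \p NarrowTy,
/// padding with \p PadStrategy (G_ANYEXT, G_ZEXT or G_SEXT) when the sources
/// do not fill the result. For example (illustrative), with DstTy = s12,
/// NarrowTy = s6 and GCDTy = s2, six s2 inputs become two s6 merges. \p VRegs
/// is overwritten with the new pieces and the LCM type is returned.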
277 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
278                                          SmallVectorImpl<Register> &VRegs,
279                                          unsigned PadStrategy) {
280  LLT LCMTy = getLCMType(DstTy, NarrowTy);
281 
282  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
283  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
284  int NumOrigSrc = VRegs.size();
285 
286  Register PadReg;
287 
288  // Get a value we can use to pad the source value if the sources won't evenly
289  // cover the result type.
290  if (NumOrigSrc < NumParts * NumSubParts) {
291  if (PadStrategy == TargetOpcode::G_ZEXT)
292  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
293  else if (PadStrategy == TargetOpcode::G_ANYEXT)
294  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
295  else {
296  assert(PadStrategy == TargetOpcode::G_SEXT);
297 
298  // Shift the sign bit of the low register through the high register.
299  auto ShiftAmt =
300           MIRBuilder.buildConstant(GCDTy, GCDTy.getSizeInBits() - 1);
301       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
302  }
303  }
304 
305  // Registers for the final merge to be produced.
306  SmallVector<Register, 4> Remerge(NumParts);
307 
308  // Registers needed for intermediate merges, which will be merged into a
309  // source for Remerge.
310  SmallVector<Register, 4> SubMerge(NumSubParts);
311 
312  // Once we've fully read off the end of the original source bits, we can reuse
313  // the same high bits for remaining padding elements.
314  Register AllPadReg;
315 
316  // Build merges to the LCM type to cover the original result type.
317  for (int I = 0; I != NumParts; ++I) {
318  bool AllMergePartsArePadding = true;
319 
320  // Build the requested merges to the requested type.
321  for (int J = 0; J != NumSubParts; ++J) {
322  int Idx = I * NumSubParts + J;
323  if (Idx >= NumOrigSrc) {
324  SubMerge[J] = PadReg;
325  continue;
326  }
327 
328  SubMerge[J] = VRegs[Idx];
329 
330  // There are meaningful bits here we can't reuse later.
331  AllMergePartsArePadding = false;
332  }
333 
334  // If we've filled up a complete piece with padding bits, we can directly
335  // emit the natural sized constant if applicable, rather than a merge of
336  // smaller constants.
337  if (AllMergePartsArePadding && !AllPadReg) {
338  if (PadStrategy == TargetOpcode::G_ANYEXT)
339  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
340  else if (PadStrategy == TargetOpcode::G_ZEXT)
341  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
342 
343  // If this is a sign extension, we can't materialize a trivial constant
344  // with the right type and have to produce a merge.
345  }
346 
347  if (AllPadReg) {
348  // Avoid creating additional instructions if we're just adding additional
349  // copies of padding bits.
350  Remerge[I] = AllPadReg;
351  continue;
352  }
353 
354  if (NumSubParts == 1)
355  Remerge[I] = SubMerge[0];
356  else
357  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
358 
359  // In the sign extend padding case, re-use the first all-signbit merge.
360  if (AllMergePartsArePadding && !AllPadReg)
361  AllPadReg = Remerge[I];
362  }
363 
364  VRegs = std::move(Remerge);
365  return LCMTy;
366 }
367 
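/// Merge \p RemergeRegs, which cover the LCM type \p LCMTy, and write the low
/// bits of the result to \p DstReg, inserting a truncate or an unmerge when
/// \p LCMTy is wider than the destination type.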
368 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
369  ArrayRef<Register> RemergeRegs) {
370  LLT DstTy = MRI.getType(DstReg);
371 
372  // Create the merge to the widened source, and extract the relevant bits into
373  // the result.
374 
375  if (DstTy == LCMTy) {
376  MIRBuilder.buildMerge(DstReg, RemergeRegs);
377  return;
378  }
379 
380  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
381  if (DstTy.isScalar() && LCMTy.isScalar()) {
382  MIRBuilder.buildTrunc(DstReg, Remerge);
383  return;
384  }
385 
386  if (LCMTy.isVector()) {
387  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
388  SmallVector<Register, 8> UnmergeDefs(NumDefs);
389  UnmergeDefs[0] = DstReg;
390  for (unsigned I = 1; I != NumDefs; ++I)
391  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
392 
393  MIRBuilder.buildUnmerge(UnmergeDefs,
394  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
395  return;
396  }
397 
398  llvm_unreachable("unhandled case");
399 }
400 
401 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
402 #define RTLIBCASE_INT(LibcallPrefix) \
403  do { \
404  switch (Size) { \
405  case 32: \
406  return RTLIB::LibcallPrefix##32; \
407  case 64: \
408  return RTLIB::LibcallPrefix##64; \
409  case 128: \
410  return RTLIB::LibcallPrefix##128; \
411  default: \
412  llvm_unreachable("unexpected size"); \
413  } \
414  } while (0)
415 
416 #define RTLIBCASE(LibcallPrefix) \
417  do { \
418  switch (Size) { \
419  case 32: \
420  return RTLIB::LibcallPrefix##32; \
421  case 64: \
422  return RTLIB::LibcallPrefix##64; \
423  case 80: \
424  return RTLIB::LibcallPrefix##80; \
425  case 128: \
426  return RTLIB::LibcallPrefix##128; \
427  default: \
428  llvm_unreachable("unexpected size"); \
429  } \
430  } while (0)
431 
432  switch (Opcode) {
433  case TargetOpcode::G_SDIV:
434  RTLIBCASE_INT(SDIV_I);
435  case TargetOpcode::G_UDIV:
436  RTLIBCASE_INT(UDIV_I);
437  case TargetOpcode::G_SREM:
438  RTLIBCASE_INT(SREM_I);
439  case TargetOpcode::G_UREM:
440  RTLIBCASE_INT(UREM_I);
441  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
442  RTLIBCASE_INT(CTLZ_I);
443  case TargetOpcode::G_FADD:
444  RTLIBCASE(ADD_F);
445  case TargetOpcode::G_FSUB:
446  RTLIBCASE(SUB_F);
447  case TargetOpcode::G_FMUL:
448  RTLIBCASE(MUL_F);
449  case TargetOpcode::G_FDIV:
450  RTLIBCASE(DIV_F);
451  case TargetOpcode::G_FEXP:
452  RTLIBCASE(EXP_F);
453  case TargetOpcode::G_FEXP2:
454  RTLIBCASE(EXP2_F);
455  case TargetOpcode::G_FREM:
456  RTLIBCASE(REM_F);
457  case TargetOpcode::G_FPOW:
458  RTLIBCASE(POW_F);
459  case TargetOpcode::G_FMA:
460  RTLIBCASE(FMA_F);
461  case TargetOpcode::G_FSIN:
462  RTLIBCASE(SIN_F);
463  case TargetOpcode::G_FCOS:
464  RTLIBCASE(COS_F);
465  case TargetOpcode::G_FLOG10:
466  RTLIBCASE(LOG10_F);
467  case TargetOpcode::G_FLOG:
468  RTLIBCASE(LOG_F);
469  case TargetOpcode::G_FLOG2:
470  RTLIBCASE(LOG2_F);
471  case TargetOpcode::G_FCEIL:
472  RTLIBCASE(CEIL_F);
473  case TargetOpcode::G_FFLOOR:
474  RTLIBCASE(FLOOR_F);
475  case TargetOpcode::G_FMINNUM:
476  RTLIBCASE(FMIN_F);
477  case TargetOpcode::G_FMAXNUM:
478  RTLIBCASE(FMAX_F);
479  case TargetOpcode::G_FSQRT:
480  RTLIBCASE(SQRT_F);
481  case TargetOpcode::G_FRINT:
482  RTLIBCASE(RINT_F);
483  case TargetOpcode::G_FNEARBYINT:
484  RTLIBCASE(NEARBYINT_F);
485  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
486  RTLIBCASE(ROUNDEVEN_F);
487  }
488  llvm_unreachable("Unknown libcall function");
489 }
490 
491 /// True if an instruction is in tail position in its caller. Intended for
492 /// legalizing libcalls as tail calls when possible.
493 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
494                                     MachineInstr &MI) {
495  MachineBasicBlock &MBB = *MI.getParent();
496  const Function &F = MBB.getParent()->getFunction();
497 
498  // Conservatively require the attributes of the call to match those of
499  // the return. Ignore NoAlias and NonNull because they don't affect the
500  // call sequence.
501  AttributeList CallerAttrs = F.getAttributes();
502  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
503  .removeAttribute(Attribute::NoAlias)
504  .removeAttribute(Attribute::NonNull)
505  .hasAttributes())
506  return false;
507 
508  // It's not safe to eliminate the sign / zero extension of the return value.
509  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
510  CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
511  return false;
512 
513  // Only tail call if the following instruction is a standard return.
514  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
515  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
516  return false;
517 
518  return true;
519 }
520 
521 LegalizerHelper::LegalizeResult
522 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
523  const CallLowering::ArgInfo &Result,
524                     ArrayRef<CallLowering::ArgInfo> Args,
525                     const CallingConv::ID CC) {
526  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
527 
528   CallLowering::CallLoweringInfo Info;
529   Info.CallConv = CC;
530   Info.Callee = MachineOperand::CreateES(Name);
531   Info.OrigRet = Result;
532   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
533   if (!CLI.lowerCall(MIRBuilder, Info))
534     return LegalizerHelper::UnableToLegalize;
535 
536   return LegalizerHelper::Legalized;
537 }
538 
539 LegalizerHelper::LegalizeResult
540 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
541                     const CallLowering::ArgInfo &Result,
542                     ArrayRef<CallLowering::ArgInfo> Args) {
543   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
544  const char *Name = TLI.getLibcallName(Libcall);
545  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
546  return createLibcall(MIRBuilder, Name, Result, Args, CC);
547 }
548 
549 // Useful for libcalls where all operands have the same type.
550 static LegalizerHelper::LegalizeResult
551 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
552               Type *OpType) {
553  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
554 
555   SmallVector<CallLowering::ArgInfo, 3> Args;
556   for (unsigned i = 1; i < MI.getNumOperands(); i++)
557  Args.push_back({MI.getOperand(i).getReg(), OpType});
558  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
559  Args);
560 }
561 
562 LegalizerHelper::LegalizeResult
563 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
564                        MachineInstr &MI) {
565  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
566 
567   SmallVector<CallLowering::ArgInfo, 3> Args;
568   // Add all the args, except for the last which is an imm denoting 'tail'.
569  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
570  Register Reg = MI.getOperand(i).getReg();
571 
572     // Need to derive an IR type for call lowering.
573  LLT OpLLT = MRI.getType(Reg);
574  Type *OpTy = nullptr;
575  if (OpLLT.isPointer())
576  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
577  else
578  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
579  Args.push_back({Reg, OpTy});
580  }
581 
582  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
584  RTLIB::Libcall RTLibcall;
585  switch (MI.getOpcode()) {
586  case TargetOpcode::G_MEMCPY:
587  RTLibcall = RTLIB::MEMCPY;
588  break;
589  case TargetOpcode::G_MEMMOVE:
590  RTLibcall = RTLIB::MEMMOVE;
591  break;
592  case TargetOpcode::G_MEMSET:
593  RTLibcall = RTLIB::MEMSET;
594  break;
595  default:
596     return LegalizerHelper::UnableToLegalize;
597   }
598  const char *Name = TLI.getLibcallName(RTLibcall);
599 
601  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
603  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
604  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
605  isLibCallInTailPosition(MIRBuilder.getTII(), MI);
606 
607  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
608  if (!CLI.lowerCall(MIRBuilder, Info))
609     return LegalizerHelper::UnableToLegalize;
610 
611  if (Info.LoweredTailCall) {
612  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
613  // We must have a return following the call (or debug insts) to get past
614  // isLibCallInTailPosition.
615  do {
616  MachineInstr *Next = MI.getNextNode();
617  assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
618  "Expected instr following MI to be return or debug inst?");
619  // We lowered a tail call, so the call is now the return from the block.
620  // Delete the old return.
621  Next->eraseFromParent();
622  } while (MI.getNextNode());
623  }
624 
625   return LegalizerHelper::Legalized;
626 }
627 
628 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
629  Type *FromType) {
630  auto ToMVT = MVT::getVT(ToType);
631  auto FromMVT = MVT::getVT(FromType);
632 
633  switch (Opcode) {
634  case TargetOpcode::G_FPEXT:
635  return RTLIB::getFPEXT(FromMVT, ToMVT);
636  case TargetOpcode::G_FPTRUNC:
637  return RTLIB::getFPROUND(FromMVT, ToMVT);
638  case TargetOpcode::G_FPTOSI:
639  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
640  case TargetOpcode::G_FPTOUI:
641  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
642  case TargetOpcode::G_SITOFP:
643  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
644  case TargetOpcode::G_UITOFP:
645  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
646  }
647  llvm_unreachable("Unsupported libcall function");
648 }
649 
650 static LegalizerHelper::LegalizeResult
651 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
652                   Type *FromType) {
653  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
654  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
655  {{MI.getOperand(1).getReg(), FromType}});
656 }
657 
658 LegalizerHelper::LegalizeResult
659 LegalizerHelper::libcall(MachineInstr &MI) {
660   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
661  unsigned Size = LLTy.getSizeInBits();
662  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
663 
664  switch (MI.getOpcode()) {
665  default:
666  return UnableToLegalize;
667  case TargetOpcode::G_SDIV:
668  case TargetOpcode::G_UDIV:
669  case TargetOpcode::G_SREM:
670  case TargetOpcode::G_UREM:
671  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
672  Type *HLTy = IntegerType::get(Ctx, Size);
673  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
674  if (Status != Legalized)
675  return Status;
676  break;
677  }
678  case TargetOpcode::G_FADD:
679  case TargetOpcode::G_FSUB:
680  case TargetOpcode::G_FMUL:
681  case TargetOpcode::G_FDIV:
682  case TargetOpcode::G_FMA:
683  case TargetOpcode::G_FPOW:
684  case TargetOpcode::G_FREM:
685  case TargetOpcode::G_FCOS:
686  case TargetOpcode::G_FSIN:
687  case TargetOpcode::G_FLOG10:
688  case TargetOpcode::G_FLOG:
689  case TargetOpcode::G_FLOG2:
690  case TargetOpcode::G_FEXP:
691  case TargetOpcode::G_FEXP2:
692  case TargetOpcode::G_FCEIL:
693  case TargetOpcode::G_FFLOOR:
694  case TargetOpcode::G_FMINNUM:
695  case TargetOpcode::G_FMAXNUM:
696  case TargetOpcode::G_FSQRT:
697  case TargetOpcode::G_FRINT:
698  case TargetOpcode::G_FNEARBYINT:
699  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
700  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
701  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
702  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
703  return UnableToLegalize;
704  }
705  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
706  if (Status != Legalized)
707  return Status;
708  break;
709  }
710  case TargetOpcode::G_FPEXT:
711  case TargetOpcode::G_FPTRUNC: {
712  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
713  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
714  if (!FromTy || !ToTy)
715  return UnableToLegalize;
716     LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
717     if (Status != Legalized)
718  return Status;
719  break;
720  }
721  case TargetOpcode::G_FPTOSI:
722  case TargetOpcode::G_FPTOUI: {
723  // FIXME: Support other types
724  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
725  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
726  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
727  return UnableToLegalize;
728     LegalizeResult Status = conversionLibcall(
729         MI, MIRBuilder,
730  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
731  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
732  if (Status != Legalized)
733  return Status;
734  break;
735  }
736  case TargetOpcode::G_SITOFP:
737  case TargetOpcode::G_UITOFP: {
738  // FIXME: Support other types
739  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
740  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
741  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
742  return UnableToLegalize;
743     LegalizeResult Status = conversionLibcall(
744         MI, MIRBuilder,
745  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
746  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
747  if (Status != Legalized)
748  return Status;
749  break;
750  }
751  case TargetOpcode::G_MEMCPY:
752  case TargetOpcode::G_MEMMOVE:
753  case TargetOpcode::G_MEMSET: {
754     LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
755     MI.eraseFromParent();
756  return Result;
757  }
758  }
759 
760  MI.eraseFromParent();
761  return Legalized;
762 }
763 
764 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
765                                                               unsigned TypeIdx,
766  LLT NarrowTy) {
767  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
768  uint64_t NarrowSize = NarrowTy.getSizeInBits();
769 
770  switch (MI.getOpcode()) {
771  default:
772  return UnableToLegalize;
773  case TargetOpcode::G_IMPLICIT_DEF: {
774  Register DstReg = MI.getOperand(0).getReg();
775  LLT DstTy = MRI.getType(DstReg);
776 
777  // If SizeOp0 is not an exact multiple of NarrowSize, emit
778  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
779  // FIXME: Although this would also be legal for the general case, it causes
780  // a lot of regressions in the emitted code (superfluous COPYs, artifact
781  // combines not being hit). This seems to be a problem related to the
782  // artifact combiner.
783  if (SizeOp0 % NarrowSize != 0) {
784  LLT ImplicitTy = NarrowTy;
785  if (DstTy.isVector())
786  ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
787 
788  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
789  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
790 
791  MI.eraseFromParent();
792  return Legalized;
793  }
794 
795  int NumParts = SizeOp0 / NarrowSize;
796 
797  SmallVector<Register, 2> DstRegs;
798  for (int i = 0; i < NumParts; ++i)
799  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
800 
801  if (DstTy.isVector())
802  MIRBuilder.buildBuildVector(DstReg, DstRegs);
803  else
804  MIRBuilder.buildMerge(DstReg, DstRegs);
805  MI.eraseFromParent();
806  return Legalized;
807  }
808  case TargetOpcode::G_CONSTANT: {
809  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
810  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
811  unsigned TotalSize = Ty.getSizeInBits();
812  unsigned NarrowSize = NarrowTy.getSizeInBits();
813  int NumParts = TotalSize / NarrowSize;
814 
815  SmallVector<Register, 4> PartRegs;
816  for (int I = 0; I != NumParts; ++I) {
817  unsigned Offset = I * NarrowSize;
818  auto K = MIRBuilder.buildConstant(NarrowTy,
819  Val.lshr(Offset).trunc(NarrowSize));
820  PartRegs.push_back(K.getReg(0));
821  }
822 
823  LLT LeftoverTy;
824  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
825  SmallVector<Register, 1> LeftoverRegs;
826  if (LeftoverBits != 0) {
827  LeftoverTy = LLT::scalar(LeftoverBits);
828  auto K = MIRBuilder.buildConstant(
829  LeftoverTy,
830  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
831  LeftoverRegs.push_back(K.getReg(0));
832  }
833 
834  insertParts(MI.getOperand(0).getReg(),
835  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
836 
837  MI.eraseFromParent();
838  return Legalized;
839  }
840  case TargetOpcode::G_SEXT:
841  case TargetOpcode::G_ZEXT:
842  case TargetOpcode::G_ANYEXT:
843  return narrowScalarExt(MI, TypeIdx, NarrowTy);
844  case TargetOpcode::G_TRUNC: {
845  if (TypeIdx != 1)
846  return UnableToLegalize;
847 
848  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
849  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
850  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
851  return UnableToLegalize;
852  }
853 
854  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
855  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
856  MI.eraseFromParent();
857  return Legalized;
858  }
859 
860  case TargetOpcode::G_FREEZE:
861  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
862 
863  case TargetOpcode::G_ADD: {
864  // FIXME: add support for when SizeOp0 isn't an exact multiple of
865  // NarrowSize.
866  if (SizeOp0 % NarrowSize != 0)
867  return UnableToLegalize;
868  // Expand in terms of carry-setting/consuming G_ADDE instructions.
869  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
870 
871  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
872  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
873  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
874 
875  Register CarryIn;
876  for (int i = 0; i < NumParts; ++i) {
877  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
878       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
879 
880  if (i == 0)
881  MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
882  else {
883  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
884  Src2Regs[i], CarryIn);
885  }
886 
887  DstRegs.push_back(DstReg);
888  CarryIn = CarryOut;
889  }
890  Register DstReg = MI.getOperand(0).getReg();
891  if(MRI.getType(DstReg).isVector())
892  MIRBuilder.buildBuildVector(DstReg, DstRegs);
893  else
894  MIRBuilder.buildMerge(DstReg, DstRegs);
895  MI.eraseFromParent();
896  return Legalized;
897  }
898  case TargetOpcode::G_SUB: {
899  // FIXME: add support for when SizeOp0 isn't an exact multiple of
900  // NarrowSize.
901  if (SizeOp0 % NarrowSize != 0)
902  return UnableToLegalize;
903 
904  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
905 
906  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
907  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
908  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
909 
910  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
911     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
912     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
913  {Src1Regs[0], Src2Regs[0]});
914  DstRegs.push_back(DstReg);
915  Register BorrowIn = BorrowOut;
916  for (int i = 1; i < NumParts; ++i) {
917  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
918  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
919 
920  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
921  {Src1Regs[i], Src2Regs[i], BorrowIn});
922 
923  DstRegs.push_back(DstReg);
924  BorrowIn = BorrowOut;
925  }
926  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
927  MI.eraseFromParent();
928  return Legalized;
929  }
930  case TargetOpcode::G_MUL:
931  case TargetOpcode::G_UMULH:
932  return narrowScalarMul(MI, NarrowTy);
933  case TargetOpcode::G_EXTRACT:
934  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
935  case TargetOpcode::G_INSERT:
936  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
937  case TargetOpcode::G_LOAD: {
938  auto &MMO = **MI.memoperands_begin();
939  Register DstReg = MI.getOperand(0).getReg();
940  LLT DstTy = MRI.getType(DstReg);
941  if (DstTy.isVector())
942  return UnableToLegalize;
943 
944  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
945  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
946  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
947  MIRBuilder.buildAnyExt(DstReg, TmpReg);
948  MI.eraseFromParent();
949  return Legalized;
950  }
951 
952  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
953  }
954  case TargetOpcode::G_ZEXTLOAD:
955  case TargetOpcode::G_SEXTLOAD: {
956  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
957  Register DstReg = MI.getOperand(0).getReg();
958  Register PtrReg = MI.getOperand(1).getReg();
959 
960  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
961  auto &MMO = **MI.memoperands_begin();
962  unsigned MemSize = MMO.getSizeInBits();
963 
964  if (MemSize == NarrowSize) {
965  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
966  } else if (MemSize < NarrowSize) {
967  MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
968  } else if (MemSize > NarrowSize) {
969  // FIXME: Need to split the load.
970  return UnableToLegalize;
971  }
972 
973  if (ZExt)
974  MIRBuilder.buildZExt(DstReg, TmpReg);
975  else
976  MIRBuilder.buildSExt(DstReg, TmpReg);
977 
978  MI.eraseFromParent();
979  return Legalized;
980  }
981  case TargetOpcode::G_STORE: {
982  const auto &MMO = **MI.memoperands_begin();
983 
984  Register SrcReg = MI.getOperand(0).getReg();
985  LLT SrcTy = MRI.getType(SrcReg);
986  if (SrcTy.isVector())
987  return UnableToLegalize;
988 
989  int NumParts = SizeOp0 / NarrowSize;
990  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
991  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
992  if (SrcTy.isVector() && LeftoverBits != 0)
993  return UnableToLegalize;
994 
995  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
996  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
997  auto &MMO = **MI.memoperands_begin();
998  MIRBuilder.buildTrunc(TmpReg, SrcReg);
999  MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
1000  MI.eraseFromParent();
1001  return Legalized;
1002  }
1003 
1004  return reduceLoadStoreWidth(MI, 0, NarrowTy);
1005  }
1006  case TargetOpcode::G_SELECT:
1007  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1008  case TargetOpcode::G_AND:
1009  case TargetOpcode::G_OR:
1010  case TargetOpcode::G_XOR: {
1011  // Legalize bitwise operation:
1012  // A = BinOp<Ty> B, C
1013  // into:
1014  // B1, ..., BN = G_UNMERGE_VALUES B
1015  // C1, ..., CN = G_UNMERGE_VALUES C
1016   //     A1 = BinOp<Ty/N> B1, C1
1017  // ...
1018  // AN = BinOp<Ty/N> BN, CN
1019  // A = G_MERGE_VALUES A1, ..., AN
1020  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1021  }
1022  case TargetOpcode::G_SHL:
1023  case TargetOpcode::G_LSHR:
1024  case TargetOpcode::G_ASHR:
1025  return narrowScalarShift(MI, TypeIdx, NarrowTy);
1026  case TargetOpcode::G_CTLZ:
1027  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1028  case TargetOpcode::G_CTTZ:
1029  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1030  case TargetOpcode::G_CTPOP:
1031  if (TypeIdx == 1)
1032  switch (MI.getOpcode()) {
1033  case TargetOpcode::G_CTLZ:
1034  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1035  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1036  case TargetOpcode::G_CTTZ:
1037  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1038  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1039  case TargetOpcode::G_CTPOP:
1040  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1041  default:
1042  return UnableToLegalize;
1043  }
1044 
1045     Observer.changingInstr(MI);
1046     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1047     Observer.changedInstr(MI);
1048     return Legalized;
1049  case TargetOpcode::G_INTTOPTR:
1050  if (TypeIdx != 1)
1051  return UnableToLegalize;
1052 
1053     Observer.changingInstr(MI);
1054     narrowScalarSrc(MI, NarrowTy, 1);
1055     Observer.changedInstr(MI);
1056     return Legalized;
1057  case TargetOpcode::G_PTRTOINT:
1058  if (TypeIdx != 0)
1059  return UnableToLegalize;
1060 
1061     Observer.changingInstr(MI);
1062     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1063     Observer.changedInstr(MI);
1064     return Legalized;
1065  case TargetOpcode::G_PHI: {
1066  unsigned NumParts = SizeOp0 / NarrowSize;
1067  SmallVector<Register, 2> DstRegs(NumParts);
1068  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1069     Observer.changingInstr(MI);
1070     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1071  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1072  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1073  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1074  SrcRegs[i / 2]);
1075  }
1076  MachineBasicBlock &MBB = *MI.getParent();
1077     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1078     for (unsigned i = 0; i < NumParts; ++i) {
1079  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1080  MachineInstrBuilder MIB =
1081  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1082  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1083  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1084  }
1085     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1086     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1087     Observer.changedInstr(MI);
1088  MI.eraseFromParent();
1089  return Legalized;
1090  }
1091  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1092  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1093  if (TypeIdx != 2)
1094  return UnableToLegalize;
1095 
1096  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1097     Observer.changingInstr(MI);
1098     narrowScalarSrc(MI, NarrowTy, OpIdx);
1099     Observer.changedInstr(MI);
1100     return Legalized;
1101  }
1102  case TargetOpcode::G_ICMP: {
1103  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1104  if (NarrowSize * 2 != SrcSize)
1105  return UnableToLegalize;
1106 
1108  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1109  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1110  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1111 
1112  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1113  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1114  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1115 
1116  CmpInst::Predicate Pred =
1117  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1118  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1119 
1120  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1121  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1122  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1123  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1124  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1125  MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1126  } else {
1127  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1128  MachineInstrBuilder CmpHEQ =
1129  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1130       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1131           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1132  MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1133  }
1135  MI.eraseFromParent();
1136  return Legalized;
1137  }
1138  case TargetOpcode::G_SEXT_INREG: {
1139  if (TypeIdx != 0)
1140  return UnableToLegalize;
1141 
1142  int64_t SizeInBits = MI.getOperand(2).getImm();
1143 
1144  // So long as the new type has more bits than the bits we're extending we
1145  // don't need to break it apart.
1146  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1147       Observer.changingInstr(MI);
1148       // We don't lose any non-extension bits by truncating the src and
1149  // sign-extending the dst.
1150  MachineOperand &MO1 = MI.getOperand(1);
1151  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1152  MO1.setReg(TruncMIB.getReg(0));
1153 
1154  MachineOperand &MO2 = MI.getOperand(0);
1155  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1156       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1157       MIRBuilder.buildSExt(MO2, DstExt);
1158       MO2.setReg(DstExt);
1159       Observer.changedInstr(MI);
1160       return Legalized;
1161  }
1162 
1163  // Break it apart. Components below the extension point are unmodified. The
1164  // component containing the extension point becomes a narrower SEXT_INREG.
1165  // Components above it are ashr'd from the component containing the
1166  // extension point.
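    // For example (illustrative), narrowing a G_SEXT_INREG of 40 bits on s128
    // to s32 keeps the lowest component as-is, applies G_SEXT_INREG 8 to the
    // second component, and derives the two upper components from its sign
    // bit with a single shared G_ASHR.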
1167  if (SizeOp0 % NarrowSize != 0)
1168  return UnableToLegalize;
1169  int NumParts = SizeOp0 / NarrowSize;
1170 
1171  // List the registers where the destination will be scattered.
1172  SmallVector<Register, 2> DstRegs;
1173  // List the registers where the source will be split.
1174  SmallVector<Register, 2> SrcRegs;
1175 
1176  // Create all the temporary registers.
1177  for (int i = 0; i < NumParts; ++i) {
1178  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1179 
1180  SrcRegs.push_back(SrcReg);
1181  }
1182 
1183  // Explode the big arguments into smaller chunks.
1184  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1185 
1186  Register AshrCstReg =
1187  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1188  .getReg(0);
1189  Register FullExtensionReg = 0;
1190  Register PartialExtensionReg = 0;
1191 
1192  // Do the operation on each small part.
1193  for (int i = 0; i < NumParts; ++i) {
1194  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1195  DstRegs.push_back(SrcRegs[i]);
1196  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1197  assert(PartialExtensionReg &&
1198  "Expected to visit partial extension before full");
1199  if (FullExtensionReg) {
1200  DstRegs.push_back(FullExtensionReg);
1201  continue;
1202  }
1203  DstRegs.push_back(
1204  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1205  .getReg(0));
1206  FullExtensionReg = DstRegs.back();
1207  } else {
1208  DstRegs.push_back(
1209  MIRBuilder
1210  .buildInstr(
1211  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1212  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1213  .getReg(0));
1214  PartialExtensionReg = DstRegs.back();
1215  }
1216  }
1217 
1218  // Gather the destination registers into the final destination.
1219  Register DstReg = MI.getOperand(0).getReg();
1220  MIRBuilder.buildMerge(DstReg, DstRegs);
1221  MI.eraseFromParent();
1222  return Legalized;
1223  }
1224  case TargetOpcode::G_BSWAP:
1225  case TargetOpcode::G_BITREVERSE: {
1226  if (SizeOp0 % NarrowSize != 0)
1227  return UnableToLegalize;
1228 
1230  SmallVector<Register, 2> SrcRegs, DstRegs;
1231  unsigned NumParts = SizeOp0 / NarrowSize;
1232  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1233 
1234  for (unsigned i = 0; i < NumParts; ++i) {
1235  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1236  {SrcRegs[NumParts - 1 - i]});
1237  DstRegs.push_back(DstPart.getReg(0));
1238  }
1239 
1240  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1241 
1243  MI.eraseFromParent();
1244  return Legalized;
1245  }
1246  case TargetOpcode::G_PTR_ADD:
1247  case TargetOpcode::G_PTRMASK: {
1248  if (TypeIdx != 1)
1249  return UnableToLegalize;
1250     Observer.changingInstr(MI);
1251     narrowScalarSrc(MI, NarrowTy, 2);
1252     Observer.changedInstr(MI);
1253     return Legalized;
1254  }
1255  case TargetOpcode::G_FPTOUI: {
1256  if (TypeIdx != 0)
1257  return UnableToLegalize;
1258     Observer.changingInstr(MI);
1259     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1260     Observer.changedInstr(MI);
1261     return Legalized;
1262  }
1263  case TargetOpcode::G_FPTOSI: {
1264  if (TypeIdx != 0)
1265  return UnableToLegalize;
1266     Observer.changingInstr(MI);
1267     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
1268     Observer.changedInstr(MI);
1269     return Legalized;
1270  }
1271  case TargetOpcode::G_FPEXT:
1272  if (TypeIdx != 0)
1273  return UnableToLegalize;
1274     Observer.changingInstr(MI);
1275     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1276     Observer.changedInstr(MI);
1277     return Legalized;
1278  }
1279 }
1280 
1281 Register LegalizerHelper::coerceToScalar(Register Val) {
1282   LLT Ty = MRI.getType(Val);
1283  if (Ty.isScalar())
1284  return Val;
1285 
1286   const DataLayout &DL = MIRBuilder.getDataLayout();
1287   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1288  if (Ty.isPointer()) {
1289  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1290  return Register();
1291  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1292  }
1293 
1294  Register NewVal = Val;
1295 
1296  assert(Ty.isVector());
1297  LLT EltTy = Ty.getElementType();
1298  if (EltTy.isPointer())
1299  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1300  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1301 }
1302 
1303 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1304                                      unsigned OpIdx, unsigned ExtOpcode) {
1305  MachineOperand &MO = MI.getOperand(OpIdx);
1306  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1307  MO.setReg(ExtB.getReg(0));
1308 }
1309 
1310 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1311                                       unsigned OpIdx) {
1312  MachineOperand &MO = MI.getOperand(OpIdx);
1313  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1314  MO.setReg(ExtB.getReg(0));
1315 }
1316 
1317 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1318                                      unsigned OpIdx, unsigned TruncOpcode) {
1319  MachineOperand &MO = MI.getOperand(OpIdx);
1320  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1321   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1322   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1323  MO.setReg(DstExt);
1324 }
1325 
1326 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1327                                       unsigned OpIdx, unsigned ExtOpcode) {
1328  MachineOperand &MO = MI.getOperand(OpIdx);
1329  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1330   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1331   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1332  MO.setReg(DstTrunc);
1333 }
1334 
1335 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1336                                             unsigned OpIdx) {
1337  MachineOperand &MO = MI.getOperand(OpIdx);
1338   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1339   MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1340 }
1341 
1342 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1343                                             unsigned OpIdx) {
1344  MachineOperand &MO = MI.getOperand(OpIdx);
1345 
1346  LLT OldTy = MRI.getType(MO.getReg());
1347  unsigned OldElts = OldTy.getNumElements();
1348  unsigned NewElts = MoreTy.getNumElements();
1349 
1350  unsigned NumParts = NewElts / OldElts;
1351 
1352  // Use concat_vectors if the result is a multiple of the number of elements.
1353  if (NumParts * OldElts == NewElts) {
1354     SmallVector<Register, 8> Parts;
1355     Parts.push_back(MO.getReg());
1356 
1357  Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1358  for (unsigned I = 1; I != NumParts; ++I)
1359  Parts.push_back(ImpDef);
1360 
1361  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1362  MO.setReg(Concat.getReg(0));
1363  return;
1364  }
1365 
1366  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1367  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1368  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1369  MO.setReg(MoreReg);
1370 }
1371 
1372 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1373  MachineOperand &Op = MI.getOperand(OpIdx);
1374  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1375 }
1376 
1377 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1378  MachineOperand &MO = MI.getOperand(OpIdx);
1379  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1380   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1381   MIRBuilder.buildBitcast(MO, CastDst);
1382  MO.setReg(CastDst);
1383 }
1384 
1385 LegalizerHelper::LegalizeResult
1386 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1387  LLT WideTy) {
1388  if (TypeIdx != 1)
1389  return UnableToLegalize;
1390 
1391  Register DstReg = MI.getOperand(0).getReg();
1392  LLT DstTy = MRI.getType(DstReg);
1393  if (DstTy.isVector())
1394  return UnableToLegalize;
1395 
1396  Register Src1 = MI.getOperand(1).getReg();
1397  LLT SrcTy = MRI.getType(Src1);
1398  const int DstSize = DstTy.getSizeInBits();
1399  const int SrcSize = SrcTy.getSizeInBits();
1400  const int WideSize = WideTy.getSizeInBits();
1401  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1402 
1403  unsigned NumOps = MI.getNumOperands();
1404  unsigned NumSrc = MI.getNumOperands() - 1;
1405  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1406 
1407  if (WideSize >= DstSize) {
1408  // Directly pack the bits in the target type.
1409  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1410 
1411  for (unsigned I = 2; I != NumOps; ++I) {
1412  const unsigned Offset = (I - 1) * PartSize;
1413 
1414  Register SrcReg = MI.getOperand(I).getReg();
1415  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1416 
1417  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1418 
1419  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1420  MRI.createGenericVirtualRegister(WideTy);
1421 
1422  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1423  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1424  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1425  ResultReg = NextResult;
1426  }
1427 
1428  if (WideSize > DstSize)
1429  MIRBuilder.buildTrunc(DstReg, ResultReg);
1430  else if (DstTy.isPointer())
1431  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1432 
1433  MI.eraseFromParent();
1434  return Legalized;
1435  }
1436 
1437  // Unmerge the original values to the GCD type, and recombine to the next
1438  // multiple greater than the original type.
1439  //
1440  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1441  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1442  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1443  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1444  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1445  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1446  // %12:_(s12) = G_MERGE_VALUES %10, %11
1447  //
1448  // Padding with undef if necessary:
1449  //
1450  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1451  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1452  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1453  // %7:_(s2) = G_IMPLICIT_DEF
1454  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1455  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1456  // %10:_(s12) = G_MERGE_VALUES %8, %9
1457 
1458  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1459  LLT GCDTy = LLT::scalar(GCD);
1460 
1462  SmallVector<Register, 8> NewMergeRegs;
1463  SmallVector<Register, 8> Unmerges;
1464  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1465 
1466  // Decompose the original operands if they don't evenly divide.
1467  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1468  Register SrcReg = MI.getOperand(I).getReg();
1469  if (GCD == SrcSize) {
1470  Unmerges.push_back(SrcReg);
1471  } else {
1472  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1473  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1474  Unmerges.push_back(Unmerge.getReg(J));
1475  }
1476  }
1477 
1478  // Pad with undef to the next size that is a multiple of the requested size.
1479  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1480  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1481  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1482  Unmerges.push_back(UndefReg);
1483  }
1484 
1485  const int PartsPerGCD = WideSize / GCD;
1486 
1487  // Build merges of each piece.
1488  ArrayRef<Register> Slicer(Unmerges);
1489  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1490  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1491  NewMergeRegs.push_back(Merge.getReg(0));
1492  }
1493 
1494  // A truncate may be necessary if the requested type doesn't evenly divide the
1495  // original result type.
1496  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1497  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1498  } else {
1499  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1500  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1501  }
1502 
1503  MI.eraseFromParent();
1504  return Legalized;
1505 }
1506 
1507 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1508   Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1509  LLT OrigTy = MRI.getType(OrigReg);
1510  LLT LCMTy = getLCMType(WideTy, OrigTy);
1511 
1512  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1513  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1514 
1515  Register UnmergeSrc = WideReg;
1516 
1517  // Create a merge to the LCM type, padding with undef
1518  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1519  // =>
1520  // %1:_(<4 x s32>) = G_FOO
1521  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1522  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1523  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1524  if (NumMergeParts > 1) {
1525  Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1526  SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1527  MergeParts[0] = WideReg;
1528  UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1529  }
1530 
1531  // Unmerge to the original register and pad with dead defs.
1532  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1533  UnmergeResults[0] = OrigReg;
1534  for (int I = 1; I != NumUnmergeParts; ++I)
1535  UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1536 
1537  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1538  return WideReg;
1539 }
1540 
1541 LegalizerHelper::LegalizeResult
1542 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1543  LLT WideTy) {
1544  if (TypeIdx != 0)
1545  return UnableToLegalize;
1546 
1547  int NumDst = MI.getNumOperands() - 1;
1548  Register SrcReg = MI.getOperand(NumDst).getReg();
1549  LLT SrcTy = MRI.getType(SrcReg);
1550  if (SrcTy.isVector())
1551  return UnableToLegalize;
1552 
1553  Register Dst0Reg = MI.getOperand(0).getReg();
1554  LLT DstTy = MRI.getType(Dst0Reg);
1555  if (!DstTy.isScalar())
1556  return UnableToLegalize;
1557 
1558  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1559  if (SrcTy.isPointer()) {
1560       const DataLayout &DL = MIRBuilder.getDataLayout();
1561       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1562  LLVM_DEBUG(
1563  dbgs() << "Not casting non-integral address space integer\n");
1564  return UnableToLegalize;
1565  }
1566 
1567  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1568  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1569  }
1570 
1571  // Widen SrcTy to WideTy. This does not affect the result, but since the
1572  // user requested this size, it is probably better handled than SrcTy and
1573     // should reduce the total number of legalization artifacts.
1574  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1575  SrcTy = WideTy;
1576  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1577  }
1578 
1579     // There's no unmerge type to target. Directly extract the bits from the
1580     // source type.
1581  unsigned DstSize = DstTy.getSizeInBits();
1582 
1583  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1584  for (int I = 1; I != NumDst; ++I) {
1585  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1586  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1587  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1588  }
1589 
1590  MI.eraseFromParent();
1591  return Legalized;
1592  }
1593 
1594  // Extend the source to a wider type.
1595  LLT LCMTy = getLCMType(SrcTy, WideTy);
1596 
1597  Register WideSrc = SrcReg;
1598  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1599  // TODO: If this is an integral address space, cast to integer and anyext.
1600  if (SrcTy.isPointer()) {
1601  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1602  return UnableToLegalize;
1603  }
1604 
1605  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1606  }
1607 
1608  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1609 
1610  // Create a sequence of unmerges and merges to the original results. Since we
1611  // may have widened the source, we will need to pad the results with dead defs
1612  // to cover the source register.
1613  // e.g. widen s48 to s64:
1614  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1615  //
1616  // =>
1617  // %4:_(s192) = G_ANYEXT %0:_(s96)
1618  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1619  // ; unpack to GCD type, with extra dead defs
1620  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1621  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1622  // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
1623  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1624  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1625  const LLT GCDTy = getGCDType(WideTy, DstTy);
1626  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1627  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1628 
1629  // Directly unmerge to the destination without going through a GCD type
1630  // if possible
1631  if (PartsPerRemerge == 1) {
1632  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1633 
1634  for (int I = 0; I != NumUnmerge; ++I) {
1635  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1636 
1637  for (int J = 0; J != PartsPerUnmerge; ++J) {
1638  int Idx = I * PartsPerUnmerge + J;
1639  if (Idx < NumDst)
1640  MIB.addDef(MI.getOperand(Idx).getReg());
1641  else {
1642  // Create dead def for excess components.
1643  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1644  }
1645  }
1646 
1647  MIB.addUse(Unmerge.getReg(I));
1648  }
1649  } else {
1650     SmallVector<Register, 16> Parts;
1651     for (int J = 0; J != NumUnmerge; ++J)
1652  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1653 
1654  SmallVector<Register, 8> RemergeParts;
1655  for (int I = 0; I != NumDst; ++I) {
1656  for (int J = 0; J < PartsPerRemerge; ++J) {
1657  const int Idx = I * PartsPerRemerge + J;
1658  RemergeParts.emplace_back(Parts[Idx]);
1659  }
1660 
1661  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1662  RemergeParts.clear();
1663  }
1664  }
1665 
1666  MI.eraseFromParent();
1667  return Legalized;
1668 }
1669 
1670 LegalizerHelper::LegalizeResult
1671 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1672  LLT WideTy) {
1673  Register DstReg = MI.getOperand(0).getReg();
1674  Register SrcReg = MI.getOperand(1).getReg();
1675  LLT SrcTy = MRI.getType(SrcReg);
1676 
1677  LLT DstTy = MRI.getType(DstReg);
1678  unsigned Offset = MI.getOperand(2).getImm();
1679 
1680  if (TypeIdx == 0) {
1681  if (SrcTy.isVector() || DstTy.isVector())
1682  return UnableToLegalize;
1683 
1684  SrcOp Src(SrcReg);
1685  if (SrcTy.isPointer()) {
1686  // Extracts from pointers can be handled only if they are really just
1687  // simple integers.
1688       const DataLayout &DL = MIRBuilder.getDataLayout();
1689       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1690  return UnableToLegalize;
1691 
1692  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1693  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1694  SrcTy = SrcAsIntTy;
1695  }
1696 
1697  if (DstTy.isPointer())
1698  return UnableToLegalize;
1699 
1700  if (Offset == 0) {
1701  // Avoid a shift in the degenerate case.
1702  MIRBuilder.buildTrunc(DstReg,
1703  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1704  MI.eraseFromParent();
1705  return Legalized;
1706  }
1707 
1708  // Do a shift in the source type.
1709  LLT ShiftTy = SrcTy;
1710  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1711  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1712  ShiftTy = WideTy;
1713  }
1714 
1715  auto LShr = MIRBuilder.buildLShr(
1716  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1717  MIRBuilder.buildTrunc(DstReg, LShr);
1718  MI.eraseFromParent();
1719  return Legalized;
1720  }
1721 
1722  if (SrcTy.isScalar()) {
1724  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1726  return Legalized;
1727  }
1728 
1729  if (!SrcTy.isVector())
1730  return UnableToLegalize;
1731 
1732  if (DstTy != SrcTy.getElementType())
1733  return UnableToLegalize;
1734 
1735  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1736  return UnableToLegalize;
1737 
1739  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1740 
1741  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1742  Offset);
1743  widenScalarDst(MI, WideTy.getScalarType(), 0);
1745  return Legalized;
1746 }
1747 
1749 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1750  LLT WideTy) {
1751  if (TypeIdx != 0 || WideTy.isVector())
1752  return UnableToLegalize;
1754  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1755  widenScalarDst(MI, WideTy);
1757  return Legalized;
1758 }
1759 
1761 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1762  LLT WideTy) {
1763  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1764  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1765  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1766  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1767  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1768  // We can convert this to:
1769  // 1. Any extend iN to iM
1770  // 2. SHL by M-N
1771  // 3. [US][ADD|SUB|SHL]SAT
1772  // 4. L/ASHR by M-N
1773  //
1774  // It may be more efficient to lower this to a min and a max operation in
1775  // the higher precision arithmetic if the promoted operation isn't legal,
1776  // but this decision is up to the target's lowering request.
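      // For instance (illustrative register names), widening
      //   %d:_(s8) = G_SADDSAT %a, %b
      // to s32 with this recipe gives roughly:
      //   %la:_(s32) = G_ANYEXT %a(s8)
      //   %lb:_(s32) = G_ANYEXT %b(s8)
      //   %sa:_(s32) = G_SHL %la, 24
      //   %sb:_(s32) = G_SHL %lb, 24
      //   %w:_(s32)  = G_SADDSAT %sa, %sb
      //   %r:_(s32)  = G_ASHR %w, 24
      //   %d:_(s8)   = G_TRUNC %r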
1777  Register DstReg = MI.getOperand(0).getReg();
1778 
1779  unsigned NewBits = WideTy.getScalarSizeInBits();
1780  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1781 
1782  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1783  // must not left shift the RHS to preserve the shift amount.
1784  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1785  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1786  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1787  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1788  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1789  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1790 
1791  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1792  {ShiftL, ShiftR}, MI.getFlags());
1793 
1794  // Use a shift that will preserve the number of sign bits when the trunc is
1795  // folded away.
1796  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1797  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1798 
1799  MIRBuilder.buildTrunc(DstReg, Result);
1800  MI.eraseFromParent();
1801  return Legalized;
1802 }
1803 
1805 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1806  switch (MI.getOpcode()) {
1807  default:
1808  return UnableToLegalize;
1809  case TargetOpcode::G_EXTRACT:
1810  return widenScalarExtract(MI, TypeIdx, WideTy);
1811  case TargetOpcode::G_INSERT:
1812  return widenScalarInsert(MI, TypeIdx, WideTy);
1813  case TargetOpcode::G_MERGE_VALUES:
1814  return widenScalarMergeValues(MI, TypeIdx, WideTy);
1815  case TargetOpcode::G_UNMERGE_VALUES:
1816  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1817  case TargetOpcode::G_UADDO:
1818  case TargetOpcode::G_USUBO: {
1819  if (TypeIdx == 1)
1820  return UnableToLegalize; // TODO
1821  auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
1822  auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
1823  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1824  ? TargetOpcode::G_ADD
1825  : TargetOpcode::G_SUB;
1826  // Do the arithmetic in the larger type.
1827  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1828  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1829  APInt Mask =
1830  APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
1831  auto AndOp = MIRBuilder.buildAnd(
1832  WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
1833  // There is no overflow if the AndOp is the same as NewOp.
1834  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
1835  // Now trunc the NewOp to the original result.
1836  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1837  MI.eraseFromParent();
1838  return Legalized;
1839  }
1840  case TargetOpcode::G_SADDSAT:
1841  case TargetOpcode::G_SSUBSAT:
1842  case TargetOpcode::G_SSHLSAT:
1843  case TargetOpcode::G_UADDSAT:
1844  case TargetOpcode::G_USUBSAT:
1845  case TargetOpcode::G_USHLSAT:
1846  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
1847  case TargetOpcode::G_CTTZ:
1848  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1849  case TargetOpcode::G_CTLZ:
1850  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1851  case TargetOpcode::G_CTPOP: {
1852  if (TypeIdx == 0) {
1854  widenScalarDst(MI, WideTy, 0);
1856  return Legalized;
1857  }
1858 
1859  Register SrcReg = MI.getOperand(1).getReg();
1860 
1861  // First ZEXT the input.
1862  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1863  LLT CurTy = MRI.getType(SrcReg);
1864  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1865  // The count is the same in the larger type except if the original
1866  // value was zero. This can be handled by setting the bit just off
1867  // the top of the original type.
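      // e.g. when widening G_CTTZ from s8 to s32, OR-ing in bit 8 (0x100) makes
      // a zero s8 input produce a count of 8, matching the narrow semantics.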
1868  auto TopBit =
1869  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1870  MIBSrc = MIRBuilder.buildOr(
1871  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1872  }
1873 
1874  // Perform the operation at the larger size.
1875  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1876  // This is already the correct result for CTPOP and CTTZs
1877  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1878  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1879  // The correct result is NewOp - (difference in width between WideTy and CurTy).
1880  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1881  MIBNewOp = MIRBuilder.buildSub(
1882  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1883  }
1884 
1885  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1886  MI.eraseFromParent();
1887  return Legalized;
1888  }
1889  case TargetOpcode::G_BSWAP: {
1891  Register DstReg = MI.getOperand(0).getReg();
1892 
1893  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1894  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1895  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1896  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1897 
1898  MI.getOperand(0).setReg(DstExt);
1899 
1901 
1902  LLT Ty = MRI.getType(DstReg);
1903  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1904  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1905  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1906 
1907  MIRBuilder.buildTrunc(DstReg, ShrReg);
1909  return Legalized;
1910  }
1911  case TargetOpcode::G_BITREVERSE: {
1913 
1914  Register DstReg = MI.getOperand(0).getReg();
1915  LLT Ty = MRI.getType(DstReg);
1916  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1917 
1918  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1919  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1920  MI.getOperand(0).setReg(DstExt);
1922 
1923  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1924  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1925  MIRBuilder.buildTrunc(DstReg, Shift);
1927  return Legalized;
1928  }
1929  case TargetOpcode::G_FREEZE:
1931  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1932  widenScalarDst(MI, WideTy);
1934  return Legalized;
1935 
1936  case TargetOpcode::G_ADD:
1937  case TargetOpcode::G_AND:
1938  case TargetOpcode::G_MUL:
1939  case TargetOpcode::G_OR:
1940  case TargetOpcode::G_XOR:
1941  case TargetOpcode::G_SUB:
1942  // Perform operation at larger width (any extension is fine here, high bits
1943  // don't affect the result) and then truncate the result back to the
1944  // original type.
1946  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1947  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1948  widenScalarDst(MI, WideTy);
1950  return Legalized;
1951 
1952  case TargetOpcode::G_SHL:
1954 
1955  if (TypeIdx == 0) {
1956  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1957  widenScalarDst(MI, WideTy);
1958  } else {
1959  assert(TypeIdx == 1);
1960  // The "number of bits to shift" operand must preserve its value as an
1961  // unsigned integer:
1962  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1963  }
1964 
1966  return Legalized;
1967 
1968  case TargetOpcode::G_SDIV:
1969  case TargetOpcode::G_SREM:
1970  case TargetOpcode::G_SMIN:
1971  case TargetOpcode::G_SMAX:
1973  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1974  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1975  widenScalarDst(MI, WideTy);
1977  return Legalized;
1978 
1979  case TargetOpcode::G_ASHR:
1980  case TargetOpcode::G_LSHR:
1982 
1983  if (TypeIdx == 0) {
1984  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1985  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1986 
1987  widenScalarSrc(MI, WideTy, 1, CvtOp);
1988  widenScalarDst(MI, WideTy);
1989  } else {
1990  assert(TypeIdx == 1);
1991  // The "number of bits to shift" operand must preserve its value as an
1992  // unsigned integer:
1993  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1994  }
1995 
1997  return Legalized;
1998  case TargetOpcode::G_UDIV:
1999  case TargetOpcode::G_UREM:
2000  case TargetOpcode::G_UMIN:
2001  case TargetOpcode::G_UMAX:
2003  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2004  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2005  widenScalarDst(MI, WideTy);
2007  return Legalized;
2008 
2009  case TargetOpcode::G_SELECT:
2011  if (TypeIdx == 0) {
2012  // Perform operation at larger width (any extension is fine here, high
2013  // bits don't affect the result) and then truncate the result back to the
2014  // original type.
2015  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2016  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2017  widenScalarDst(MI, WideTy);
2018  } else {
2019  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2020  // Explicit extension is required here since high bits affect the result.
2021  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2022  }
2024  return Legalized;
2025 
2026  case TargetOpcode::G_FPTOSI:
2027  case TargetOpcode::G_FPTOUI:
2029 
2030  if (TypeIdx == 0)
2031  widenScalarDst(MI, WideTy);
2032  else
2033  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2034 
2036  return Legalized;
2037  case TargetOpcode::G_SITOFP:
2039 
2040  if (TypeIdx == 0)
2041  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2042  else
2043  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2044 
2046  return Legalized;
2047  case TargetOpcode::G_UITOFP:
2049 
2050  if (TypeIdx == 0)
2051  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2052  else
2053  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2054 
2056  return Legalized;
2057  case TargetOpcode::G_LOAD:
2058  case TargetOpcode::G_SEXTLOAD:
2059  case TargetOpcode::G_ZEXTLOAD:
2061  widenScalarDst(MI, WideTy);
2063  return Legalized;
2064 
2065  case TargetOpcode::G_STORE: {
2066  if (TypeIdx != 0)
2067  return UnableToLegalize;
2068 
2069  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2070  if (!Ty.isScalar())
2071  return UnableToLegalize;
2072 
2074 
2075  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2076  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2077  widenScalarSrc(MI, WideTy, 0, ExtType);
2078 
2080  return Legalized;
2081  }
2082  case TargetOpcode::G_CONSTANT: {
2083  MachineOperand &SrcMO = MI.getOperand(1);
2085  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2086  MRI.getType(MI.getOperand(0).getReg()));
2087  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2088  ExtOpc == TargetOpcode::G_ANYEXT) &&
2089  "Illegal Extend");
2090  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2091  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2092  ? SrcVal.sext(WideTy.getSizeInBits())
2093  : SrcVal.zext(WideTy.getSizeInBits());
2095  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2096 
2097  widenScalarDst(MI, WideTy);
2099  return Legalized;
2100  }
2101  case TargetOpcode::G_FCONSTANT: {
2102  MachineOperand &SrcMO = MI.getOperand(1);
2104  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2105  bool LosesInfo;
2106  switch (WideTy.getSizeInBits()) {
2107  case 32:
2108  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2109  &LosesInfo);
2110  break;
2111  case 64:
2112  Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2113  &LosesInfo);
2114  break;
2115  default:
2116  return UnableToLegalize;
2117  }
2118 
2119  assert(!LosesInfo && "extend should always be lossless");
2120 
2122  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2123 
2124  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2126  return Legalized;
2127  }
2128  case TargetOpcode::G_IMPLICIT_DEF: {
2130  widenScalarDst(MI, WideTy);
2132  return Legalized;
2133  }
2134  case TargetOpcode::G_BRCOND:
2136  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2138  return Legalized;
2139 
2140  case TargetOpcode::G_FCMP:
2142  if (TypeIdx == 0)
2143  widenScalarDst(MI, WideTy);
2144  else {
2145  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2146  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2147  }
2149  return Legalized;
2150 
2151  case TargetOpcode::G_ICMP:
2153  if (TypeIdx == 0)
2154  widenScalarDst(MI, WideTy);
2155  else {
2156  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2157  MI.getOperand(1).getPredicate()))
2158  ? TargetOpcode::G_SEXT
2159  : TargetOpcode::G_ZEXT;
2160  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2161  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2162  }
2164  return Legalized;
2165 
2166  case TargetOpcode::G_PTR_ADD:
2167  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2169  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2171  return Legalized;
2172 
2173  case TargetOpcode::G_PHI: {
2174  assert(TypeIdx == 0 && "Expecting only Idx 0");
2175 
2177  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2178  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2179  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2180  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2181  }
2182 
2183  MachineBasicBlock &MBB = *MI.getParent();
2185  widenScalarDst(MI, WideTy);
2187  return Legalized;
2188  }
2189  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2190  if (TypeIdx == 0) {
2191  Register VecReg = MI.getOperand(1).getReg();
2192  LLT VecTy = MRI.getType(VecReg);
2194 
2195  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
2196  WideTy.getSizeInBits()),
2197  1, TargetOpcode::G_SEXT);
2198 
2199  widenScalarDst(MI, WideTy, 0);
2201  return Legalized;
2202  }
2203 
2204  if (TypeIdx != 2)
2205  return UnableToLegalize;
2207  // TODO: Probably should be zext
2208  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2210  return Legalized;
2211  }
2212  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2213  if (TypeIdx == 1) {
2215 
2216  Register VecReg = MI.getOperand(1).getReg();
2217  LLT VecTy = MRI.getType(VecReg);
2218  LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2219 
2220  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2221  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2222  widenScalarDst(MI, WideVecTy, 0);
2224  return Legalized;
2225  }
2226 
2227  if (TypeIdx == 2) {
2229  // TODO: Probably should be zext
2230  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2232  return Legalized;
2233  }
2234 
2235  return UnableToLegalize;
2236  }
2237  case TargetOpcode::G_FADD:
2238  case TargetOpcode::G_FMUL:
2239  case TargetOpcode::G_FSUB:
2240  case TargetOpcode::G_FMA:
2241  case TargetOpcode::G_FMAD:
2242  case TargetOpcode::G_FNEG:
2243  case TargetOpcode::G_FABS:
2244  case TargetOpcode::G_FCANONICALIZE:
2245  case TargetOpcode::G_FMINNUM:
2246  case TargetOpcode::G_FMAXNUM:
2247  case TargetOpcode::G_FMINNUM_IEEE:
2248  case TargetOpcode::G_FMAXNUM_IEEE:
2249  case TargetOpcode::G_FMINIMUM:
2250  case TargetOpcode::G_FMAXIMUM:
2251  case TargetOpcode::G_FDIV:
2252  case TargetOpcode::G_FREM:
2253  case TargetOpcode::G_FCEIL:
2254  case TargetOpcode::G_FFLOOR:
2255  case TargetOpcode::G_FCOS:
2256  case TargetOpcode::G_FSIN:
2257  case TargetOpcode::G_FLOG10:
2258  case TargetOpcode::G_FLOG:
2259  case TargetOpcode::G_FLOG2:
2260  case TargetOpcode::G_FRINT:
2261  case TargetOpcode::G_FNEARBYINT:
2262  case TargetOpcode::G_FSQRT:
2263  case TargetOpcode::G_FEXP:
2264  case TargetOpcode::G_FEXP2:
2265  case TargetOpcode::G_FPOW:
2266  case TargetOpcode::G_INTRINSIC_TRUNC:
2267  case TargetOpcode::G_INTRINSIC_ROUND:
2268  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2269  assert(TypeIdx == 0);
2271 
2272  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2273  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2274 
2275  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2277  return Legalized;
2278  case TargetOpcode::G_FPOWI: {
2279  if (TypeIdx != 0)
2280  return UnableToLegalize;
2282  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2283  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2285  return Legalized;
2286  }
2287  case TargetOpcode::G_INTTOPTR:
2288  if (TypeIdx != 1)
2289  return UnableToLegalize;
2290 
2292  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2294  return Legalized;
2295  case TargetOpcode::G_PTRTOINT:
2296  if (TypeIdx != 0)
2297  return UnableToLegalize;
2298 
2300  widenScalarDst(MI, WideTy, 0);
2302  return Legalized;
2303  case TargetOpcode::G_BUILD_VECTOR: {
2305 
2306  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2307  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2308  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2309 
2310  // Avoid changing the result vector type if the source element type was
2311  // requested.
2312  if (TypeIdx == 1) {
2313  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2314  } else {
2315  widenScalarDst(MI, WideTy, 0);
2316  }
2317 
2319  return Legalized;
2320  }
2321  case TargetOpcode::G_SEXT_INREG:
2322  if (TypeIdx != 0)
2323  return UnableToLegalize;
2324 
2326  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2327  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2329  return Legalized;
2330  case TargetOpcode::G_PTRMASK: {
2331  if (TypeIdx != 1)
2332  return UnableToLegalize;
2334  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2336  return Legalized;
2337  }
2338  }
2339 }
2340 
2341 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2342  MachineIRBuilder &B, Register Src, LLT Ty) {
2343  auto Unmerge = B.buildUnmerge(Ty, Src);
2344  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2345  Pieces.push_back(Unmerge.getReg(I));
2346 }
2347 
2350  Register Dst = MI.getOperand(0).getReg();
2351  Register Src = MI.getOperand(1).getReg();
2352  LLT DstTy = MRI.getType(Dst);
2353  LLT SrcTy = MRI.getType(Src);
2354 
2355  if (SrcTy.isVector()) {
2356  LLT SrcEltTy = SrcTy.getElementType();
2357  SmallVector<Register, 8> SrcRegs;
2358 
2359  if (DstTy.isVector()) {
2360  int NumDstElt = DstTy.getNumElements();
2361  int NumSrcElt = SrcTy.getNumElements();
2362 
2363  LLT DstEltTy = DstTy.getElementType();
2364  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2365  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2366 
2367  // If there's an element size mismatch, insert intermediate casts to match
2368  // the result element type.
2369  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2370  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2371  //
2372  // =>
2373  //
2374  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2375  // %4:_(<2 x s8>) = G_BITCAST %2
2376  // %5:_(<2 x s8>) = G_BITCAST %3
2377  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2378  DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2379  SrcPartTy = SrcEltTy;
2380  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2381  //
2382  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2383  //
2384  // =>
2385  //
2386  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2387  // %4:_(s16) = G_BITCAST %2
2388  // %5:_(s16) = G_BITCAST %3
2389  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2390  SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2391  DstCastTy = DstEltTy;
2392  }
2393 
2394  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2395  for (Register &SrcReg : SrcRegs)
2396  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2397  } else
2398  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2399 
2400  MIRBuilder.buildMerge(Dst, SrcRegs);
2401  MI.eraseFromParent();
2402  return Legalized;
2403  }
2404 
2405  if (DstTy.isVector()) {
2406  SmallVector<Register, 8> SrcRegs;
2407  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2408  MIRBuilder.buildMerge(Dst, SrcRegs);
2409  MI.eraseFromParent();
2410  return Legalized;
2411  }
2412 
2413  return UnableToLegalize;
2414 }
2415 
2416 /// Figure out the bit offset into a register when coercing a vector index for
2417 /// the wide element type. This is only for the case of promoting a vector to
2418 /// one with larger elements.
2419 ///
2420 ///
2421 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2422 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
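     /// For example, with a bitcast from <8 x s8> to <2 x s32> (NewEltSize = 32,
     /// OldEltSize = 8, so Log2(32/8) = 2) and %idx = 6, this computes
     /// %offset_idx = 6 & 3 = 2 and %offset_bits = 2 << 3 = 16.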
2423 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2424  Register Idx,
2425  unsigned NewEltSize,
2426  unsigned OldEltSize) {
2427  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2428  LLT IdxTy = B.getMRI()->getType(Idx);
2429 
2430  // Now figure out the amount we need to shift to get the target bits.
2431  auto OffsetMask = B.buildConstant(
2432  IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2433  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2434  return B.buildShl(IdxTy, OffsetIdx,
2435  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2436 }
2437 
2438 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2439 /// is casting to a vector with a smaller element size, perform multiple element
2440 /// extracts and merge the results. If this is coercing to a vector with larger
2441 /// elements, index the bitcasted vector and extract the target element with bit
2442 /// operations. This is intended to force the indexing in the native register
2443 /// size for architectures that can dynamically index the register file.
2446  LLT CastTy) {
2447  if (TypeIdx != 1)
2448  return UnableToLegalize;
2449 
2450  Register Dst = MI.getOperand(0).getReg();
2451  Register SrcVec = MI.getOperand(1).getReg();
2452  Register Idx = MI.getOperand(2).getReg();
2453  LLT SrcVecTy = MRI.getType(SrcVec);
2454  LLT IdxTy = MRI.getType(Idx);
2455 
2456  LLT SrcEltTy = SrcVecTy.getElementType();
2457  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2458  unsigned OldNumElts = SrcVecTy.getNumElements();
2459 
2460  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2461  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2462 
2463  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2464  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2465  if (NewNumElts > OldNumElts) {
2466  // Decreasing the vector element size
2467  //
2468  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2469  // =>
2470  // v4i32:castx = bitcast x:v2i64
2471  //
2472  // i64 = bitcast
2473  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2474  // (i32 (extract_vector_elt castx, (2 * y + 1))))
2475  //
2476  if (NewNumElts % OldNumElts != 0)
2477  return UnableToLegalize;
2478 
2479  // Type of the intermediate result vector.
2480  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2481  LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
2482 
2483  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2484 
2485  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2486  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2487 
2488  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2489  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2490  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2491  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2492  NewOps[I] = Elt.getReg(0);
2493  }
2494 
2495  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2496  MIRBuilder.buildBitcast(Dst, NewVec);
2497  MI.eraseFromParent();
2498  return Legalized;
2499  }
2500 
2501  if (NewNumElts < OldNumElts) {
2502  if (NewEltSize % OldEltSize != 0)
2503  return UnableToLegalize;
2504 
2505  // This only depends on powers of 2 because we use bit tricks to figure out
2506  // the bit offset we need to shift to get the target element. A general
2507  // expansion could emit division/multiply.
2508  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2509  return UnableToLegalize;
2510 
2511  // Increasing the vector element size.
2512  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2513  //
2514  // =>
2515  //
2516  // %cast = G_BITCAST %vec
2517  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2518  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2519  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2520  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2521  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2522  // %elt = G_TRUNC %elt_bits
2523 
2524  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2525  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2526 
2527  // Divide to get the index in the wider element type.
2528  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2529 
2530  Register WideElt = CastVec;
2531  if (CastTy.isVector()) {
2532  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2533  ScaledIdx).getReg(0);
2534  }
2535 
2536  // Compute the bit offset into the register of the target element.
2537  Register OffsetBits = getBitcastWiderVectorElementOffset(
2538  MIRBuilder, Idx, NewEltSize, OldEltSize);
2539 
2540  // Shift the wide element to get the target element.
2541  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2542  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2543  MI.eraseFromParent();
2544  return Legalized;
2545  }
2546 
2547  return UnableToLegalize;
2548 }
2549 
2550 /// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
2551 /// OffsetBits, while preserving the other bits of \p TargetReg.
2552 ///
2553 /// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
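     /// For example, with a 32-bit \p TargetReg, an 8-bit \p InsertReg and
     /// \p OffsetBits = 16 (illustrative values), the mask becomes 0xFF << 16 and
     /// the result is (TargetReg & ~0x00FF0000) | (ZExt(InsertReg) << 16).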
2554 static Register buildBitFieldInsert(MachineIRBuilder &B,
2555  Register TargetReg, Register InsertReg,
2556  Register OffsetBits) {
2557  LLT TargetTy = B.getMRI()->getType(TargetReg);
2558  LLT InsertTy = B.getMRI()->getType(InsertReg);
2559  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2560  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2561 
2562  // Produce a bitmask of the value to insert
2563  auto EltMask = B.buildConstant(
2564  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2565  InsertTy.getSizeInBits()));
2566  // Shift it into position
2567  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2568  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2569 
2570  // Clear out the bits in the wide element
2571  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2572 
2573  // The value to insert has all zeros already, so stick it into the masked
2574  // wide element.
2575  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2576 }
2577 
2578 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2579 /// is increasing the element size, perform the indexing in the target element
2580 /// type, and use bit operations to insert at the element position. This is
2581 /// intended for architectures that can dynamically index the register file and
2582 /// want to force indexing in the native register size.
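     /// For example (illustrative types), inserting an s8 element into a
     /// <16 x s8> vector by way of a <4 x s32> cast looks roughly like:
     ///   %cast       = G_BITCAST %vec(<16 x s8>)          ; now <4 x s32>
     ///   %scaled_idx = G_LSHR %idx, 2
     ///   %wide_elt   = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
     ///   %ins        = bit-field insert of %val into %wide_elt at the offset
     ///                 derived from %idx
     ///   %new_vec    = G_INSERT_VECTOR_ELT %cast, %ins, %scaled_idx
     ///   %dst        = G_BITCAST %new_vec                 ; back to <16 x s8>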
2585  LLT CastTy) {
2586  if (TypeIdx != 0)
2587  return UnableToLegalize;
2588 
2589  Register Dst = MI.getOperand(0).getReg();
2590  Register SrcVec = MI.getOperand(1).getReg();
2591  Register Val = MI.getOperand(2).getReg();
2592  Register Idx = MI.getOperand(3).getReg();
2593 
2594  LLT VecTy = MRI.getType(Dst);
2595  LLT IdxTy = MRI.getType(Idx);
2596 
2597  LLT VecEltTy = VecTy.getElementType();
2598  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2599  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2600  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2601 
2602  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2603  unsigned OldNumElts = VecTy.getNumElements();
2604 
2605  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2606  if (NewNumElts < OldNumElts) {
2607  if (NewEltSize % OldEltSize != 0)
2608  return UnableToLegalize;
2609 
2610  // This only depends on powers of 2 because we use bit tricks to figure out
2611  // the bit offset we need to shift to get the target element. A general
2612  // expansion could emit division/multiply.
2613  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2614  return UnableToLegalize;
2615 
2616  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2617  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2618 
2619  // Divide to get the index in the wider element type.
2620  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2621 
2622  Register ExtractedElt = CastVec;
2623  if (CastTy.isVector()) {
2624  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2625  ScaledIdx).getReg(0);
2626  }
2627 
2628  // Compute the bit offset into the register of the target element.
2629  Register OffsetBits = getBitcastWiderVectorElementOffset(
2630  MIRBuilder, Idx, NewEltSize, OldEltSize);
2631 
2632  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2633  Val, OffsetBits);
2634  if (CastTy.isVector()) {
2635  InsertedElt = MIRBuilder.buildInsertVectorElement(
2636  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2637  }
2638 
2639  MIRBuilder.buildBitcast(Dst, InsertedElt);
2640  MI.eraseFromParent();
2641  return Legalized;
2642  }
2643 
2644  return UnableToLegalize;
2645 }
2646 
2649  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2650  Register DstReg = MI.getOperand(0).getReg();
2651  Register PtrReg = MI.getOperand(1).getReg();
2652  LLT DstTy = MRI.getType(DstReg);
2653  auto &MMO = **MI.memoperands_begin();
2654 
2655  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2656  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2657  // This load needs splitting into power of 2 sized loads.
2658  if (DstTy.isVector())
2659  return UnableToLegalize;
2660  if (isPowerOf2_32(DstTy.getSizeInBits()))
2661  return UnableToLegalize; // Don't know what we're being asked to do.
2662 
2663  // Our strategy here is to generate anyextending loads for the smaller
2664  // types up to the next power-of-2 result type, and then combine the two
2665  // loaded values together, before truncating back down to the non-pow-2
2666  // type.
2667  // E.g. v1 = i24 load =>
2668  // v2 = i32 zextload (2 byte)
2669  // v3 = i32 load (1 byte)
2670  // v4 = i32 shl v3, 16
2671  // v5 = i32 or v4, v2
2672  // v1 = i24 trunc v5
2673  // By doing this we generate the correct truncate which should get
2674  // combined away as an artifact with a matching extend.
2675  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2676  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2677 
2678  MachineFunction &MF = MIRBuilder.getMF();
2679  MachineMemOperand *LargeMMO =
2680  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2681  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2682  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2683 
2684  LLT PtrTy = MRI.getType(PtrReg);
2685  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2686  LLT AnyExtTy = LLT::scalar(AnyExtSize);
2687  Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2688  Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2689  auto LargeLoad = MIRBuilder.buildLoadInstr(
2690  TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2691 
2692  auto OffsetCst = MIRBuilder.buildConstant(
2693  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2694  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2695  auto SmallPtr =
2696  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2697  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2698  *SmallMMO);
2699 
2700  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2701  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2702  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2703  MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2704  MI.eraseFromParent();
2705  return Legalized;
2706  }
2707 
2708  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2709  MI.eraseFromParent();
2710  return Legalized;
2711  }
2712 
2713  if (DstTy.isScalar()) {
2714  Register TmpReg =
2715  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2716  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2717  switch (MI.getOpcode()) {
2718  default:
2719  llvm_unreachable("Unexpected opcode");
2720  case TargetOpcode::G_LOAD:
2721  MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
2722  break;
2723  case TargetOpcode::G_SEXTLOAD:
2724  MIRBuilder.buildSExt(DstReg, TmpReg);
2725  break;
2726  case TargetOpcode::G_ZEXTLOAD:
2727  MIRBuilder.buildZExt(DstReg, TmpReg);
2728  break;
2729  }
2730 
2731  MI.eraseFromParent();
2732  return Legalized;
2733  }
2734 
2735  return UnableToLegalize;
2736 }
2737 
2740  // Lower a non-power of 2 store into multiple pow-2 stores.
2741  // E.g. split an i24 store into an i16 store + i8 store.
2742  // We do this by first extending the stored value to the next largest power
2743  // of 2 type, and then using truncating stores to store the components.
2744  // As with G_LOAD, doing this generates an extend that can be
2745  // artifact-combined away instead of leaving behind extracts.
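      // For an i24 store this yields roughly (illustrative register names):
      //   %ext:_(s32) = G_ANYEXT %val(s24)
      //   G_STORE %ext(s32), %ptr            ; 2-byte truncating store
      //   %hi:_(s32)  = G_LSHR %ext, 16
      //   %ptr2       = G_PTR_ADD %ptr, 2
      //   G_STORE %hi(s32), %ptr2            ; 1-byte truncating store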
2746  Register SrcReg = MI.getOperand(0).getReg();
2747  Register PtrReg = MI.getOperand(1).getReg();
2748  LLT SrcTy = MRI.getType(SrcReg);
2749  MachineMemOperand &MMO = **MI.memoperands_begin();
2750  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2751  return UnableToLegalize;
2752  if (SrcTy.isVector())
2753  return UnableToLegalize;
2754  if (isPowerOf2_32(SrcTy.getSizeInBits()))
2755  return UnableToLegalize; // Don't know what we're being asked to do.
2756 
2757  // Extend to the next pow-2.
2758  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2759  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2760 
2761  // Obtain the smaller value by shifting away the larger value.
2762  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2763  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2764  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2765  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2766 
2767  // Generate the PtrAdd and truncating stores.
2768  LLT PtrTy = MRI.getType(PtrReg);
2769  auto OffsetCst = MIRBuilder.buildConstant(
2770  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2771  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2772  auto SmallPtr =
2773  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2774 
2775  MachineFunction &MF = MIRBuilder.getMF();
2776  MachineMemOperand *LargeMMO =
2777  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2778  MachineMemOperand *SmallMMO =
2779  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2780  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2781  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2782  MI.eraseFromParent();
2783  return Legalized;
2784 }
2785 
2787 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2788  switch (MI.getOpcode()) {
2789  case TargetOpcode::G_LOAD: {
2790  if (TypeIdx != 0)
2791  return UnableToLegalize;
2792 
2794  bitcastDst(MI, CastTy, 0);
2796  return Legalized;
2797  }
2798  case TargetOpcode::G_STORE: {
2799  if (TypeIdx != 0)
2800  return UnableToLegalize;
2801 
2803  bitcastSrc(MI, CastTy, 0);
2805  return Legalized;
2806  }
2807  case TargetOpcode::G_SELECT: {
2808  if (TypeIdx != 0)
2809  return UnableToLegalize;
2810 
2811  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2812  LLVM_DEBUG(
2813  dbgs() << "bitcast action not implemented for vector select\n");
2814  return UnableToLegalize;
2815  }
2816 
2818  bitcastSrc(MI, CastTy, 2);
2819  bitcastSrc(MI, CastTy, 3);
2820  bitcastDst(MI, CastTy, 0);
2822  return Legalized;
2823  }
2824  case TargetOpcode::G_AND:
2825  case TargetOpcode::G_OR:
2826  case TargetOpcode::G_XOR: {
2828  bitcastSrc(MI, CastTy, 1);
2829  bitcastSrc(MI, CastTy, 2);
2830  bitcastDst(MI, CastTy, 0);
2832  return Legalized;
2833  }
2834  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2835  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2836  case TargetOpcode::G_INSERT_VECTOR_ELT:
2837  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2838  default:
2839  return UnableToLegalize;
2840  }
2841 }
2842 
2843 // Legalize an instruction by changing the opcode in place.
2844 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2846  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2848 }
2849 
2851 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2852  using namespace TargetOpcode;
2853 
2854  switch(MI.getOpcode()) {
2855  default:
2856  return UnableToLegalize;
2857  case TargetOpcode::G_BITCAST:
2858  return lowerBitcast(MI);
2859  case TargetOpcode::G_SREM:
2860  case TargetOpcode::G_UREM: {
2861  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2862  auto Quot =
2863  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2864  {MI.getOperand(1), MI.getOperand(2)});
2865 
2866  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2867  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2868  MI.eraseFromParent();
2869  return Legalized;
2870  }
2871  case TargetOpcode::G_SADDO:
2872  case TargetOpcode::G_SSUBO:
2873  return lowerSADDO_SSUBO(MI);
2874  case TargetOpcode::G_UMULH:
2875  case TargetOpcode::G_SMULH:
2876  return lowerSMULH_UMULH(MI);
2877  case TargetOpcode::G_SMULO:
2878  case TargetOpcode::G_UMULO: {
2879  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2880  // result.
2881  Register Res = MI.getOperand(0).getReg();
2882  Register Overflow = MI.getOperand(1).getReg();
2883  Register LHS = MI.getOperand(2).getReg();
2884  Register RHS = MI.getOperand(3).getReg();
2885  LLT Ty = MRI.getType(Res);
2886 
2887  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2888  ? TargetOpcode::G_SMULH
2889  : TargetOpcode::G_UMULH;
2890 
2892  const auto &TII = MIRBuilder.getTII();
2893  MI.setDesc(TII.get(TargetOpcode::G_MUL));
2894  MI.RemoveOperand(1);
2896 
2897  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2898  auto Zero = MIRBuilder.buildConstant(Ty, 0);
2899 
2900  // Move insert point forward so we can use the Res register if needed.
2902 
2903  // For *signed* multiply, overflow is detected by checking:
2904  // (hi != (lo >> bitwidth-1))
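      // e.g. for a 32-bit G_SMULO there is no overflow iff HiPart equals
      // Res ashr 31, i.e. the high half is just the sign-extension of the low
      // half.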
2905  if (Opcode == TargetOpcode::G_SMULH) {
2906  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2907  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2908  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2909  } else {
2910  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2911  }
2912  return Legalized;
2913  }
2914  case TargetOpcode::G_FNEG: {
2915  Register Res = MI.getOperand(0).getReg();
2916  LLT Ty = MRI.getType(Res);
2917 
2918  // TODO: Handle vector types once we are able to
2919  // represent them.
2920  if (Ty.isVector())
2921  return UnableToLegalize;
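      // Lower the negation to flipping the sign bit:
      //   %res = G_XOR %x, SignMask  (e.g. 0x80000000 for s32)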
2922  auto SignMask =
2923  MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
2924  Register SubByReg = MI.getOperand(1).getReg();
2925  MIRBuilder.buildXor(Res, SubByReg, SignMask);
2926  MI.eraseFromParent();
2927  return Legalized;
2928  }
2929  case TargetOpcode::G_FSUB: {
2930  Register Res = MI.getOperand(0).getReg();
2931  LLT Ty = MRI.getType(Res);
2932 
2933  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2934  // First, check if G_FNEG is marked as Lower. If so, we may
2935  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2936  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2937  return UnableToLegalize;
2938  Register LHS = MI.getOperand(1).getReg();
2939  Register RHS = MI.getOperand(2).getReg();
2940  Register Neg = MRI.createGenericVirtualRegister(Ty);
2941  MIRBuilder.buildFNeg(Neg, RHS);
2942  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2943  MI.eraseFromParent();
2944  return Legalized;
2945  }
2946  case TargetOpcode::G_FMAD:
2947  return lowerFMad(MI);
2948  case TargetOpcode::G_FFLOOR:
2949  return lowerFFloor(MI);
2950  case TargetOpcode::G_INTRINSIC_ROUND:
2951  return lowerIntrinsicRound(MI);
2952  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
2953  // Since round even is the assumed rounding mode for unconstrained FP
2954  // operations, rint and roundeven are the same operation.
2955  changeOpcode(MI, TargetOpcode::G_FRINT);
2956  return Legalized;
2957  }
2958  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2959  Register OldValRes = MI.getOperand(0).getReg();
2960  Register SuccessRes = MI.getOperand(1).getReg();
2961  Register Addr = MI.getOperand(2).getReg();
2962  Register CmpVal = MI.getOperand(3).getReg();
2963  Register NewVal = MI.getOperand(4).getReg();
2964  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2965  **MI.memoperands_begin());
2966  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2967  MI.eraseFromParent();
2968  return Legalized;
2969  }
2970  case TargetOpcode::G_LOAD:
2971  case TargetOpcode::G_SEXTLOAD:
2972  case TargetOpcode::G_ZEXTLOAD:
2973  return lowerLoad(MI);
2974  case TargetOpcode::G_STORE:
2975  return lowerStore(MI);
2976  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2977  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2978  case TargetOpcode::G_CTLZ:
2979  case TargetOpcode::G_CTTZ:
2980  case TargetOpcode::G_CTPOP:
2981  return lowerBitCount(MI);
2982  case G_UADDO: {
2983  Register Res = MI.getOperand(0).getReg();
2984  Register CarryOut = MI.getOperand(1).getReg();
2985  Register LHS = MI.getOperand(2).getReg();
2986  Register RHS = MI.getOperand(3).getReg();
2987 
2988  MIRBuilder.buildAdd(Res, LHS, RHS);
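      // The unsigned add wrapped iff the result is less than either operand;
      // comparing against RHS is sufficient.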
2989  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2990 
2991  MI.eraseFromParent();
2992  return Legalized;
2993  }
2994  case G_UADDE: {
2995  Register Res = MI.getOperand(0).getReg();
2996  Register CarryOut = MI.getOperand(1).getReg();
2997  Register LHS = MI.getOperand(2).getReg();
2998  Register RHS = MI.getOperand(3).getReg();
2999  Register CarryIn = MI.getOperand(4).getReg();
3000  LLT Ty = MRI.getType(Res);
3001 
3002  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3003  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3004  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3005  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3006 
3007  MI.eraseFromParent();
3008  return Legalized;
3009  }
3010  case G_USUBO: {
3011  Register Res = MI.getOperand(0).getReg();
3012  Register BorrowOut = MI.getOperand(1).getReg();
3013  Register LHS = MI.getOperand(2).getReg();
3014  Register RHS = MI.getOperand(3).getReg();
3015 
3016  MIRBuilder.buildSub(Res, LHS, RHS);
3017  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3018 
3019  MI.eraseFromParent();
3020  return Legalized;
3021  }
3022  case G_USUBE: {
3023  Register Res = MI.getOperand(0).getReg();
3024  Register BorrowOut = MI.getOperand(1).getReg();
3025  Register LHS = MI.getOperand(2).getReg();
3026  Register RHS = MI.getOperand(3).getReg();
3027  Register BorrowIn = MI.getOperand(4).getReg();
3028  const LLT CondTy = MRI.getType(BorrowOut);
3029  const LLT Ty = MRI.getType(Res);
3030 
3031  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3032  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3033  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3034 
3035  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3036  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
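      // A borrow is produced when LHS < RHS, or when LHS == RHS and a borrow
      // came in.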
3037  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3038 
3039  MI.eraseFromParent();
3040  return Legalized;
3041  }
3042  case G_UITOFP:
3043  return lowerUITOFP(MI);
3044  case G_SITOFP:
3045  return lowerSITOFP(MI);
3046  case G_FPTOUI:
3047  return lowerFPTOUI(MI);
3048  case G_FPTOSI:
3049  return lowerFPTOSI(MI);
3050  case G_FPTRUNC:
3051  return lowerFPTRUNC(MI);
3052  case G_FPOWI:
3053  return lowerFPOWI(MI);
3054  case G_SMIN:
3055  case G_SMAX:
3056  case G_UMIN:
3057  case G_UMAX:
3058  return lowerMinMax(MI);
3059  case G_FCOPYSIGN:
3060  return lowerFCopySign(MI);
3061  case G_FMINNUM:
3062  case G_FMAXNUM:
3063  return lowerFMinNumMaxNum(MI);
3064  case G_MERGE_VALUES:
3065  return lowerMergeValues(MI);
3066  case G_UNMERGE_VALUES:
3067  return lowerUnmergeValues(MI);
3068  case TargetOpcode::G_SEXT_INREG: {
3069  assert(MI.getOperand(2).isImm() && "Expected immediate");
3070  int64_t SizeInBits = MI.getOperand(2).getImm();
3071 
3072  Register DstReg = MI.getOperand(0).getReg();
3073  Register SrcReg = MI.getOperand(1).getReg();
3074  LLT DstTy = MRI.getType(DstReg);
3075  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3076 
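      // Shift the value up so the bit at position SizeInBits - 1 lands in the
      // sign bit, then arithmetic-shift it back down to replicate it.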
3077  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3078  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3079  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3080  MI.eraseFromParent();
3081  return Legalized;
3082  }
3083  case G_EXTRACT_VECTOR_ELT:
3084  case G_INSERT_VECTOR_ELT:
3086  case G_SHUFFLE_VECTOR:
3087  return lowerShuffleVector(MI);
3088  case G_DYN_STACKALLOC:
3089  return lowerDynStackAlloc(MI);
3090  case G_EXTRACT:
3091  return lowerExtract(MI);
3092  case G_INSERT:
3093  return lowerInsert(MI);
3094  case G_BSWAP:
3095  return lowerBswap(MI);
3096  case G_BITREVERSE:
3097  return lowerBitreverse(MI);
3098  case G_READ_REGISTER:
3099  case G_WRITE_REGISTER:
3100  return lowerReadWriteRegister(MI);
3101  case G_UADDSAT:
3102  case G_USUBSAT: {
3103  // Try to make a reasonable guess about which lowering strategy to use. The
3104  // target can override this by requesting custom lowering and calling the
3105  // implementation functions directly.
3106  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3107  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3108  return lowerAddSubSatToMinMax(MI);
3109  return lowerAddSubSatToAddoSubo(MI);
3110  }
3111  case G_SADDSAT:
3112  case G_SSUBSAT: {
3113  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3114 
3115  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3116  // since it's a shorter expansion. However, we would need to figure out the
3117  // preferred boolean type for the carry out for the query.
3118  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3119  return lowerAddSubSatToMinMax(MI);
3120  return lowerAddSubSatToAddoSubo(MI);
3121  }
3122  case G_SSHLSAT:
3123  case G_USHLSAT:
3124  return lowerShlSat(MI);
3125  case G_ABS: {
3126  // Expand %res = G_ABS %a into:
3127  // %v1 = G_ASHR %a, scalar_size-1
3128  // %v2 = G_ADD %a, %v1
3129  // %res = G_XOR %v2, %v1
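      // For negative %a, %v1 is all ones, so %v2 = %a - 1 and the XOR flips
      // every bit, giving -%a; for non-negative %a, %v1 is zero and %res = %a.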
3130  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3131  Register OpReg = MI.getOperand(1).getReg();
3132  auto ShiftAmt =
3133  MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3134  auto Shift =
3135  MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3136  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3137  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3138  MI.eraseFromParent();
3139  return Legalized;
3140  }
3141  case G_SELECT:
3142  return lowerSelect(MI);
3143  }
3144 }
3145 
3147  Align MinAlign) const {
3148  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3149  // datalayout for the preferred alignment. Also there should be a target hook
3150  // for this to allow targets to reduce the alignment and ignore the
3151  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3152  // the type.
3154 }
3155 
3158  MachinePointerInfo &PtrInfo) {
3161  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3162 
3163  unsigned AddrSpace = DL.getAllocaAddrSpace();
3164  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3165 
3166  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3167  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3168 }
3169 
3170 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3171  LLT VecTy) {
3172  int64_t IdxVal;
3173  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3174  return IdxReg;
3175 
3176  LLT IdxTy = B.getMRI()->getType(IdxReg);
3177  unsigned NElts = VecTy.getNumElements();
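      // Clamp the index into range: mask with NElts - 1 when the element count
      // is a power of two, otherwise take G_UMIN with NElts - 1.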
3178  if (isPowerOf2_32(NElts)) {
3179  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3180  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3181  }
3182 
3183  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3184  .getReg(0);
3185 }
3186 
3188  Register Index) {
3189  LLT EltTy = VecTy.getElementType();
3190 
3191  // Calculate the element offset and add it to the pointer.
3192  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3193  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3194  "Converting bits to bytes lost precision");
3195 
3196  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3197 
3198  LLT IdxTy = MRI.getType(Index);
3199  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3200  MIRBuilder.buildConstant(IdxTy, EltSize));
3201 
3202  LLT PtrTy = MRI.getType(VecPtr);
3203  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3204 }
3205 
3207  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3208  Register DstReg = MI.getOperand(0).getReg();
3209  LLT DstTy = MRI.getType(DstReg);
3210  LLT LCMTy = getLCMType(DstTy, NarrowTy);
3211 
3212  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3213 
3214  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3215  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3216 
3217  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3218  MI.eraseFromParent();
3219  return Legalized;
3220 }
3221 
3222 // Handle splitting vector operations which need to have the same number of
3223 // elements in each type index, but each type index may have a different element
3224 // type.
3225 //
3226 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3227 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3228 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3229 //
3230 // Also handles some irregular breakdown cases, e.g.
3231 //   <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3232 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3233 // s64 = G_SHL s64, s32
3236  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3237  if (TypeIdx != 0)
3238  return UnableToLegalize;
3239 
3240  const LLT NarrowTy0 = NarrowTyArg;
3241  const unsigned NewNumElts =
3242  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
3243 
3244  const Register DstReg = MI.getOperand(0).getReg();
3245  LLT DstTy = MRI.getType(DstReg);
3246  LLT LeftoverTy0;
3247 
3248  // All of the operands need to have the same number of elements, so if we can
3249  // determine a type breakdown for the result type, we can for all of the
3250  // source types.
3251  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3252  if (NumParts < 0)
3253  return UnableToLegalize;
3254 
3256 
3257  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3258  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3259 
3260  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3261  Register SrcReg = MI.getOperand(I).getReg();
3262  LLT SrcTyI = MRI.getType(SrcReg);
3263  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
3264  LLT LeftoverTyI;
3265 
3266  // Split this operand into the requested typed registers, and any leftover
3267  // required to reproduce the original type.
3268  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3269  LeftoverRegs))
3270  return UnableToLegalize;
3271 
3272  if (I == 1) {
3273  // For the first operand, create an instruction for each part and setup
3274  // the result.
3275  for (Register PartReg : PartRegs) {
3276  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3277  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3278  .addDef(PartDstReg)
3279  .addUse(PartReg));
3280  DstRegs.push_back(PartDstReg);
3281  }
3282 
3283  for (Register LeftoverReg : LeftoverRegs) {
3284  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3285  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3286  .addDef(PartDstReg)
3287  .addUse(LeftoverReg));
3288  LeftoverDstRegs.push_back(PartDstReg);
3289  }
3290  } else {
3291  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3292 
3293  // Add the newly created operand splits to the existing instructions. The
3294  // odd-sized pieces are ordered after the requested NarrowTyArg sized
3295  // pieces.
3296  unsigned InstCount = 0;
3297  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3298  NewInsts[InstCount++].addUse(PartRegs[J]);
3299  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3300  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3301  }
3302 
3303  PartRegs.clear();
3304  LeftoverRegs.clear();
3305  }
3306 
3307  // Insert the newly built operations and rebuild the result register.
3308  for (auto &MIB : NewInsts)
3309  MIRBuilder.insertInstr(MIB);
3310 
3311  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3312 
3313  MI.eraseFromParent();
3314  return Legalized;
3315 }
3316 
3319  LLT NarrowTy) {
3320  if (TypeIdx != 0)
3321  return UnableToLegalize;
3322 
3323  Register DstReg = MI.getOperand(0).getReg();
3324  Register SrcReg = MI.getOperand(1).getReg();
3325  LLT DstTy = MRI.getType(DstReg);
3326  LLT SrcTy = MRI.getType(SrcReg);
3327 
3328  LLT NarrowTy0 = NarrowTy;
3329  LLT NarrowTy1;
3330  unsigned NumParts;
3331 
3332  if (NarrowTy.isVector()) {
3333  // Uneven breakdown not handled.
3334  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3335  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3336  return UnableToLegalize;
3337 
3338  NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3339  } else {
3340  NumParts = DstTy.getNumElements();
3341  NarrowTy1 = SrcTy.getElementType();
3342  }
3343 
3344  SmallVector<Register, 4> SrcRegs, DstRegs;
3345  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3346 
3347  for (unsigned I = 0; I < NumParts; ++I) {
3348  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3349  MachineInstr *NewInst =
3350  MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3351 
3352  NewInst->setFlags(MI.getFlags());
3353  DstRegs.push_back(DstReg);
3354  }
3355 
3356  if (NarrowTy.isVector())
3357  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3358  else
3359  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3360 
3361  MI.eraseFromParent();
3362  return Legalized;
3363 }
3364 
3367  LLT NarrowTy) {
3368  Register DstReg = MI.getOperand(0).getReg();
3369  Register Src0Reg = MI.getOperand(2).getReg();
3370  LLT DstTy = MRI.getType(DstReg);
3371  LLT SrcTy = MRI.getType(Src0Reg);
3372 
3373  unsigned NumParts;
3374  LLT NarrowTy0, NarrowTy1;
3375 
3376  if (TypeIdx == 0) {
3377  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3378  unsigned OldElts = DstTy.getNumElements();
3379 
3380  NarrowTy0 = NarrowTy;
3381  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3382  NarrowTy1 = NarrowTy.isVector() ?
3383  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
3384  SrcTy.getElementType();
3385 
3386  } else {
3387  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3388  unsigned OldElts = SrcTy.getNumElements();
3389 
3390  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3391  NarrowTy.getNumElements();
3392  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
3393  DstTy.getScalarSizeInBits());
3394  NarrowTy1 = NarrowTy;
3395  }
3396 
3397  // FIXME: Don't know how to handle the situation where the small vectors
3398  // aren't all the same size yet.
3399  if (NarrowTy1.isVector() &&
3400  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3401  return UnableToLegalize;
3402 
3403  CmpInst::Predicate Pred
3404  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3405 
3406  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3407  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3408  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3409 
3410  for (unsigned I = 0; I < NumParts; ++I) {
3411  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3412  DstRegs.push_back(DstReg);
3413 
3414  if (MI.getOpcode() == TargetOpcode::G_ICMP)
3415  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3416  else {
3417  MachineInstr *NewCmp
3418  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3419  NewCmp->setFlags(MI.getFlags());
3420  }
3421  }
3422 
3423  if (NarrowTy1.isVector())
3424  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3425  else
3426  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3427 
3428  MI.eraseFromParent();
3429  return Legalized;
3430 }
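//
// For example (registers illustrative), splitting
// %3:_(<4 x s1>) = G_ICMP intpred(slt), %1:_(<4 x s32>), %2
// with NarrowTy = <2 x s1> gives:
//
// %4:_(<2 x s32>), %5:_(<2 x s32>) = G_UNMERGE_VALUES %1
// %6:_(<2 x s32>), %7:_(<2 x s32>) = G_UNMERGE_VALUES %2
// %8:_(<2 x s1>) = G_ICMP intpred(slt), %4, %6
// %9:_(<2 x s1>) = G_ICMP intpred(slt), %5, %7
// %3:_(<4 x s1>) = G_CONCAT_VECTORS %8, %9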
3431 
3432 LegalizerHelper::LegalizeResult
3433 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3434  LLT NarrowTy) {
3435  Register DstReg = MI.getOperand(0).getReg();
3436  Register CondReg = MI.getOperand(1).getReg();
3437 
3438  unsigned NumParts = 0;
3439  LLT NarrowTy0, NarrowTy1;
3440 
3441  LLT DstTy = MRI.getType(DstReg);
3442  LLT CondTy = MRI.getType(CondReg);
3443  unsigned Size = DstTy.getSizeInBits();
3444 
3445  assert(TypeIdx == 0 || CondTy.isVector());
3446 
3447  if (TypeIdx == 0) {
3448  NarrowTy0 = NarrowTy;
3449  NarrowTy1 = CondTy;
3450 
3451  unsigned NarrowSize = NarrowTy0.getSizeInBits();
3452  // FIXME: Don't know how to handle the situation where the small vectors
3453  // aren't all the same size yet.
3454  if (Size % NarrowSize != 0)
3455  return UnableToLegalize;
3456 
3457  NumParts = Size / NarrowSize;
3458 
3459  // Need to break down the condition type
3460  if (CondTy.isVector()) {
3461  if (CondTy.getNumElements() == NumParts)
3462  NarrowTy1 = CondTy.getElementType();
3463  else
3464  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3465  CondTy.getScalarSizeInBits());
3466  }
3467  } else {
3468  NumParts = CondTy.getNumElements();
3469  if (NarrowTy.isVector()) {
3470  // TODO: Handle uneven breakdown.
3471  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3472  return UnableToLegalize;
3473 
3474  return UnableToLegalize;
3475  } else {
3476  NarrowTy0 = DstTy.getElementType();
3477  NarrowTy1 = NarrowTy;
3478  }
3479  }
3480 
3481  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3482  if (CondTy.isVector())
3483  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3484 
3485  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3486  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3487 
3488  for (unsigned i = 0; i < NumParts; ++i) {
3489  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3490  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3491  Src1Regs[i], Src2Regs[i]);
3492  DstRegs.push_back(DstReg);
3493  }
3494 
3495  if (NarrowTy0.isVector())
3496  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3497  else
3498  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3499 
3500  MI.eraseFromParent();
3501  return Legalized;
3502 }
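//
// For example (registers illustrative), a scalar-condition select
// %3:_(<4 x s32>) = G_SELECT %0:_(s1), %1, %2 with NarrowTy = <2 x s32>
// becomes:
//
// %4:_(<2 x s32>), %5:_(<2 x s32>) = G_UNMERGE_VALUES %1
// %6:_(<2 x s32>), %7:_(<2 x s32>) = G_UNMERGE_VALUES %2
// %8:_(<2 x s32>) = G_SELECT %0, %4, %6
// %9:_(<2 x s32>) = G_SELECT %0, %5, %7
// %3:_(<4 x s32>) = G_CONCAT_VECTORS %8, %9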
3503 
3504 LegalizerHelper::LegalizeResult
3505 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3506  LLT NarrowTy) {
3507  const Register DstReg = MI.getOperand(0).getReg();
3508  LLT PhiTy = MRI.getType(DstReg);
3509  LLT LeftoverTy;
3510 
3511  // All of the operands need to have the same number of elements, so if we can
3512  // determine a type breakdown for the result type, the same breakdown applies
3513  // to all of the source types.
3514  int NumParts, NumLeftover;
3515  std::tie(NumParts, NumLeftover)
3516  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3517  if (NumParts < 0)
3518  return UnableToLegalize;
3519 
3520  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3521  SmallVector<MachineInstrBuilder, 4> NewInsts;
3522 
3523  const int TotalNumParts = NumParts + NumLeftover;
3524 
3525  // Insert the new phis in the result block first.
3526  for (int I = 0; I != TotalNumParts; ++I) {
3527  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3528  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3529  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3530  .addDef(PartDstReg));
3531  if (I < NumParts)
3532  DstRegs.push_back(PartDstReg);
3533  else
3534  LeftoverDstRegs.push_back(PartDstReg);
3535  }
3536 
3537  MachineBasicBlock *MBB = MI.getParent();
3538  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3539  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3540 
3541  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3542 
3543  // Insert code to extract the incoming values in each predecessor block.
3544  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3545  PartRegs.clear();
3546  LeftoverRegs.clear();
3547 
3548  Register SrcReg = MI.getOperand(I).getReg();
3549  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3550  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3551 
3552  LLT Unused;
3553  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3554  LeftoverRegs))
3555  return UnableToLegalize;
3556 
3557  // Add the newly created operand splits to the existing instructions. The
3558  // odd-sized pieces are ordered after the requested NarrowTy-sized
3559  // pieces.
3560  for (int J = 0; J != TotalNumParts; ++J) {
3561  MachineInstrBuilder MIB = NewInsts[J];
3562  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3563  MIB.addMBB(&OpMBB);
3564  }
3565  }
3566 
3567  MI.eraseFromParent();
3568  return Legalized;
3569 }
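//
// For example (blocks and registers illustrative), given
// %0:_(<4 x s32>) = G_PHI %1(<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
// and NarrowTy = <2 x s32>, each predecessor splits its incoming value before
// its terminator and the result block rebuilds the wide value after the phis:
//
// bb.0:
//   %3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %1
// bb.1:
//   %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %2
// bb.2:
//   %7:_(<2 x s32>) = G_PHI %3, %bb.0, %5, %bb.1
//   %8:_(<2 x s32>) = G_PHI %4, %bb.0, %6, %bb.1
//   %0:_(<4 x s32>) = G_CONCAT_VECTORS %7, %8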
3570 
3571 LegalizerHelper::LegalizeResult
3572 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3573  unsigned TypeIdx,
3574  LLT NarrowTy) {
3575  if (TypeIdx != 1)
3576  return UnableToLegalize;
3577 
3578  const int NumDst = MI.getNumOperands() - 1;
3579  const Register SrcReg = MI.getOperand(NumDst).getReg();
3580  LLT SrcTy = MRI.getType(SrcReg);
3581 
3582  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3583 
3584  // TODO: Create sequence of extracts.
3585  if (DstTy == NarrowTy)
3586  return UnableToLegalize;
3587 
3588  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3589  if (DstTy == GCDTy) {
3590  // This would just be a copy of the same unmerge.
3591  // TODO: Create extracts, pad with undef and create intermediate merges.
3592  return UnableToLegalize;
3593  }
3594 
3595  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3596  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3597  const int PartsPerUnmerge = NumDst / NumUnmerge;
3598 
3599  for (int I = 0; I != NumUnmerge; ++I) {
3600  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3601 
3602  for (int J = 0; J != PartsPerUnmerge; ++J)
3603  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3604  MIB.addUse(Unmerge.getReg(I));
3605  }
3606 
3607  MI.eraseFromParent();
3608  return Legalized;
3609 }
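//
// For example (registers illustrative), with NarrowTy = <2 x s16>:
//
// %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0:_(<4 x s16>)
//
// becomes a two-level unmerge:
//
// %5:_(<2 x s16>), %6:_(<2 x s16>) = G_UNMERGE_VALUES %0
// %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5
// %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %6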
3610 
3611 // Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
3612 // produces a vector.
3613 //
3614 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3615 // undef as necessary.
3616 //
3617 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3618 // -> <2 x s16>
3619 //
3620 // %4:_(s16) = G_IMPLICIT_DEF
3621 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3622 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3623 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3624 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3625 // %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3626 LegalizerHelper::LegalizeResult
3627 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3628  LLT NarrowTy) {
3629  Register DstReg = MI.getOperand(0).getReg();
3630  LLT DstTy = MRI.getType(DstReg);
3631  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3632  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3633 
3634  // Break into a common type
3635  SmallVector<Register, 16> Parts;
3636  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3637  extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3638 
3639  // Build the requested new merge, padding with undef.
3640  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3641  TargetOpcode::G_ANYEXT);
3642 
3643  // Pack into the original result register.
3644  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3645 
3646  MI.eraseFromParent();
3647  return Legalized;
3648 }
3649 
3650 LegalizerHelper::LegalizeResult
3651 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3652  unsigned TypeIdx,
3653  LLT NarrowVecTy) {
3654  Register DstReg = MI.getOperand(0).getReg();
3655  Register SrcVec = MI.getOperand(1).getReg();
3656  Register InsertVal;
3657  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3658 
3659  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3660  if (IsInsert)
3661  InsertVal = MI.getOperand(2).getReg();
3662 
3663  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3664 
3665  // TODO: Handle total scalarization case.
3666  if (!NarrowVecTy.isVector())
3667  return UnableToLegalize;
3668 
3669  LLT VecTy = MRI.getType(SrcVec);
3670 
3671  // If the index is a constant, we can break this down as expected and index
3672  // into the target-sized pieces.
3673  int64_t IdxVal;
3674  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
3675  // Avoid out of bounds indexing the pieces.
3676  if (IdxVal >= VecTy.getNumElements()) {
3677  MIRBuilder.buildUndef(DstReg);
3678  MI.eraseFromParent();
3679  return Legalized;
3680  }
3681 
3682  SmallVector<Register, 8> VecParts;
3683  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3684 
3685  // Build a sequence of NarrowTy pieces in VecParts for this operand.
3686  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3687  TargetOpcode::G_ANYEXT);
3688 
3689  unsigned NewNumElts = NarrowVecTy.getNumElements();
3690 
3691  LLT IdxTy = MRI.getType(Idx);
3692  int64_t PartIdx = IdxVal / NewNumElts;
3693  auto NewIdx =
3694  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
3695 
3696  if (IsInsert) {
3697  LLT PartTy = MRI.getType(VecParts[PartIdx]);
3698 
3699  // Use the adjusted index to insert into one of the subvectors.
3700  auto InsertPart = MIRBuilder.buildInsertVectorElement(
3701  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
3702  VecParts[PartIdx] = InsertPart.getReg(0);
3703 
3704  // Recombine the inserted subvector with the others to reform the result
3705  // vector.
3706  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3707  } else {
3708  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
3709  }
3710 
3711  MI.eraseFromParent();
3712  return Legalized;
3713  }
3714 
3715  // With a variable index, we can't perform the operation in a smaller type, so
3716  // we're forced to expand this.
3717  //
3718  // TODO: We could emit a chain of compare/select to figure out which piece to
3719  // index.
3720  return lowerExtractInsertVectorElt(MI);
3721 }
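//
// For example (registers illustrative; the index type is assumed to be s64),
// a constant-index extract %2:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1
// where %1 is the constant 3 and NarrowVecTy = <2 x s32> becomes an extract
// from the second piece with a rebased index:
//
// %3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %0
// %5:_(s64) = G_CONSTANT i64 1
// %2:_(s32) = G_EXTRACT_VECTOR_ELT %4, %5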
3722 
3723 LegalizerHelper::LegalizeResult
3724 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3725  LLT NarrowTy) {
3726  // FIXME: Don't know how to handle secondary types yet.
3727  if (TypeIdx != 0)
3728  return UnableToLegalize;
3729 
3730  MachineMemOperand *MMO = *MI.memoperands_begin();
3731 
3732  // This implementation doesn't work for atomics. Give up instead of doing
3733  // something invalid.
3734  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3736  return UnableToLegalize;
3737 
3738  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3739  Register ValReg = MI.getOperand(0).getReg();
3740  Register AddrReg = MI.getOperand(1).getReg();
3741  LLT ValTy = MRI.getType(ValReg);
3742 
3743  // FIXME: Do we need a distinct NarrowMemory legalize action?
3744  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3745  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
3746  return UnableToLegalize;
3747  }
3748 
3749  int NumParts = -1;
3750  int NumLeftover = -1;
3751  LLT LeftoverTy;
3752  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3753  if (IsLoad) {
3754  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3755  } else {
3756  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3757  NarrowLeftoverRegs)) {
3758  NumParts = NarrowRegs.size();
3759  NumLeftover = NarrowLeftoverRegs.size();
3760  }
3761  }
3762 
3763  if (NumParts == -1)
3764  return UnableToLegalize;
3765 
3766  LLT PtrTy = MRI.getType(AddrReg);
3767  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
3768 
3769  unsigned TotalSize = ValTy.getSizeInBits();
3770 
3771  // Split the load/store into PartTy-sized pieces starting at Offset. If this
3772  // is a load, return the new registers in ValRegs. For a store, each element
3773  // of ValRegs should already be of type PartTy. Returns the next offset that
3774  // needs to be handled.
3775  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3776  unsigned Offset) -> unsigned {
3778  unsigned PartSize = PartTy.getSizeInBits();
3779  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3780  Offset += PartSize, ++Idx) {
3781  unsigned ByteSize = PartSize / 8;
3782  unsigned ByteOffset = Offset / 8;
3783  Register NewAddrReg;
3784 
3785  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3786 
3787  MachineMemOperand *NewMMO =
3788  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3789 
3790  if (IsLoad) {
3791  Register Dst = MRI.createGenericVirtualRegister(PartTy);
3792  ValRegs.push_back(Dst);
3793  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3794  } else {
3795  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3796  }
3797  }
3798 
3799  return Offset;
3800  };
3801 
3802  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3803 
3804  // Handle the rest of the register if this isn't an even type breakdown.
3805  if (LeftoverTy.isValid())
3806  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3807 
3808  if (IsLoad) {
3809  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3810  LeftoverTy, NarrowLeftoverRegs);
3811  }
3812 
3813  MI.eraseFromParent();
3814  return Legalized;
3815 }
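//
// For example (registers, offsets, and memory operands shown loosely),
// narrowing %0:_(s96) = G_LOAD %p(p0) :: (load 12) with NarrowTy = s64
// produces one s64 piece and one s32 leftover piece:
//
// %1:_(s64) = G_LOAD %p(p0) :: (load 8)
// %2:_(s64) = G_CONSTANT i64 8
// %3:_(p0) = G_PTR_ADD %p, %2
// %4:_(s32) = G_LOAD %3(p0) :: (load 4)
//
// %1 and %4 are then recombined into the original s96 value.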
3816 
3817 LegalizerHelper::LegalizeResult
3818 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
3819  LLT NarrowTy) {
3820  assert(TypeIdx == 0 && "only one type index expected");
3821 
3822  const unsigned Opc = MI.getOpcode();
3823  const int NumOps = MI.getNumOperands() - 1;
3824  const Register DstReg = MI.getOperand(0).getReg();
3825  const unsigned Flags = MI.getFlags();
3826  const unsigned NarrowSize = NarrowTy.getSizeInBits();
3827  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3828 
3829  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
3830 
3831  // First, check whether we are narrowing (changing the element type) or
3832  // reducing the number of vector elements.
3833  const LLT DstTy = MRI.getType(DstReg);
3834  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3835 
3836  SmallVector<Register, 8> ExtractedRegs[3];
3838 
3839  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3840 
3841  // Break down all the sources into NarrowTy pieces we can operate on. This may
3842  // involve creating merges to a wider type, padded with undef.
3843  for (int I = 0; I != NumOps; ++I) {
3844  Register SrcReg = MI.getOperand(I + 1).getReg();
3845  LLT SrcTy = MRI.getType(SrcReg);
3846 
3847  // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3848  // For fewerElements, this is a smaller vector with the same element type.
3849  LLT OpNarrowTy;
3850  if (IsNarrow) {
3851  OpNarrowTy = NarrowScalarTy;
3852 
3853  // In case of narrowing, we need to cast vectors to scalars for this to
3854  // work properly
3855  // FIXME: Can we do without the bitcast here if we're narrowing?
3856  if (SrcTy.isVector()) {
3857  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3858  SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3859  }
3860  } else {
3861  OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
3862  }
3863 
3864  LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
3865 
3866  // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
3867  buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
3868  TargetOpcode::G_ANYEXT);
3869  }
3870 
3871  SmallVector<Register, 8> ResultRegs;
3872 
3873  // Input operands for each sub-instruction.
3874  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
3875 
3876  int NumParts = ExtractedRegs[0].size();
3877  const unsigned DstSize = DstTy.getSizeInBits();
3878  const LLT DstScalarTy = LLT::scalar(DstSize);
3879 
3880  // Narrowing needs to use scalar types
3881  LLT DstLCMTy, NarrowDstTy;
3882  if (IsNarrow) {
3883  DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
3884  NarrowDstTy = NarrowScalarTy;
3885  } else {
3886  DstLCMTy = getLCMType(DstTy, NarrowTy);
3887  NarrowDstTy = NarrowTy;
3888  }
3889 
3890  // We widened the source registers to satisfy merge/unmerge size
3891  // constraints. We'll have some extra fully undef parts.
3892  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
3893 
3894  for (int I = 0; I != NumRealParts; ++I) {
3895  // Emit this instruction on each of the split pieces.
3896  for (int J = 0; J != NumOps; ++J)
3897  InputRegs[J] = ExtractedRegs[J][I];
3898 
3899  auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
3900  ResultRegs.push_back(Inst.getReg(0));
3901  }
3902 
3903  // Fill out the widened result with undef instead of creating instructions
3904  // with undef inputs.
3905  int NumUndefParts = NumParts - NumRealParts;
3906  if (NumUndefParts != 0)
3907  ResultRegs.append(NumUndefParts,
3908  MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
3909 
3910  // Extract the possibly padded result. Use a scratch register if we need to do
3911  // a final bitcast, otherwise use the original result register.
3912  Register MergeDstReg;
3913  if (IsNarrow && DstTy.isVector())
3914  MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
3915  else
3916  MergeDstReg = DstReg;
3917 
3918  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
3919 
3920  // Recast to vector if we narrowed a vector
3921  if (IsNarrow && DstTy.isVector())
3922  MIRBuilder.buildBitcast(DstReg, MergeDstReg);
3923 
3924  MI.eraseFromParent();
3925  return Legalized;
3926 }
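//
// For example (registers illustrative), the fewer-elements path splits an
// evenly divisible vector op such as %2:_(<4 x s32>) = G_FADD %0, %1 with
// NarrowTy = <2 x s32> into:
//
// %3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %0
// %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %1
// %7:_(<2 x s32>) = G_FADD %3, %5
// %8:_(<2 x s32>) = G_FADD %4, %6
// %2:_(<4 x s32>) = G_CONCAT_VECTORS %7, %8
//
// Uneven cases are first padded out with undef as described above.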
3927 
3928 LegalizerHelper::LegalizeResult
3929 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3930  LLT NarrowTy) {
3931  Register DstReg = MI.getOperand(0).getReg();
3932  Register SrcReg = MI.getOperand(1).getReg();
3933  int64_t Imm = MI.getOperand(2).getImm();
3934 
3935  LLT DstTy = MRI.getType(DstReg);
3936 
3937  SmallVector<Register, 8> Parts;
3938  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3939  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3940 
3941  for (Register &R : Parts)
3942  R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3943 
3944  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3945 
3946  MI.eraseFromParent();
3947  return Legalized;
3948 }
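//
// For example (registers illustrative), with NarrowTy = <2 x s32>:
//
// %1:_(<4 x s32>) = G_SEXT_INREG %0, 8
//
// becomes:
//
// %2:_(<2 x s32>), %3:_(<2 x s32>) = G_UNMERGE_VALUES %0
// %4:_(<2 x s32>) = G_SEXT_INREG %2, 8
// %5:_(<2 x s32>) = G_SEXT_INREG %3, 8
// %1:_(<4 x s32>) = G_CONCAT_VECTORS %4, %5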
3949 
3950 LegalizerHelper::LegalizeResult
3951 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
3952  LLT NarrowTy) {
3953  using namespace TargetOpcode;
3954 
3955  switch (MI.getOpcode()) {
3956  case G_IMPLICIT_DEF:
3957  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
3958  case G_TRUNC:
3959  case G_AND:
3960  case G_OR:
3961  case G_XOR:
3962  case G_ADD:
3963  case G_SUB:
3964  case G_MUL:
3965  case G_PTR_ADD:
3966  case G_SMULH:
3967  case G_UMULH:
3968  case G_FADD:
3969  case G_FMUL:
3970  case G_FSUB:
3971  case G_FNEG:
3972  case G_FABS:
3973  case G_FCANONICALIZE:
3974  case G_FDIV:
3975  case G_FREM:
3976  case G_FMA:
3977  case G_FMAD:
3978  case G_FPOW:
3979  case G_FEXP:
3980  case G_FEXP2:
3981  case G_FLOG:
3982  case G_FLOG2:
3983  case G_FLOG10:
3984  case G_FNEARBYINT:
3985  case G_FCEIL:
3986  case G_FFLOOR:
3987  case G_FRINT:
3988  case G_INTRINSIC_ROUND:
3989  case G_INTRINSIC_ROUNDEVEN:
3990  case G_INTRINSIC_TRUNC:
3991  case G_FCOS:
3992  case G_FSIN:
3993  case G_FSQRT:
3994  case G_BSWAP:
3995  case G_BITREVERSE:
3996  case G_SDIV:
3997  case G_UDIV:
3998  case G_SREM:
3999  case G_UREM:
4000  case G_SMIN:
4001  case G_SMAX:
4002  case G_UMIN:
4003  case G_UMAX:
4004  case G_FMINNUM:
4005  case G_FMAXNUM:
4006  case G_FMINNUM_IEEE:
4007  case G_FMAXNUM_IEEE:
4008  case G_FMINIMUM:
4009  case G_FMAXIMUM:
4010  case G_FSHL:
4011  case G_FSHR:
4012  case G_FREEZE:
4013  case G_SADDSAT:
4014  case G_SSUBSAT:
4015  case G_UADDSAT:
4016  case G_USUBSAT:
4017  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
4018  case G_SHL:
4019  case G_LSHR:
4020  case G_ASHR:
4021  case G_SSHLSAT:
4022  case G_USHLSAT:
4023  case G_CTLZ:
4024  case G_CTLZ_ZERO_UNDEF:
4025  case G_CTTZ:
4026  case G_CTTZ_ZERO_UNDEF:
4027  case G_CTPOP:
4028  case G_FCOPYSIGN:
4029  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
4030  case G_ZEXT:
4031  case G_SEXT:
4032  case G_ANYEXT:
4033  case G_FPEXT:
4034  case G_FPTRUNC:
4035  case G_SITOFP:
4036  case G_UITOFP:
4037  case G_FPTOSI:
4038  case G_FPTOUI:
4039  case G_INTTOPTR:
4040  case G_PTRTOINT:
4041  case G_ADDRSPACE_CAST:
4042  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4043  case G_ICMP:
4044  case G_FCMP:
4045  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4046  case G_SELECT:
4047  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4048  case G_PHI:
4049  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4050  case G_UNMERGE_VALUES:
4051  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4052  case G_BUILD_VECTOR:
4053  assert(TypeIdx == 0 && "not a vector type index");
4054  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4055  case G_CONCAT_VECTORS:
4056  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4057  return UnableToLegalize;
4058  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4059  case G_EXTRACT_VECTOR_ELT:
4060  case G_INSERT_VECTOR_ELT:
4061  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4062  case G_LOAD:
4063  case G_STORE:
4064  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
4065  case G_SEXT_INREG:
4066  return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
4067  default:
4068  return UnableToLegalize;
4069  }
4070 }
4071 
4072 LegalizerHelper::LegalizeResult
4073 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4074  const LLT HalfTy, const LLT AmtTy) {
4075 
4076  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4077  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4078  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4079 
4080  if (Amt.isNullValue()) {
4081  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4082  MI.eraseFromParent();
4083  return Legalized;
4084  }
4085 
4086  LLT NVT = HalfTy;
4087  unsigned NVTBits = HalfTy.getSizeInBits();
4088  unsigned VTBits = 2 * NVTBits;
4089 
4090  SrcOp Lo(Register(0)), Hi(Register(0));
4091  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4092  if (Amt.ugt(VTBits)) {
4093  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4094  } else if (Amt.ugt(NVTBits)) {
4095  Lo = MIRBuilder.buildConstant(NVT, 0);
4096  Hi = MIRBuilder.buildShl(NVT, InL,
4097  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4098  } else if (Amt == NVTBits) {
4099  Lo = MIRBuilder.buildConstant(NVT, 0);
4100  Hi = InL;
4101  } else {
4102  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4103  auto OrLHS =
4104  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4105  auto OrRHS = MIRBuilder.buildLShr(
4106  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4107  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4108  }
4109  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4110  if (Amt.ugt(VTBits)) {
4111  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4112  } else if (Amt.ugt(NVTBits)) {
4113  Lo = MIRBuilder.buildLShr(NVT, InH,
4114  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4115  Hi = MIRBuilder.buildConstant(NVT, 0);
4116  } else if (Amt == NVTBits) {
4117  Lo = InH;
4118  Hi = MIRBuilder.buildConstant(NVT, 0);
4119  } else {
4120  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4121 
4122  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4123  auto OrRHS = MIRBuilder.buildShl(
4124  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4125 
4126  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4127  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4128  }
4129  } else {
4130  if (Amt.ugt(VTBits)) {
4131  Hi = Lo = MIRBuilder.buildAShr(
4132  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4133  } else if (Amt.ugt(NVTBits)) {
4134  Lo = MIRBuilder.buildAShr(NVT, InH,
4135  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4136  Hi = MIRBuilder.buildAShr(NVT, InH,
4137  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4138  } else if (Amt == NVTBits) {
4139  Lo = InH;
4140  Hi = MIRBuilder.buildAShr(NVT, InH,
4141  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4142  } else {
4143  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4144 
4145  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4146  auto OrRHS = MIRBuilder.buildShl(
4147  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4148 
4149  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4150  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4151  }
4152  }
4153 
4154  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4155  MI.eraseFromParent();
4156 
4157  return Legalized;
4158 }
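//
// For example (registers illustrative; %c3 and %c29 stand for G_CONSTANTs of
// the shift amount type), an s64 G_SHL by the constant 3 with HalfTy = s32
// becomes:
//
// %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0      ; Lo, Hi halves
// %5:_(s32) = G_SHL %3, %c3                       ; new Lo
// %6:_(s32) = G_SHL %4, %c3
// %7:_(s32) = G_LSHR %3, %c29
// %8:_(s32) = G_OR %6, %7                         ; new Hi
// %1:_(s64) = G_MERGE_VALUES %5, %8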
4159 
4160 // TODO: Optimize if constant shift amount.
4161 LegalizerHelper::LegalizeResult
4162 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4163  LLT RequestedTy) {
4164  if (TypeIdx == 1) {
4166  narrowScalarSrc(MI, RequestedTy, 2);
4168  return Legalized;
4169  }
4170 
4171  Register DstReg = MI.getOperand(0).getReg();
4172  LLT DstTy = MRI.getType(DstReg);
4173  if (DstTy.isVector())
4174  return UnableToLegalize;
4175 
4176  Register Amt = MI.getOperand(2).getReg();
4177  LLT ShiftAmtTy = MRI.getType(Amt);
4178  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4179  if (DstEltSize % 2 != 0)
4180  return UnableToLegalize;
4181 
4182  // Ignore the input type. We can only go to exactly half the size of the
4183  // input. If that isn't small enough, the resulting pieces will be further
4184  // legalized.
4185  const unsigned NewBitSize = DstEltSize / 2;
4186  const LLT HalfTy = LLT::scalar(NewBitSize);
4187  const LLT CondTy = LLT::scalar(1);
4188 
4189  if (const MachineInstr *KShiftAmt =
4190  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
4191  return narrowScalarShiftByConstant(
4192  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4193  }
4194 
4195  // TODO: Expand with known bits.
4196 
4197  // Handle the fully general expansion by an unknown amount.
4198  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4199 
4200  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4201  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4202  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4203 
4204  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4205  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4206 
4207  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4208  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4209  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4210 
4211  Register ResultRegs[2];
4212  switch (MI.getOpcode()) {
4213  case TargetOpcode::G_SHL: {
4214  // Short: ShAmt < NewBitSize
4215  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4216 
4217  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4218  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4219  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4220 
4221  // Long: ShAmt >= NewBitSize
4222  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4223  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4224 
4225  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4226  auto Hi = MIRBuilder.buildSelect(
4227  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4228 
4229  ResultRegs[0] = Lo.getReg(0);
4230  ResultRegs[1] = Hi.getReg(0);
4231  break;
4232  }
4233  case TargetOpcode::G_LSHR:
4234  case TargetOpcode::G_ASHR: {
4235  // Short: ShAmt < NewBitSize
4236  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4237 
4238  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4239  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4240  auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4241 
4242  // Long: ShAmt >= NewBitSize
4243  MachineInstrBuilder HiL;
4244  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4245  HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4246  } else {
4247  auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4248  HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4249  }
4250  auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4251  {InH, AmtExcess}); // Lo from Hi part.
4252 
4253  auto Lo = MIRBuilder.buildSelect(
4254  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4255 
4256  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4257 
4258  ResultRegs[0] = Lo.getReg(0);
4259  ResultRegs[1] = Hi.getReg(0);
4260  break;
4261  }
4262  default:
4263  llvm_unreachable("not a shift");
4264  }
4265 
4266  MIRBuilder.buildMerge(DstReg, ResultRegs);
4267  MI.eraseFromParent();
4268  return Legalized;
4269 }
4270 
4271 LegalizerHelper::LegalizeResult
4272 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4273  LLT MoreTy) {
4274  assert(TypeIdx == 0 && "Expecting only Idx 0");
4275 
4277  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4278  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4279  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4280  moreElementsVectorSrc(MI, MoreTy, I);
4281  }
4282 
4283  MachineBasicBlock &MBB = *MI.getParent();
4285  moreElementsVectorDst(MI, MoreTy, 0);
4287  return Legalized;
4288 }
4289 
4290 LegalizerHelper::LegalizeResult
4291 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
4292  LLT MoreTy) {
4293  unsigned Opc = MI.getOpcode();
4294  switch (Opc) {
4295  case TargetOpcode::G_IMPLICIT_DEF:
4296  case TargetOpcode::G_LOAD: {
4297  if (TypeIdx != 0)
4298  return UnableToLegalize;
4300  moreElementsVectorDst(MI, MoreTy, 0);
4302  return Legalized;
4303  }
4304  case TargetOpcode::G_STORE:
4305  if (TypeIdx != 0)
4306  return UnableToLegalize;
4308  moreElementsVectorSrc(MI, MoreTy, 0);
4310  return Legalized;
4311  case TargetOpcode::G_AND:
4312  case TargetOpcode::G_OR:
4313  case TargetOpcode::G_XOR:
4314  case TargetOpcode::G_SMIN:
4315  case TargetOpcode::G_SMAX:
4316  case TargetOpcode::G_UMIN:
4317  case TargetOpcode::G_UMAX:
4318  case TargetOpcode::G_FMINNUM:
4319  case TargetOpcode::G_FMAXNUM:
4320  case TargetOpcode::G_FMINNUM_IEEE:
4321  case TargetOpcode::G_FMAXNUM_IEEE:
4322  case TargetOpcode::G_FMINIMUM:
4323  case TargetOpcode::G_FMAXIMUM: {
4325  moreElementsVectorSrc(MI, MoreTy, 1);
4326  moreElementsVectorSrc(MI, MoreTy, 2);
4327  moreElementsVectorDst(MI, MoreTy, 0);
4329  return Legalized;
4330  }
4331  case TargetOpcode::G_EXTRACT:
4332  if (TypeIdx != 1)
4333  return UnableToLegalize;
4335  moreElementsVectorSrc(MI, MoreTy, 1);
4337  return Legalized;
4338  case TargetOpcode::G_INSERT:
4339  case TargetOpcode::G_FREEZE:
4340  if (TypeIdx != 0)
4341  return UnableToLegalize;
4343  moreElementsVectorSrc(MI, MoreTy, 1);
4344  moreElementsVectorDst(MI, MoreTy, 0);
4346  return Legalized;
4347  case TargetOpcode::G_SELECT:
4348  if (TypeIdx != 0)
4349  return UnableToLegalize;
4350  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4351  return UnableToLegalize;
4352 
4354  moreElementsVectorSrc(MI, MoreTy, 2);
4355  moreElementsVectorSrc(MI, MoreTy, 3);
4356  moreElementsVectorDst(MI, MoreTy, 0);
4358  return Legalized;
4359  case TargetOpcode::G_UNMERGE_VALUES: {
4360  if (TypeIdx != 1)
4361  return UnableToLegalize;
4362 
4363  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4364  int NumDst = MI.getNumOperands() - 1;
4365  moreElementsVectorSrc(MI, MoreTy, NumDst);
4366 
4367  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4368  for (int I = 0; I != NumDst; ++I)
4369  MIB.addDef(MI.getOperand(I).getReg());
4370 
4371  int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
4372  for (int I = NumDst; I != NewNumDst; ++I)
4373  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
4374 
4375  MIB.addUse(MI.getOperand(NumDst).getReg());
4376  MI.eraseFromParent();
4377  return Legalized;
4378  }
4379  case TargetOpcode::G_PHI:
4380  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
4381  default:
4382  return UnableToLegalize;
4383  }
4384 }
4385 
4386 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
4387  ArrayRef<Register> Src1Regs,
4388  ArrayRef<Register> Src2Regs,
4389  LLT NarrowTy) {
4390  MachineIRBuilder &B = MIRBuilder;
4391  unsigned SrcParts = Src1Regs.size();
4392  unsigned DstParts = DstRegs.size();
4393 
4394  unsigned DstIdx = 0; // Low bits of the result.
4395  Register FactorSum =
4396  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
4397  DstRegs[DstIdx] = FactorSum;
4398 
4399  unsigned CarrySumPrevDstIdx;
4400  SmallVector<Register, 4> Factors;
4401 
4402  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
4403  // Collect low parts of muls for DstIdx.
4404  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
4405  i <= std::min(DstIdx, SrcParts - 1); ++i) {
4406  MachineInstrBuilder Mul =
4407  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
4408  Factors.push_back(Mul.getReg(0));
4409  }
4410  // Collect high parts of muls from previous DstIdx.
4411  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
4412  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
4413  MachineInstrBuilder Umulh =
4414  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
4415  Factors.push_back(Umulh.getReg(0));
4416  }
4417  // Add CarrySum from additions calculated for previous DstIdx.
4418  if (DstIdx != 1) {
4419  Factors.push_back(CarrySumPrevDstIdx);
4420  }
4421 
4422  Register CarrySum;
4423  // Add all factors and accumulate all carries into CarrySum.
4424  if (DstIdx != DstParts - 1) {
4425  MachineInstrBuilder Uaddo =
4426  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
4427  FactorSum = Uaddo.getReg(0);
4428  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
4429  for (unsigned i = 2; i < Factors.size(); ++i) {
4430  MachineInstrBuilder Uaddo =
4431  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
4432  FactorSum = Uaddo.getReg(0);
4433  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
4434  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
4435  }
4436  } else {
4437  // Since the value for the next index is not calculated, CarrySum is not
4438  // needed either.
4438  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
4439  for (unsigned i = 2; i < Factors.size(); ++i)
4440  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
4441  }
4442 
4443  CarrySumPrevDstIdx = CarrySum;
4444  DstRegs[DstIdx] = FactorSum;
4445  Factors.clear();
4446  }
4447 }
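//
// For the common two-part case (parts a1:a0 and b1:b0; names illustrative),
// the loop above computes the schoolbook product:
//
//   Dst0 = mul(a0, b0)
//   Dst1 = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0)
//
// When further parts follow, each addition is emitted as G_UADDO and the
// zero-extended carry bits are summed into CarrySum for the next index.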
4448 
4449 LegalizerHelper::LegalizeResult
4450 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4451  Register DstReg = MI.getOperand(0).getReg();
4452  Register Src1 = MI.getOperand(1).getReg();
4453  Register Src2 = MI.getOperand(2).getReg();
4454 
4455  LLT Ty = MRI.getType(DstReg);
4456  if (Ty.isVector())
4457  return UnableToLegalize;
4458 
4459  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4460  unsigned DstSize = Ty.getSizeInBits();
4461  unsigned NarrowSize = NarrowTy.getSizeInBits();
4462  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4463  return UnableToLegalize;
4464 
4465  unsigned NumDstParts = DstSize / NarrowSize;
4466  unsigned NumSrcParts = SrcSize / NarrowSize;
4467  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4468  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4469 
4470  SmallVector<Register, 2> Src1Parts, Src2Parts;
4471  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4472  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4473  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4474  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4475 
4476  // Take only high half of registers if this is high mul.
4477  ArrayRef<Register> DstRegs(
4478  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4479  MIRBuilder.buildMerge(DstReg, DstRegs);
4480  MI.eraseFromParent();
4481  return Legalized;
4482 }
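//
// For example (registers illustrative), %2:_(s64) = G_MUL %0, %1 with
// NarrowTy = s32 expands to:
//
// %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0
// %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1
// %7:_(s32) = G_MUL %3, %5                        ; low part
// %8:_(s32) = G_MUL %4, %5
// %9:_(s32) = G_MUL %3, %6
// %10:_(s32) = G_UMULH %3, %5
// %11:_(s32) = G_ADD %8, %9
// %12:_(s32) = G_ADD %11, %10                     ; high part
// %2:_(s64) = G_MERGE_VALUES %7, %12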
4483 
4484 LegalizerHelper::LegalizeResult
4485 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
4486  LLT NarrowTy) {
4487  if (TypeIdx != 1)
4488  return UnableToLegalize;
4489 
4490  uint64_t NarrowSize = NarrowTy.getSizeInBits();
4491 
4492  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4493  // FIXME: add support for when SizeOp1 isn't an exact multiple of
4494  // NarrowSize.
4495  if (SizeOp1 % NarrowSize != 0)
4496  return UnableToLegalize;
4497  int NumParts = SizeOp1 / NarrowSize;
4498 
4499  SmallVector<Register, 2> SrcRegs, DstRegs;
4500  SmallVector<uint64_t, 2> Indexes;
4501  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
4502 
4503  Register OpReg = MI.getOperand(0).getReg();
4504  uint64_t OpStart = MI.getOperand(2).getImm();
4505  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4506  for (int i = 0; i < NumParts; ++i) {
4507  unsigned SrcStart = i * NarrowSize;
4508 
4509  if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
4510  // No part of the extract uses this subregister, ignore it.
4511  continue;
4512  } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4513  // The entire subregister is extracted, forward the value.
4514  DstRegs.push_back(SrcRegs[i]);
4515  continue;
4516  }
4517 
4518  // Compute the offset into this source piece and the size of the segment
4519  // that actually contributes to the extracted value.
4520  int64_t ExtractOffset;
4521  uint64_t SegSize;
4522  if (OpStart < SrcStart) {
4523  ExtractOffset = 0;
4524  SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
4525  } else {
4526  ExtractOffset = OpStart - SrcStart;
4527  SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
4528  }
4529 
4530  Register SegReg = SrcRegs[i];
4531  if (ExtractOffset != 0 || SegSize != NarrowSize) {
4532  // A genuine extract is needed.
4533  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4534  MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
4535  }
4536 
4537  DstRegs.push_back(SegReg);
4538  }
4539 
4540  Register DstReg = MI.getOperand(0).getReg();
4541  if (MRI.getType(DstReg).isVector())
4542  MIRBuilder.buildBuildVector(DstReg, DstRegs);
4543  else if (DstRegs.size() > 1)
4544  MIRBuilder.buildMerge(DstReg, DstRegs);
4545  else
4546  MIRBuilder.buildCopy(DstReg, DstRegs[0]);
4547  MI.eraseFromParent();
4548  return Legalized;
4549 }
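//
// For example (registers illustrative), %1:_(s32) = G_EXTRACT %0:_(s128), 64
// with NarrowTy = s64 only needs the second piece:
//
// %2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %0
// %4:_(s32) = G_EXTRACT %3, 0
// %1:_(s32) = COPY %4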
4550 
4551 LegalizerHelper::LegalizeResult
4552 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
4553  LLT NarrowTy) {
4554  // FIXME: Don't know how to handle secondary types yet.
4555  if (TypeIdx != 0)
4556  return UnableToLegalize;
4557 
4558  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4559  uint64_t NarrowSize = NarrowTy.getSizeInBits();
4560 
4561  // FIXME: add support for when SizeOp0 isn't an exact multiple of
4562  // NarrowSize.
4563  if (SizeOp0 % NarrowSize != 0)
4564  return UnableToLegalize;
4565 
4566  int NumParts = SizeOp0 / NarrowSize;
4567 
4568  SmallVector<Register, 2> SrcRegs, DstRegs;
4569  SmallVector<uint64_t, 2> Indexes;
4570  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
4571 
4572  Register OpReg = MI.getOperand(2).getReg();
4573  uint64_t OpStart = MI.getOperand(3).getImm();
4574  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4575  for (int i = 0; i < NumParts; ++i) {
4576  unsigned DstStart = i * NarrowSize;
4577 
4578  if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
4579  // No part of the insert affects this subregister, forward the original.
4580  DstRegs.push_back(SrcRegs[i]);
4581  continue;
4582  } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4583  // The entire subregister is defined by this insert, forward the new
4584  // value.
4585  DstRegs.push_back(OpReg);
4586  continue;
4587  }
4588 
4589  // Compute which portion of OpReg lands in this destination piece: the
4590  // offset to extract from OpReg, the offset to insert at, and its size.
4591  int64_t ExtractOffset, InsertOffset;
4592  uint64_t SegSize;
4593  if (OpStart < DstStart) {
4594  InsertOffset = 0;
4595  ExtractOffset = DstStart - OpStart;
4596  SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
4597  } else {
4598  InsertOffset = OpStart - DstStart;
4599  ExtractOffset = 0;
4600  SegSize =
4601  std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
4602  }
4603 
4604  Register SegReg = OpReg;
4605  if (ExtractOffset != 0 || SegSize != OpSize) {
4606  // A genuine extract is needed.
4607  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4608  MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
4609  }
4610 
4611  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4612  MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
4613  DstRegs.push_back(DstReg);
4614  }
4615 
4616  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
4617  Register DstReg = MI.getOperand(0).getReg();
4618  if(MRI.getType(DstReg).isVector())
4619  MIRBuilder.buildBuildVector(DstReg, DstRegs);
4620  else
4621  MIRBuilder.buildMerge(DstReg, DstRegs);
4622  MI.eraseFromParent();
4623  return Legalized;
4624 }
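//
// For example (registers illustrative),
// %2:_(s128) = G_INSERT %0:_(s128), %1:_(s32), 64 with NarrowTy = s64 keeps
// the low piece and rewrites only the piece the insert lands in:
//
// %3:_(s64), %4:_(s64) = G_UNMERGE_VALUES %0
// %5:_(s64) = G_INSERT %4, %1, 0
// %2:_(s128) = G_MERGE_VALUES %3, %5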
4625 
4626 LegalizerHelper::LegalizeResult
4627 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4628  LLT NarrowTy) {
4629  Register DstReg = MI.getOperand(0).getReg();
4630  LLT DstTy = MRI.getType(DstReg);
4631 
4632  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4633 
4634  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4635  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
4636  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4637  LLT LeftoverTy;
4638  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4639  Src0Regs, Src0LeftoverRegs))
4640  return UnableToLegalize;
4641 
4642  LLT Unused;
4643  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4644  Src1Regs, Src1LeftoverRegs))
4645  llvm_unreachable("inconsistent extractParts result");
4646 
4647  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4648  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
4649  {Src0Regs[I], Src1Regs[I]});
4650  DstRegs.push_back(Inst.getReg(0));
4651  }
4652 
4653  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4654  auto Inst = MIRBuilder.buildInstr(
4655  MI.getOpcode(),
4656  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4657  DstLeftoverRegs.push_back(Inst.getReg(0));
4658  }
4659 
4660  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4661  LeftoverTy, DstLeftoverRegs);
4662 
4663  MI.eraseFromParent();
4664  return Legalized;
4665 }
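//
// For example (registers illustrative), %2:_(s96) = G_AND %0, %1 with
// NarrowTy = s32 splits evenly into three pieces:
//
// %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %0
// %6:_(s32), %7:_(s32), %8:_(s32) = G_UNMERGE_VALUES %1
// %9:_(s32) = G_AND %3, %6
// %10:_(s32) = G_AND %4, %7
// %11:_(s32) = G_AND %5, %8
// %2:_(s96) = G_MERGE_VALUES %9, %10, %11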
4666 
4667 LegalizerHelper::LegalizeResult
4668 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4669  LLT NarrowTy) {
4670  if (TypeIdx != 0)
4671  return UnableToLegalize;
4672 
4673  Register DstReg = MI.getOperand(0).getReg();
4674  Register SrcReg = MI.getOperand(1).getReg();
4675 
4676  LLT DstTy = MRI.getType(DstReg);
4677  if (DstTy.isVector())
4678  return UnableToLegalize;
4679 
4681  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4682  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4683  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4684 
4685  MI.eraseFromParent();
4686  return Legalized;
4687 }
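//
// For example (registers illustrative), %1:_(s128) = G_ZEXT %0:_(s32) with
// NarrowTy = s32 pads the single source part with zero pieces:
//
// %2:_(s32) = G_CONSTANT i32 0
// %1:_(s128) = G_MERGE_VALUES %0, %2, %2, %2
//
// A G_ANYEXT would pad with G_IMPLICIT_DEF instead, and a G_SEXT with an
// arithmetic shift of the top source part.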
4688 
4689 LegalizerHelper::LegalizeResult
4690 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4691  LLT NarrowTy) {
4692  if (TypeIdx != 0)
4693  return UnableToLegalize;
4694 
4695  Register CondReg = MI.getOperand(1).getReg();
4696  LLT CondTy = MRI.getType(CondReg);
4697  if (CondTy.isVector()) // TODO: Handle vselect
4698  return UnableToLegalize;
4699 
4700  Register DstReg = MI.getOperand(0).getReg();
4701  LLT DstTy = MRI.getType(DstReg);
4702 
4703  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4704  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4705  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4706  LLT LeftoverTy;
4707  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4708  Src1Regs, Src1LeftoverRegs))
4709  return UnableToLegalize;
4710 
4711  LLT Unused;
4712  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4713  Src2Regs, Src2LeftoverRegs))
4714  llvm_unreachable("inconsistent extractParts result");
4715 
4716  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4717  auto Select = MIRBuilder.buildSelect(NarrowTy,
4718  CondReg, Src1Regs[I], Src2Regs[I]);
4719  DstRegs.push_back(Select.getReg(0));
4720  }
4721 
4722  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4723  auto Select = MIRBuilder.buildSelect(
4724  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4725  DstLeftoverRegs.push_back(Select.getReg(0));
4726  }
4727 
4728  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4729  LeftoverTy, DstLeftoverRegs);
4730 
4731  MI.eraseFromParent();
4732  return Legalized;
4733 }
4734 
4735 LegalizerHelper::LegalizeResult
4736 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4737  LLT NarrowTy) {
4738  if (TypeIdx != 1)
4739  return UnableToLegalize;
4740 
4741  Register DstReg = MI.getOperand(0).getReg();
4742  Register SrcReg = MI.getOperand(1).getReg();
4743  LLT DstTy = MRI.getType(DstReg);
4744  LLT SrcTy = MRI.getType(SrcReg);
4745  unsigned NarrowSize = NarrowTy.getSizeInBits();
4746 
4747  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4748  const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4749 
4750  MachineIRBuilder &B = MIRBuilder;
4751  auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4752  // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4753  auto C_0 = B.buildConstant(NarrowTy, 0);
4754  auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4755  UnmergeSrc.getReg(1), C_0);
4756  auto LoCTLZ = IsUndef ?
4757  B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4758  B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4759  auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4760  auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4761  auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4762  B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4763 
4764  MI.eraseFromParent();
4765  return Legalized;
4766  }
4767 
4768  return UnableToLegalize;
4769 }
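//
// For example (registers illustrative), %1:_(s32) = G_CTLZ %0:_(s64) with
// NarrowTy = s32 becomes:
//
// %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0      ; Lo, Hi
// %4:_(s32) = G_CONSTANT i32 0
// %5:_(s1) = G_ICMP intpred(eq), %3, %4
// %6:_(s32) = G_CTLZ %2
// %7:_(s32) = G_CONSTANT i32 32
// %8:_(s32) = G_ADD %6, %7
// %9:_(s32) = G_CTLZ_ZERO_UNDEF %3
// %1:_(s32) = G_SELECT %5, %8, %9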
4770 
4771 LegalizerHelper::LegalizeResult
4772 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4773  LLT NarrowTy) {
4774  if (TypeIdx != 1)
4775  return UnableToLegalize;
4776 
4777  Register DstReg = MI.getOperand(0).getReg();
4778  Register SrcReg = MI.getOperand(1).getReg();
4779  LLT DstTy = MRI.getType(DstReg);
4780  LLT SrcTy = MRI.getType(SrcReg);
4781  unsigned NarrowSize = NarrowTy.getSizeInBits();
4782 
4783  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4784