LLVM  10.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41  assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43  unsigned Size = OrigTy.getSizeInBits();
44  unsigned NarrowSize = NarrowTy.getSizeInBits();
45  unsigned NumParts = Size / NarrowSize;
46  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47  assert(Size > NarrowSize);
48 
49  if (LeftoverSize == 0)
50  return {NumParts, 0};
51 
52  if (NarrowTy.isVector()) {
53  unsigned EltSize = OrigTy.getScalarSizeInBits();
54  if (LeftoverSize % EltSize != 0)
55  return {-1, -1};
56  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57  } else {
58  LeftoverTy = LLT::scalar(LeftoverSize);
59  }
60 
61  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62  return std::make_pair(NumParts, NumLeftover);
63 }
64 
66  GISelChangeObserver &Observer,
67  MachineIRBuilder &Builder)
68  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70  MIRBuilder.setMF(MF);
71  MIRBuilder.setChangeObserver(Observer);
72 }
73 
75  GISelChangeObserver &Observer,
77  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78  MIRBuilder.setMF(MF);
79  MIRBuilder.setChangeObserver(Observer);
80 }
83  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84 
85  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
86  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
87  return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
89  auto Step = LI.getAction(MI, MRI);
90  switch (Step.Action) {
91  case Legal:
92  LLVM_DEBUG(dbgs() << ".. Already legal\n");
93  return AlreadyLegal;
94  case Libcall:
95  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
96  return libcall(MI);
97  case NarrowScalar:
98  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
100  case WidenScalar:
101  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102  return widenScalar(MI, Step.TypeIdx, Step.NewType);
103  case Lower:
104  LLVM_DEBUG(dbgs() << ".. Lower\n");
105  return lower(MI, Step.TypeIdx, Step.NewType);
106  case FewerElements:
107  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
109  case MoreElements:
110  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
112  case Custom:
113  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114  return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
116  default:
117  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118  return UnableToLegalize;
119  }
120 }
121 
122 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
123  SmallVectorImpl<Register> &VRegs) {
124  for (int i = 0; i < NumParts; ++i)
126  MIRBuilder.buildUnmerge(VRegs, Reg);
127 }
128 
129 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
130  LLT MainTy, LLT &LeftoverTy,
132  SmallVectorImpl<Register> &LeftoverRegs) {
133  assert(!LeftoverTy.isValid() && "this is an out argument");
134 
135  unsigned RegSize = RegTy.getSizeInBits();
136  unsigned MainSize = MainTy.getSizeInBits();
137  unsigned NumParts = RegSize / MainSize;
138  unsigned LeftoverSize = RegSize - NumParts * MainSize;
139 
140  // Use an unmerge when possible.
141  if (LeftoverSize == 0) {
142  for (unsigned I = 0; I < NumParts; ++I)
143  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
144  MIRBuilder.buildUnmerge(VRegs, Reg);
145  return true;
146  }
147 
148  if (MainTy.isVector()) {
149  unsigned EltSize = MainTy.getScalarSizeInBits();
150  if (LeftoverSize % EltSize != 0)
151  return false;
152  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
153  } else {
154  LeftoverTy = LLT::scalar(LeftoverSize);
155  }
156 
157  // For irregular sizes, extract the individual parts.
158  for (unsigned I = 0; I != NumParts; ++I) {
159  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
160  VRegs.push_back(NewReg);
161  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
162  }
163 
164  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
165  Offset += LeftoverSize) {
166  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
167  LeftoverRegs.push_back(NewReg);
168  MIRBuilder.buildExtract(NewReg, Reg, Offset);
169  }
170 
171  return true;
172 }
173 
174 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
175  if (OrigTy.isVector() && TargetTy.isVector()) {
176  assert(OrigTy.getElementType() == TargetTy.getElementType());
177  int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
178  TargetTy.getNumElements());
179  return LLT::scalarOrVector(GCD, OrigTy.getElementType());
180  }
181 
182  if (OrigTy.isVector() && !TargetTy.isVector()) {
183  assert(OrigTy.getElementType() == TargetTy);
184  return TargetTy;
185  }
186 
187  assert(!OrigTy.isVector() && !TargetTy.isVector());
188 
189  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
190  TargetTy.getSizeInBits());
191  return LLT::scalar(GCD);
192 }
193 
194 void LegalizerHelper::insertParts(Register DstReg,
195  LLT ResultTy, LLT PartTy,
196  ArrayRef<Register> PartRegs,
197  LLT LeftoverTy,
198  ArrayRef<Register> LeftoverRegs) {
199  if (!LeftoverTy.isValid()) {
200  assert(LeftoverRegs.empty());
201 
202  if (!ResultTy.isVector()) {
203  MIRBuilder.buildMerge(DstReg, PartRegs);
204  return;
205  }
206 
207  if (PartTy.isVector())
208  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
209  else
210  MIRBuilder.buildBuildVector(DstReg, PartRegs);
211  return;
212  }
213 
214  unsigned PartSize = PartTy.getSizeInBits();
215  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
216 
217  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
218  MIRBuilder.buildUndef(CurResultReg);
219 
220  unsigned Offset = 0;
221  for (Register PartReg : PartRegs) {
222  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
223  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
224  CurResultReg = NewResultReg;
225  Offset += PartSize;
226  }
227 
228  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
229  // Use the original output register for the final insert to avoid a copy.
230  Register NewResultReg = (I + 1 == E) ?
231  DstReg : MRI.createGenericVirtualRegister(ResultTy);
232 
233  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
234  CurResultReg = NewResultReg;
235  Offset += LeftoverPartSize;
236  }
237 }
238 
/// Map a generic opcode plus an operand bit width to the runtime library
/// call that implements it. Asserts (in debug builds) when \p Size is
/// outside the widths this mapping supports for the given opcode.
/// NOTE(review): G_FREM and G_FPOW have no size assert, unlike every other
/// case; for an unexpected Size they silently fall back to the F32 variant —
/// confirm whether that asymmetry is intentional.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  // Integer division/remainder: 32- and 64-bit only.
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  // Count-leading-zeros only has a 32-bit libcall here.
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  // Basic floating-point arithmetic: float/double variants.
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FEXP2:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  // Trig/log libcalls additionally support fp128.
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  case TargetOpcode::G_FCEIL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
  case TargetOpcode::G_FFLOOR:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
  }
  llvm_unreachable("Unknown libcall function");
}
310 
313  const CallLowering::ArgInfo &Result,
315  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
316  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
317  const char *Name = TLI.getLibcallName(Libcall);
318 
319  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
320 
322  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
323  Info.Callee = MachineOperand::CreateES(Name);
324  Info.OrigRet = Result;
325  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
326  if (!CLI.lowerCall(MIRBuilder, Info))
328 
330 }
331 
332 // Useful for libcalls where all operands have the same type.
335  Type *OpType) {
336  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
337 
339  for (unsigned i = 1; i < MI.getNumOperands(); i++)
340  Args.push_back({MI.getOperand(i).getReg(), OpType});
341  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
342  Args);
343 }
344 
347  MachineInstr &MI) {
348  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
349  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
350 
352  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
353  Register Reg = MI.getOperand(i).getReg();
354 
355  // Need derive an IR type for call lowering.
356  LLT OpLLT = MRI.getType(Reg);
357  Type *OpTy = nullptr;
358  if (OpLLT.isPointer())
359  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
360  else
361  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
362  Args.push_back({Reg, OpTy});
363  }
364 
365  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
366  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
368  RTLIB::Libcall RTLibcall;
369  switch (ID) {
370  case Intrinsic::memcpy:
371  RTLibcall = RTLIB::MEMCPY;
372  break;
373  case Intrinsic::memset:
374  RTLibcall = RTLIB::MEMSET;
375  break;
376  case Intrinsic::memmove:
377  RTLibcall = RTLIB::MEMMOVE;
378  break;
379  default:
381  }
382  const char *Name = TLI.getLibcallName(RTLibcall);
383 
384  MIRBuilder.setInstr(MI);
385  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
386 
388  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
389  Info.Callee = MachineOperand::CreateES(Name);
391  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
392  if (!CLI.lowerCall(MIRBuilder, Info))
394 
396 }
397 
398 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
399  Type *FromType) {
400  auto ToMVT = MVT::getVT(ToType);
401  auto FromMVT = MVT::getVT(FromType);
402 
403  switch (Opcode) {
404  case TargetOpcode::G_FPEXT:
405  return RTLIB::getFPEXT(FromMVT, ToMVT);
406  case TargetOpcode::G_FPTRUNC:
407  return RTLIB::getFPROUND(FromMVT, ToMVT);
408  case TargetOpcode::G_FPTOSI:
409  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
410  case TargetOpcode::G_FPTOUI:
411  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
412  case TargetOpcode::G_SITOFP:
413  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
414  case TargetOpcode::G_UITOFP:
415  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
416  }
417  llvm_unreachable("Unsupported libcall function");
418 }
419 
422  Type *FromType) {
424  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
425  {{MI.getOperand(1).getReg(), FromType}});
426 }
427 
430  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
431  unsigned Size = LLTy.getSizeInBits();
432  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
433 
434  MIRBuilder.setInstr(MI);
435 
436  switch (MI.getOpcode()) {
437  default:
438  return UnableToLegalize;
439  case TargetOpcode::G_SDIV:
440  case TargetOpcode::G_UDIV:
441  case TargetOpcode::G_SREM:
442  case TargetOpcode::G_UREM:
443  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
444  Type *HLTy = IntegerType::get(Ctx, Size);
445  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
446  if (Status != Legalized)
447  return Status;
448  break;
449  }
450  case TargetOpcode::G_FADD:
451  case TargetOpcode::G_FSUB:
452  case TargetOpcode::G_FMUL:
453  case TargetOpcode::G_FDIV:
454  case TargetOpcode::G_FMA:
455  case TargetOpcode::G_FPOW:
456  case TargetOpcode::G_FREM:
457  case TargetOpcode::G_FCOS:
458  case TargetOpcode::G_FSIN:
459  case TargetOpcode::G_FLOG10:
460  case TargetOpcode::G_FLOG:
461  case TargetOpcode::G_FLOG2:
462  case TargetOpcode::G_FEXP:
463  case TargetOpcode::G_FEXP2:
464  case TargetOpcode::G_FCEIL:
465  case TargetOpcode::G_FFLOOR: {
466  if (Size > 64) {
467  LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
468  return UnableToLegalize;
469  }
470  Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
471  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
472  if (Status != Legalized)
473  return Status;
474  break;
475  }
476  case TargetOpcode::G_FPEXT: {
477  // FIXME: Support other floating point types (half, fp128 etc)
478  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
479  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
480  if (ToSize != 64 || FromSize != 32)
481  return UnableToLegalize;
484  if (Status != Legalized)
485  return Status;
486  break;
487  }
488  case TargetOpcode::G_FPTRUNC: {
489  // FIXME: Support other floating point types (half, fp128 etc)
490  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
491  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
492  if (ToSize != 32 || FromSize != 64)
493  return UnableToLegalize;
496  if (Status != Legalized)
497  return Status;
498  break;
499  }
500  case TargetOpcode::G_FPTOSI:
501  case TargetOpcode::G_FPTOUI: {
502  // FIXME: Support other types
503  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
504  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
505  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
506  return UnableToLegalize;
508  MI, MIRBuilder,
509  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
510  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
511  if (Status != Legalized)
512  return Status;
513  break;
514  }
515  case TargetOpcode::G_SITOFP:
516  case TargetOpcode::G_UITOFP: {
517  // FIXME: Support other types
518  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
519  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
520  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
521  return UnableToLegalize;
523  MI, MIRBuilder,
524  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
525  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
526  if (Status != Legalized)
527  return Status;
528  break;
529  }
530  }
531 
532  MI.eraseFromParent();
533  return Legalized;
534 }
535 
537  unsigned TypeIdx,
538  LLT NarrowTy) {
539  MIRBuilder.setInstr(MI);
540 
541  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
542  uint64_t NarrowSize = NarrowTy.getSizeInBits();
543 
544  switch (MI.getOpcode()) {
545  default:
546  return UnableToLegalize;
547  case TargetOpcode::G_IMPLICIT_DEF: {
548  // FIXME: add support for when SizeOp0 isn't an exact multiple of
549  // NarrowSize.
550  if (SizeOp0 % NarrowSize != 0)
551  return UnableToLegalize;
552  int NumParts = SizeOp0 / NarrowSize;
553 
554  SmallVector<Register, 2> DstRegs;
555  for (int i = 0; i < NumParts; ++i)
556  DstRegs.push_back(
557  MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
558 
559  Register DstReg = MI.getOperand(0).getReg();
560  if(MRI.getType(DstReg).isVector())
561  MIRBuilder.buildBuildVector(DstReg, DstRegs);
562  else
563  MIRBuilder.buildMerge(DstReg, DstRegs);
564  MI.eraseFromParent();
565  return Legalized;
566  }
567  case TargetOpcode::G_CONSTANT: {
568  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
569  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
570  unsigned TotalSize = Ty.getSizeInBits();
571  unsigned NarrowSize = NarrowTy.getSizeInBits();
572  int NumParts = TotalSize / NarrowSize;
573 
574  SmallVector<Register, 4> PartRegs;
575  for (int I = 0; I != NumParts; ++I) {
576  unsigned Offset = I * NarrowSize;
577  auto K = MIRBuilder.buildConstant(NarrowTy,
578  Val.lshr(Offset).trunc(NarrowSize));
579  PartRegs.push_back(K.getReg(0));
580  }
581 
582  LLT LeftoverTy;
583  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
584  SmallVector<Register, 1> LeftoverRegs;
585  if (LeftoverBits != 0) {
586  LeftoverTy = LLT::scalar(LeftoverBits);
587  auto K = MIRBuilder.buildConstant(
588  LeftoverTy,
589  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
590  LeftoverRegs.push_back(K.getReg(0));
591  }
592 
593  insertParts(MI.getOperand(0).getReg(),
594  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
595 
596  MI.eraseFromParent();
597  return Legalized;
598  }
599  case TargetOpcode::G_SEXT: {
600  if (TypeIdx != 0)
601  return UnableToLegalize;
602 
603  if (NarrowTy.getSizeInBits() != SizeOp0 / 2) {
604  LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
605  return UnableToLegalize;
606  }
607 
608  Register SrcReg = MI.getOperand(1).getReg();
609 
610  // Shift the sign bit of the low register through the high register.
611  auto ShiftAmt =
612  MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
613  auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
614  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
615  MI.eraseFromParent();
616  return Legalized;
617  }
618 
619  case TargetOpcode::G_ADD: {
620  // FIXME: add support for when SizeOp0 isn't an exact multiple of
621  // NarrowSize.
622  if (SizeOp0 % NarrowSize != 0)
623  return UnableToLegalize;
624  // Expand in terms of carry-setting/consuming G_ADDE instructions.
625  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
626 
627  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
628  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
629  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
630 
632  MIRBuilder.buildConstant(CarryIn, 0);
633 
634  for (int i = 0; i < NumParts; ++i) {
635  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
637 
638  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
639  Src2Regs[i], CarryIn);
640 
641  DstRegs.push_back(DstReg);
642  CarryIn = CarryOut;
643  }
644  Register DstReg = MI.getOperand(0).getReg();
645  if(MRI.getType(DstReg).isVector())
646  MIRBuilder.buildBuildVector(DstReg, DstRegs);
647  else
648  MIRBuilder.buildMerge(DstReg, DstRegs);
649  MI.eraseFromParent();
650  return Legalized;
651  }
652  case TargetOpcode::G_SUB: {
653  // FIXME: add support for when SizeOp0 isn't an exact multiple of
654  // NarrowSize.
655  if (SizeOp0 % NarrowSize != 0)
656  return UnableToLegalize;
657 
658  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
659 
660  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
661  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
662  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
663 
664  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
666  MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
667  {Src1Regs[0], Src2Regs[0]});
668  DstRegs.push_back(DstReg);
669  Register BorrowIn = BorrowOut;
670  for (int i = 1; i < NumParts; ++i) {
671  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
672  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
673 
674  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
675  {Src1Regs[i], Src2Regs[i], BorrowIn});
676 
677  DstRegs.push_back(DstReg);
678  BorrowIn = BorrowOut;
679  }
680  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
681  MI.eraseFromParent();
682  return Legalized;
683  }
684  case TargetOpcode::G_MUL:
685  case TargetOpcode::G_UMULH:
686  return narrowScalarMul(MI, NarrowTy);
687  case TargetOpcode::G_EXTRACT:
688  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
689  case TargetOpcode::G_INSERT:
690  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
691  case TargetOpcode::G_LOAD: {
692  const auto &MMO = **MI.memoperands_begin();
693  Register DstReg = MI.getOperand(0).getReg();
694  LLT DstTy = MRI.getType(DstReg);
695  if (DstTy.isVector())
696  return UnableToLegalize;
697 
698  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
699  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
700  auto &MMO = **MI.memoperands_begin();
701  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
702  MIRBuilder.buildAnyExt(DstReg, TmpReg);
703  MI.eraseFromParent();
704  return Legalized;
705  }
706 
707  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
708  }
709  case TargetOpcode::G_ZEXTLOAD:
710  case TargetOpcode::G_SEXTLOAD: {
711  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
712  Register DstReg = MI.getOperand(0).getReg();
713  Register PtrReg = MI.getOperand(1).getReg();
714 
715  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
716  auto &MMO = **MI.memoperands_begin();
717  if (MMO.getSizeInBits() == NarrowSize) {
718  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
719  } else {
720  unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
721  : TargetOpcode::G_SEXTLOAD;
722  MIRBuilder.buildInstr(ExtLoad)
723  .addDef(TmpReg)
724  .addUse(PtrReg)
725  .addMemOperand(&MMO);
726  }
727 
728  if (ZExt)
729  MIRBuilder.buildZExt(DstReg, TmpReg);
730  else
731  MIRBuilder.buildSExt(DstReg, TmpReg);
732 
733  MI.eraseFromParent();
734  return Legalized;
735  }
736  case TargetOpcode::G_STORE: {
737  const auto &MMO = **MI.memoperands_begin();
738 
739  Register SrcReg = MI.getOperand(0).getReg();
740  LLT SrcTy = MRI.getType(SrcReg);
741  if (SrcTy.isVector())
742  return UnableToLegalize;
743 
744  int NumParts = SizeOp0 / NarrowSize;
745  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
746  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
747  if (SrcTy.isVector() && LeftoverBits != 0)
748  return UnableToLegalize;
749 
750  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
751  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
752  auto &MMO = **MI.memoperands_begin();
753  MIRBuilder.buildTrunc(TmpReg, SrcReg);
754  MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
755  MI.eraseFromParent();
756  return Legalized;
757  }
758 
759  return reduceLoadStoreWidth(MI, 0, NarrowTy);
760  }
761  case TargetOpcode::G_SELECT:
762  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
763  case TargetOpcode::G_AND:
764  case TargetOpcode::G_OR:
765  case TargetOpcode::G_XOR: {
766  // Legalize bitwise operation:
767  // A = BinOp<Ty> B, C
768  // into:
769  // B1, ..., BN = G_UNMERGE_VALUES B
770  // C1, ..., CN = G_UNMERGE_VALUES C
771  // A1 = BinOp<Ty/N> B1, C2
772  // ...
773  // AN = BinOp<Ty/N> BN, CN
774  // A = G_MERGE_VALUES A1, ..., AN
775  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
776  }
777  case TargetOpcode::G_SHL:
778  case TargetOpcode::G_LSHR:
779  case TargetOpcode::G_ASHR:
780  return narrowScalarShift(MI, TypeIdx, NarrowTy);
781  case TargetOpcode::G_CTLZ:
782  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
783  case TargetOpcode::G_CTTZ:
784  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
785  case TargetOpcode::G_CTPOP:
786  if (TypeIdx != 0)
787  return UnableToLegalize; // TODO
788 
789  Observer.changingInstr(MI);
790  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
791  Observer.changedInstr(MI);
792  return Legalized;
793  case TargetOpcode::G_INTTOPTR:
794  if (TypeIdx != 1)
795  return UnableToLegalize;
796 
797  Observer.changingInstr(MI);
798  narrowScalarSrc(MI, NarrowTy, 1);
799  Observer.changedInstr(MI);
800  return Legalized;
801  case TargetOpcode::G_PTRTOINT:
802  if (TypeIdx != 0)
803  return UnableToLegalize;
804 
805  Observer.changingInstr(MI);
806  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
807  Observer.changedInstr(MI);
808  return Legalized;
809  case TargetOpcode::G_PHI: {
810  unsigned NumParts = SizeOp0 / NarrowSize;
811  SmallVector<Register, 2> DstRegs;
813  DstRegs.resize(NumParts);
814  SrcRegs.resize(MI.getNumOperands() / 2);
815  Observer.changingInstr(MI);
816  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
817  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
818  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
819  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
820  SrcRegs[i / 2]);
821  }
822  MachineBasicBlock &MBB = *MI.getParent();
823  MIRBuilder.setInsertPt(MBB, MI);
824  for (unsigned i = 0; i < NumParts; ++i) {
825  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
826  MachineInstrBuilder MIB =
827  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
828  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
829  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
830  }
831  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
832  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
833  Observer.changedInstr(MI);
834  MI.eraseFromParent();
835  return Legalized;
836  }
837  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
838  case TargetOpcode::G_INSERT_VECTOR_ELT: {
839  if (TypeIdx != 2)
840  return UnableToLegalize;
841 
842  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
843  Observer.changingInstr(MI);
844  narrowScalarSrc(MI, NarrowTy, OpIdx);
845  Observer.changedInstr(MI);
846  return Legalized;
847  }
848  case TargetOpcode::G_ICMP: {
849  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
850  if (NarrowSize * 2 != SrcSize)
851  return UnableToLegalize;
852 
853  Observer.changingInstr(MI);
854  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
855  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
856  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
857 
858  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
859  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
860  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
861 
862  CmpInst::Predicate Pred =
863  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
864  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
865 
866  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
867  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
868  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
869  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
870  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
871  MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
872  } else {
873  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
874  MachineInstrBuilder CmpHEQ =
875  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
877  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
878  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
879  }
880  Observer.changedInstr(MI);
881  MI.eraseFromParent();
882  return Legalized;
883  }
884  case TargetOpcode::G_SEXT_INREG: {
885  if (TypeIdx != 0)
886  return UnableToLegalize;
887 
888  if (!MI.getOperand(2).isImm())
889  return UnableToLegalize;
890  int64_t SizeInBits = MI.getOperand(2).getImm();
891 
892  // So long as the new type has more bits than the bits we're extending we
893  // don't need to break it apart.
894  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
895  Observer.changingInstr(MI);
896  // We don't lose any non-extension bits by truncating the src and
897  // sign-extending the dst.
898  MachineOperand &MO1 = MI.getOperand(1);
899  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
900  MO1.setReg(TruncMIB->getOperand(0).getReg());
901 
902  MachineOperand &MO2 = MI.getOperand(0);
903  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
905  MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
906  MO2.setReg(DstExt);
907  Observer.changedInstr(MI);
908  return Legalized;
909  }
910 
911  // Break it apart. Components below the extension point are unmodified. The
912  // component containing the extension point becomes a narrower SEXT_INREG.
913  // Components above it are ashr'd from the component containing the
914  // extension point.
915  if (SizeOp0 % NarrowSize != 0)
916  return UnableToLegalize;
917  int NumParts = SizeOp0 / NarrowSize;
918 
919  // List the registers where the destination will be scattered.
920  SmallVector<Register, 2> DstRegs;
921  // List the registers where the source will be split.
922  SmallVector<Register, 2> SrcRegs;
923 
924  // Create all the temporary registers.
925  for (int i = 0; i < NumParts; ++i) {
926  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
927 
928  SrcRegs.push_back(SrcReg);
929  }
930 
931  // Explode the big arguments into smaller chunks.
932  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
933 
934  Register AshrCstReg =
935  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
936  ->getOperand(0)
937  .getReg();
938  Register FullExtensionReg = 0;
939  Register PartialExtensionReg = 0;
940 
941  // Do the operation on each small part.
942  for (int i = 0; i < NumParts; ++i) {
943  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
944  DstRegs.push_back(SrcRegs[i]);
945  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
946  assert(PartialExtensionReg &&
947  "Expected to visit partial extension before full");
948  if (FullExtensionReg) {
949  DstRegs.push_back(FullExtensionReg);
950  continue;
951  }
952  DstRegs.push_back(MIRBuilder
953  .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
954  {PartialExtensionReg, AshrCstReg})
955  ->getOperand(0)
956  .getReg());
957  FullExtensionReg = DstRegs.back();
958  } else {
959  DstRegs.push_back(
960  MIRBuilder
961  .buildInstr(
962  TargetOpcode::G_SEXT_INREG, {NarrowTy},
963  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
964  ->getOperand(0)
965  .getReg());
966  PartialExtensionReg = DstRegs.back();
967  }
968  }
969 
970  // Gather the destination registers into the final destination.
971  Register DstReg = MI.getOperand(0).getReg();
972  MIRBuilder.buildMerge(DstReg, DstRegs);
973  MI.eraseFromParent();
974  return Legalized;
975  }
976  }
977 }
978 
979 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
980  unsigned OpIdx, unsigned ExtOpcode) {
981  MachineOperand &MO = MI.getOperand(OpIdx);
982  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
983  MO.setReg(ExtB->getOperand(0).getReg());
984 }
985 
986 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
987  unsigned OpIdx) {
988  MachineOperand &MO = MI.getOperand(OpIdx);
989  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
990  {MO.getReg()});
991  MO.setReg(ExtB->getOperand(0).getReg());
992 }
993 
994 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
995  unsigned OpIdx, unsigned TruncOpcode) {
996  MachineOperand &MO = MI.getOperand(OpIdx);
997  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
999  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
1000  MO.setReg(DstExt);
1001 }
1002 
1003 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1004  unsigned OpIdx, unsigned ExtOpcode) {
1005  MachineOperand &MO = MI.getOperand(OpIdx);
1006  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1008  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
1009  MO.setReg(DstTrunc);
1010 }
1011 
1012 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1013  unsigned OpIdx) {
1014  MachineOperand &MO = MI.getOperand(OpIdx);
1015  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1017  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
1018  MO.setReg(DstExt);
1019 }
1020 
1021 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1022  unsigned OpIdx) {
1023  MachineOperand &MO = MI.getOperand(OpIdx);
1024 
1025  LLT OldTy = MRI.getType(MO.getReg());
1026  unsigned OldElts = OldTy.getNumElements();
1027  unsigned NewElts = MoreTy.getNumElements();
1028 
1029  unsigned NumParts = NewElts / OldElts;
1030 
1031  // Use concat_vectors if the result is a multiple of the number of elements.
1032  if (NumParts * OldElts == NewElts) {
1034  Parts.push_back(MO.getReg());
1035 
1036  Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1037  for (unsigned I = 1; I != NumParts; ++I)
1038  Parts.push_back(ImpDef);
1039 
1040  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1041  MO.setReg(Concat.getReg(0));
1042  return;
1043  }
1044 
1045  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1046  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1047  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1048  MO.setReg(MoreReg);
1049 }
1050 
1052 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1053  LLT WideTy) {
1054  if (TypeIdx != 1)
1055  return UnableToLegalize;
1056 
1057  Register DstReg = MI.getOperand(0).getReg();
1058  LLT DstTy = MRI.getType(DstReg);
1059  if (DstTy.isVector())
1060  return UnableToLegalize;
1061 
1062  Register Src1 = MI.getOperand(1).getReg();
1063  LLT SrcTy = MRI.getType(Src1);
1064  const int DstSize = DstTy.getSizeInBits();
1065  const int SrcSize = SrcTy.getSizeInBits();
1066  const int WideSize = WideTy.getSizeInBits();
1067  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1068 
1069  unsigned NumOps = MI.getNumOperands();
1070  unsigned NumSrc = MI.getNumOperands() - 1;
1071  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1072 
1073  if (WideSize >= DstSize) {
1074  // Directly pack the bits in the target type.
1075  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1076 
1077  for (unsigned I = 2; I != NumOps; ++I) {
1078  const unsigned Offset = (I - 1) * PartSize;
1079 
1080  Register SrcReg = MI.getOperand(I).getReg();
1081  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1082 
1083  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1084 
1085  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1086  MRI.createGenericVirtualRegister(WideTy);
1087 
1088  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1089  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1090  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1091  ResultReg = NextResult;
1092  }
1093 
1094  if (WideSize > DstSize)
1095  MIRBuilder.buildTrunc(DstReg, ResultReg);
1096  else if (DstTy.isPointer())
1097  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1098 
1099  MI.eraseFromParent();
1100  return Legalized;
1101  }
1102 
1103  // Unmerge the original values to the GCD type, and recombine to the next
1104  // multiple greater than the original type.
1105  //
1106  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1107  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1108  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1109  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1110  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1111  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1112  // %12:_(s12) = G_MERGE_VALUES %10, %11
1113  //
1114  // Padding with undef if necessary:
1115  //
1116  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1117  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1118  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1119  // %7:_(s2) = G_IMPLICIT_DEF
1120  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1121  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1122  // %10:_(s12) = G_MERGE_VALUES %8, %9
1123 
1124  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1125  LLT GCDTy = LLT::scalar(GCD);
1126 
1128  SmallVector<Register, 8> NewMergeRegs;
1129  SmallVector<Register, 8> Unmerges;
1130  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1131 
1132  // Decompose the original operands if they don't evenly divide.
1133  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1134  Register SrcReg = MI.getOperand(I).getReg();
1135  if (GCD == SrcSize) {
1136  Unmerges.push_back(SrcReg);
1137  } else {
1138  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1139  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1140  Unmerges.push_back(Unmerge.getReg(J));
1141  }
1142  }
1143 
1144  // Pad with undef to the next size that is a multiple of the requested size.
1145  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1146  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1147  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1148  Unmerges.push_back(UndefReg);
1149  }
1150 
1151  const int PartsPerGCD = WideSize / GCD;
1152 
1153  // Build merges of each piece.
1154  ArrayRef<Register> Slicer(Unmerges);
1155  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1156  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1157  NewMergeRegs.push_back(Merge.getReg(0));
1158  }
1159 
1160  // A truncate may be necessary if the requested type doesn't evenly divide the
1161  // original result type.
1162  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1163  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1164  } else {
1165  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1166  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1167  }
1168 
1169  MI.eraseFromParent();
1170  return Legalized;
1171 }
1172 
1174 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1175  LLT WideTy) {
1176  if (TypeIdx != 0)
1177  return UnableToLegalize;
1178 
1179  unsigned NumDst = MI.getNumOperands() - 1;
1180  Register SrcReg = MI.getOperand(NumDst).getReg();
1181  LLT SrcTy = MRI.getType(SrcReg);
1182  if (!SrcTy.isScalar())
1183  return UnableToLegalize;
1184 
1185  Register Dst0Reg = MI.getOperand(0).getReg();
1186  LLT DstTy = MRI.getType(Dst0Reg);
1187  if (!DstTy.isScalar())
1188  return UnableToLegalize;
1189 
1190  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
1191  LLT NewSrcTy = LLT::scalar(NewSrcSize);
1192  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
1193 
1194  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
1195 
1196  for (unsigned I = 1; I != NumDst; ++I) {
1197  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
1198  auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
1199  WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
1200  }
1201 
1202  Observer.changingInstr(MI);
1203 
1204  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
1205  for (unsigned I = 0; I != NumDst; ++I)
1206  widenScalarDst(MI, WideTy, I);
1207 
1208  Observer.changedInstr(MI);
1209 
1210  return Legalized;
1211 }
1212 
1214 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1215  LLT WideTy) {
1216  Register DstReg = MI.getOperand(0).getReg();
1217  Register SrcReg = MI.getOperand(1).getReg();
1218  LLT SrcTy = MRI.getType(SrcReg);
1219 
1220  LLT DstTy = MRI.getType(DstReg);
1221  unsigned Offset = MI.getOperand(2).getImm();
1222 
1223  if (TypeIdx == 0) {
1224  if (SrcTy.isVector() || DstTy.isVector())
1225  return UnableToLegalize;
1226 
1227  SrcOp Src(SrcReg);
1228  if (SrcTy.isPointer()) {
1229  // Extracts from pointers can be handled only if they are really just
1230  // simple integers.
1231  const DataLayout &DL = MIRBuilder.getDataLayout();
1233  return UnableToLegalize;
1234 
1235  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1236  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1237  SrcTy = SrcAsIntTy;
1238  }
1239 
1240  if (DstTy.isPointer())
1241  return UnableToLegalize;
1242 
1243  if (Offset == 0) {
1244  // Avoid a shift in the degenerate case.
1245  MIRBuilder.buildTrunc(DstReg,
1246  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1247  MI.eraseFromParent();
1248  return Legalized;
1249  }
1250 
1251  // Do a shift in the source type.
1252  LLT ShiftTy = SrcTy;
1253  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1254  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1255  ShiftTy = WideTy;
1256  } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1257  return UnableToLegalize;
1258 
1259  auto LShr = MIRBuilder.buildLShr(
1260  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1261  MIRBuilder.buildTrunc(DstReg, LShr);
1262  MI.eraseFromParent();
1263  return Legalized;
1264  }
1265 
1266  if (SrcTy.isScalar()) {
1267  Observer.changingInstr(MI);
1268  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1269  Observer.changedInstr(MI);
1270  return Legalized;
1271  }
1272 
1273  if (!SrcTy.isVector())
1274  return UnableToLegalize;
1275 
1276  if (DstTy != SrcTy.getElementType())
1277  return UnableToLegalize;
1278 
1279  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1280  return UnableToLegalize;
1281 
1282  Observer.changingInstr(MI);
1283  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1284 
1285  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1286  Offset);
1287  widenScalarDst(MI, WideTy.getScalarType(), 0);
1288  Observer.changedInstr(MI);
1289  return Legalized;
1290 }
1291 
1293 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1294  LLT WideTy) {
1295  if (TypeIdx != 0)
1296  return UnableToLegalize;
1297  Observer.changingInstr(MI);
1298  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1299  widenScalarDst(MI, WideTy);
1300  Observer.changedInstr(MI);
1301  return Legalized;
1302 }
1303 
1305 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1306  MIRBuilder.setInstr(MI);
1307 
1308  switch (MI.getOpcode()) {
1309  default:
1310  return UnableToLegalize;
1311  case TargetOpcode::G_EXTRACT:
1312  return widenScalarExtract(MI, TypeIdx, WideTy);
1313  case TargetOpcode::G_INSERT:
1314  return widenScalarInsert(MI, TypeIdx, WideTy);
1315  case TargetOpcode::G_MERGE_VALUES:
1316  return widenScalarMergeValues(MI, TypeIdx, WideTy);
1317  case TargetOpcode::G_UNMERGE_VALUES:
1318  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1319  case TargetOpcode::G_UADDO:
1320  case TargetOpcode::G_USUBO: {
1321  if (TypeIdx == 1)
1322  return UnableToLegalize; // TODO
1323  auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1324  {MI.getOperand(2).getReg()});
1325  auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1326  {MI.getOperand(3).getReg()});
1327  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1328  ? TargetOpcode::G_ADD
1329  : TargetOpcode::G_SUB;
1330  // Do the arithmetic in the larger type.
1331  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1332  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1334  auto AndOp = MIRBuilder.buildInstr(
1335  TargetOpcode::G_AND, {WideTy},
1336  {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1337  // There is no overflow if the AndOp is the same as NewOp.
1339  AndOp);
1340  // Now trunc the NewOp to the original result.
1341  MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1342  MI.eraseFromParent();
1343  return Legalized;
1344  }
1345  case TargetOpcode::G_CTTZ:
1346  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1347  case TargetOpcode::G_CTLZ:
1348  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1349  case TargetOpcode::G_CTPOP: {
1350  if (TypeIdx == 0) {
1351  Observer.changingInstr(MI);
1352  widenScalarDst(MI, WideTy, 0);
1353  Observer.changedInstr(MI);
1354  return Legalized;
1355  }
1356 
1357  Register SrcReg = MI.getOperand(1).getReg();
1358 
1359  // First ZEXT the input.
1360  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1361  LLT CurTy = MRI.getType(SrcReg);
1362  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1363  // The count is the same in the larger type except if the original
1364  // value was zero. This can be handled by setting the bit just off
1365  // the top of the original type.
1366  auto TopBit =
1367  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1368  MIBSrc = MIRBuilder.buildOr(
1369  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1370  }
1371 
1372  // Perform the operation at the larger size.
1373  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1374  // This is already the correct result for CTPOP and CTTZs
1375  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1376  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1377  // The correct result is NewOp - (Difference in widety and current ty).
1378  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1379  MIBNewOp = MIRBuilder.buildInstr(
1380  TargetOpcode::G_SUB, {WideTy},
1381  {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1382  }
1383 
1384  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1385  MI.eraseFromParent();
1386  return Legalized;
1387  }
1388  case TargetOpcode::G_BSWAP: {
1389  Observer.changingInstr(MI);
1390  Register DstReg = MI.getOperand(0).getReg();
1391 
1392  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1393  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1394  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1395  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1396 
1397  MI.getOperand(0).setReg(DstExt);
1398 
1400 
1401  LLT Ty = MRI.getType(DstReg);
1402  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1403  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1404  MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1405  .addDef(ShrReg)
1406  .addUse(DstExt)
1407  .addUse(ShiftAmtReg);
1408 
1409  MIRBuilder.buildTrunc(DstReg, ShrReg);
1410  Observer.changedInstr(MI);
1411  return Legalized;
1412  }
1413  case TargetOpcode::G_ADD:
1414  case TargetOpcode::G_AND:
1415  case TargetOpcode::G_MUL:
1416  case TargetOpcode::G_OR:
1417  case TargetOpcode::G_XOR:
1418  case TargetOpcode::G_SUB:
1419  // Perform operation at larger width (any extension is fines here, high bits
1420  // don't affect the result) and then truncate the result back to the
1421  // original type.
1422  Observer.changingInstr(MI);
1423  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1424  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1425  widenScalarDst(MI, WideTy);
1426  Observer.changedInstr(MI);
1427  return Legalized;
1428 
1429  case TargetOpcode::G_SHL:
1430  Observer.changingInstr(MI);
1431 
1432  if (TypeIdx == 0) {
1433  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1434  widenScalarDst(MI, WideTy);
1435  } else {
1436  assert(TypeIdx == 1);
1437  // The "number of bits to shift" operand must preserve its value as an
1438  // unsigned integer:
1439  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1440  }
1441 
1442  Observer.changedInstr(MI);
1443  return Legalized;
1444 
1445  case TargetOpcode::G_SDIV:
1446  case TargetOpcode::G_SREM:
1447  case TargetOpcode::G_SMIN:
1448  case TargetOpcode::G_SMAX:
1449  Observer.changingInstr(MI);
1450  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1451  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1452  widenScalarDst(MI, WideTy);
1453  Observer.changedInstr(MI);
1454  return Legalized;
1455 
1456  case TargetOpcode::G_ASHR:
1457  case TargetOpcode::G_LSHR:
1458  Observer.changingInstr(MI);
1459 
1460  if (TypeIdx == 0) {
1461  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1462  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1463 
1464  widenScalarSrc(MI, WideTy, 1, CvtOp);
1465  widenScalarDst(MI, WideTy);
1466  } else {
1467  assert(TypeIdx == 1);
1468  // The "number of bits to shift" operand must preserve its value as an
1469  // unsigned integer:
1470  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1471  }
1472 
1473  Observer.changedInstr(MI);
1474  return Legalized;
1475  case TargetOpcode::G_UDIV:
1476  case TargetOpcode::G_UREM:
1477  case TargetOpcode::G_UMIN:
1478  case TargetOpcode::G_UMAX:
1479  Observer.changingInstr(MI);
1480  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1481  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1482  widenScalarDst(MI, WideTy);
1483  Observer.changedInstr(MI);
1484  return Legalized;
1485 
1486  case TargetOpcode::G_SELECT:
1487  Observer.changingInstr(MI);
1488  if (TypeIdx == 0) {
1489  // Perform operation at larger width (any extension is fine here, high
1490  // bits don't affect the result) and then truncate the result back to the
1491  // original type.
1492  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1493  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1494  widenScalarDst(MI, WideTy);
1495  } else {
1496  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1497  // Explicit extension is required here since high bits affect the result.
1498  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1499  }
1500  Observer.changedInstr(MI);
1501  return Legalized;
1502 
1503  case TargetOpcode::G_FPTOSI:
1504  case TargetOpcode::G_FPTOUI:
1505  if (TypeIdx != 0)
1506  return UnableToLegalize;
1507  Observer.changingInstr(MI);
1508  widenScalarDst(MI, WideTy);
1509  Observer.changedInstr(MI);
1510  return Legalized;
1511 
1512  case TargetOpcode::G_SITOFP:
1513  if (TypeIdx != 1)
1514  return UnableToLegalize;
1515  Observer.changingInstr(MI);
1516  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1517  Observer.changedInstr(MI);
1518  return Legalized;
1519 
1520  case TargetOpcode::G_UITOFP:
1521  if (TypeIdx != 1)
1522  return UnableToLegalize;
1523  Observer.changingInstr(MI);
1524  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1525  Observer.changedInstr(MI);
1526  return Legalized;
1527 
1528  case TargetOpcode::G_LOAD:
1529  case TargetOpcode::G_SEXTLOAD:
1530  case TargetOpcode::G_ZEXTLOAD:
1531  Observer.changingInstr(MI);
1532  widenScalarDst(MI, WideTy);
1533  Observer.changedInstr(MI);
1534  return Legalized;
1535 
1536  case TargetOpcode::G_STORE: {
1537  if (TypeIdx != 0)
1538  return UnableToLegalize;
1539 
1540  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1541  if (!isPowerOf2_32(Ty.getSizeInBits()))
1542  return UnableToLegalize;
1543 
1544  Observer.changingInstr(MI);
1545 
1546  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1547  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1548  widenScalarSrc(MI, WideTy, 0, ExtType);
1549 
1550  Observer.changedInstr(MI);
1551  return Legalized;
1552  }
1553  case TargetOpcode::G_CONSTANT: {
1554  MachineOperand &SrcMO = MI.getOperand(1);
1556  const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1557  Observer.changingInstr(MI);
1558  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1559 
1560  widenScalarDst(MI, WideTy);
1561  Observer.changedInstr(MI);
1562  return Legalized;
1563  }
1564  case TargetOpcode::G_FCONSTANT: {
1565  MachineOperand &SrcMO = MI.getOperand(1);
1567  APFloat Val = SrcMO.getFPImm()->getValueAPF();
1568  bool LosesInfo;
1569  switch (WideTy.getSizeInBits()) {
1570  case 32:
1572  &LosesInfo);
1573  break;
1574  case 64:
1576  &LosesInfo);
1577  break;
1578  default:
1579  return UnableToLegalize;
1580  }
1581 
1582  assert(!LosesInfo && "extend should always be lossless");
1583 
1584  Observer.changingInstr(MI);
1585  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1586 
1587  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1588  Observer.changedInstr(MI);
1589  return Legalized;
1590  }
1591  case TargetOpcode::G_IMPLICIT_DEF: {
1592  Observer.changingInstr(MI);
1593  widenScalarDst(MI, WideTy);
1594  Observer.changedInstr(MI);
1595  return Legalized;
1596  }
1597  case TargetOpcode::G_BRCOND:
1598  Observer.changingInstr(MI);
1599  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1600  Observer.changedInstr(MI);
1601  return Legalized;
1602 
1603  case TargetOpcode::G_FCMP:
1604  Observer.changingInstr(MI);
1605  if (TypeIdx == 0)
1606  widenScalarDst(MI, WideTy);
1607  else {
1608  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1609  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1610  }
1611  Observer.changedInstr(MI);
1612  return Legalized;
1613 
1614  case TargetOpcode::G_ICMP:
1615  Observer.changingInstr(MI);
1616  if (TypeIdx == 0)
1617  widenScalarDst(MI, WideTy);
1618  else {
1619  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1620  MI.getOperand(1).getPredicate()))
1621  ? TargetOpcode::G_SEXT
1622  : TargetOpcode::G_ZEXT;
1623  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1624  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1625  }
1626  Observer.changedInstr(MI);
1627  return Legalized;
1628 
1629  case TargetOpcode::G_GEP:
1630  assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1631  Observer.changingInstr(MI);
1632  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1633  Observer.changedInstr(MI);
1634  return Legalized;
1635 
1636  case TargetOpcode::G_PHI: {
1637  assert(TypeIdx == 0 && "Expecting only Idx 0");
1638 
1639  Observer.changingInstr(MI);
1640  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1641  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1642  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1643  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1644  }
1645 
1646  MachineBasicBlock &MBB = *MI.getParent();
1647  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1648  widenScalarDst(MI, WideTy);
1649  Observer.changedInstr(MI);
1650  return Legalized;
1651  }
1652  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1653  if (TypeIdx == 0) {
1654  Register VecReg = MI.getOperand(1).getReg();
1655  LLT VecTy = MRI.getType(VecReg);
1656  Observer.changingInstr(MI);
1657 
1658  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1659  WideTy.getSizeInBits()),
1660  1, TargetOpcode::G_SEXT);
1661 
1662  widenScalarDst(MI, WideTy, 0);
1663  Observer.changedInstr(MI);
1664  return Legalized;
1665  }
1666 
1667  if (TypeIdx != 2)
1668  return UnableToLegalize;
1669  Observer.changingInstr(MI);
1670  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1671  Observer.changedInstr(MI);
1672  return Legalized;
1673  }
1674  case TargetOpcode::G_FADD:
1675  case TargetOpcode::G_FMUL:
1676  case TargetOpcode::G_FSUB:
1677  case TargetOpcode::G_FMA:
1678  case TargetOpcode::G_FNEG:
1679  case TargetOpcode::G_FABS:
1680  case TargetOpcode::G_FCANONICALIZE:
1681  case TargetOpcode::G_FMINNUM:
1682  case TargetOpcode::G_FMAXNUM:
1683  case TargetOpcode::G_FMINNUM_IEEE:
1684  case TargetOpcode::G_FMAXNUM_IEEE:
1685  case TargetOpcode::G_FMINIMUM:
1686  case TargetOpcode::G_FMAXIMUM:
1687  case TargetOpcode::G_FDIV:
1688  case TargetOpcode::G_FREM:
1689  case TargetOpcode::G_FCEIL:
1690  case TargetOpcode::G_FFLOOR:
1691  case TargetOpcode::G_FCOS:
1692  case TargetOpcode::G_FSIN:
1693  case TargetOpcode::G_FLOG10:
1694  case TargetOpcode::G_FLOG:
1695  case TargetOpcode::G_FLOG2:
1696  case TargetOpcode::G_FRINT:
1697  case TargetOpcode::G_FNEARBYINT:
1698  case TargetOpcode::G_FSQRT:
1699  case TargetOpcode::G_FEXP:
1700  case TargetOpcode::G_FEXP2:
1701  case TargetOpcode::G_FPOW:
1702  case TargetOpcode::G_INTRINSIC_TRUNC:
1703  case TargetOpcode::G_INTRINSIC_ROUND:
1704  assert(TypeIdx == 0);
1705  Observer.changingInstr(MI);
1706 
1707  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1708  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1709 
1710  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1711  Observer.changedInstr(MI);
1712  return Legalized;
1713  case TargetOpcode::G_INTTOPTR:
1714  if (TypeIdx != 1)
1715  return UnableToLegalize;
1716 
1717  Observer.changingInstr(MI);
1718  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1719  Observer.changedInstr(MI);
1720  return Legalized;
1721  case TargetOpcode::G_PTRTOINT:
1722  if (TypeIdx != 0)
1723  return UnableToLegalize;
1724 
1725  Observer.changingInstr(MI);
1726  widenScalarDst(MI, WideTy, 0);
1727  Observer.changedInstr(MI);
1728  return Legalized;
1729  case TargetOpcode::G_BUILD_VECTOR: {
1730  Observer.changingInstr(MI);
1731 
1732  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1733  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1734  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1735 
1736  // Avoid changing the result vector type if the source element type was
1737  // requested.
1738  if (TypeIdx == 1) {
1739  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1740  MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1741  } else {
1742  widenScalarDst(MI, WideTy, 0);
1743  }
1744 
1745  Observer.changedInstr(MI);
1746  return Legalized;
1747  }
1748  case TargetOpcode::G_SEXT_INREG:
1749  if (TypeIdx != 0)
1750  return UnableToLegalize;
1751 
1752  Observer.changingInstr(MI);
1753  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1754  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
1755  Observer.changedInstr(MI);
1756  return Legalized;
1757  }
1758 }
1759 
1761 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1762  using namespace TargetOpcode;
1763  MIRBuilder.setInstr(MI);
1764 
1765  switch(MI.getOpcode()) {
1766  default:
1767  return UnableToLegalize;
1768  case TargetOpcode::G_SREM:
1769  case TargetOpcode::G_UREM: {
1770  Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1771  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1772  .addDef(QuotReg)
1773  .addUse(MI.getOperand(1).getReg())
1774  .addUse(MI.getOperand(2).getReg());
1775 
1776  Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1777  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1779  ProdReg);
1780  MI.eraseFromParent();
1781  return Legalized;
1782  }
1783  case TargetOpcode::G_SMULO:
1784  case TargetOpcode::G_UMULO: {
1785  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1786  // result.
1787  Register Res = MI.getOperand(0).getReg();
1788  Register Overflow = MI.getOperand(1).getReg();
1789  Register LHS = MI.getOperand(2).getReg();
1790  Register RHS = MI.getOperand(3).getReg();
1791 
1792  MIRBuilder.buildMul(Res, LHS, RHS);
1793 
1794  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1795  ? TargetOpcode::G_SMULH
1796  : TargetOpcode::G_UMULH;
1797 
1798  Register HiPart = MRI.createGenericVirtualRegister(Ty);
1799  MIRBuilder.buildInstr(Opcode)
1800  .addDef(HiPart)
1801  .addUse(LHS)
1802  .addUse(RHS);
1803 
1804  Register Zero = MRI.createGenericVirtualRegister(Ty);
1805  MIRBuilder.buildConstant(Zero, 0);
1806 
1807  // For *signed* multiply, overflow is detected by checking:
1808  // (hi != (lo >> bitwidth-1))
1809  if (Opcode == TargetOpcode::G_SMULH) {
1810  Register Shifted = MRI.createGenericVirtualRegister(Ty);
1811  Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1812  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1813  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1814  .addDef(Shifted)
1815  .addUse(Res)
1816  .addUse(ShiftAmt);
1817  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1818  } else {
1819  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1820  }
1821  MI.eraseFromParent();
1822  return Legalized;
1823  }
1824  case TargetOpcode::G_FNEG: {
1825  // TODO: Handle vector types once we are able to
1826  // represent them.
1827  if (Ty.isVector())
1828  return UnableToLegalize;
1829  Register Res = MI.getOperand(0).getReg();
1830  Type *ZeroTy;
1832  switch (Ty.getSizeInBits()) {
1833  case 16:
1834  ZeroTy = Type::getHalfTy(Ctx);
1835  break;
1836  case 32:
1837  ZeroTy = Type::getFloatTy(Ctx);
1838  break;
1839  case 64:
1840  ZeroTy = Type::getDoubleTy(Ctx);
1841  break;
1842  case 128:
1843  ZeroTy = Type::getFP128Ty(Ctx);
1844  break;
1845  default:
1846  llvm_unreachable("unexpected floating-point type");
1847  }
1848  ConstantFP &ZeroForNegation =
1849  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1850  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1851  Register SubByReg = MI.getOperand(1).getReg();
1852  Register ZeroReg = Zero->getOperand(0).getReg();
1853  MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1854  MI.getFlags());
1855  MI.eraseFromParent();
1856  return Legalized;
1857  }
1858  case TargetOpcode::G_FSUB: {
1859  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1860  // First, check if G_FNEG is marked as Lower. If so, we may
1861  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1862  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1863  return UnableToLegalize;
1864  Register Res = MI.getOperand(0).getReg();
1865  Register LHS = MI.getOperand(1).getReg();
1866  Register RHS = MI.getOperand(2).getReg();
1867  Register Neg = MRI.createGenericVirtualRegister(Ty);
1868  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1869  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
1870  MI.eraseFromParent();
1871  return Legalized;
1872  }
1873  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1874  Register OldValRes = MI.getOperand(0).getReg();
1875  Register SuccessRes = MI.getOperand(1).getReg();
1876  Register Addr = MI.getOperand(2).getReg();
1877  Register CmpVal = MI.getOperand(3).getReg();
1878  Register NewVal = MI.getOperand(4).getReg();
1879  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1880  **MI.memoperands_begin());
1881  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1882  MI.eraseFromParent();
1883  return Legalized;
1884  }
1885  case TargetOpcode::G_LOAD:
1886  case TargetOpcode::G_SEXTLOAD:
1887  case TargetOpcode::G_ZEXTLOAD: {
1888  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1889  Register DstReg = MI.getOperand(0).getReg();
1890  Register PtrReg = MI.getOperand(1).getReg();
1891  LLT DstTy = MRI.getType(DstReg);
1892  auto &MMO = **MI.memoperands_begin();
1893 
1894  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
1895  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
1896  // This load needs splitting into power of 2 sized loads.
1897  if (DstTy.isVector())
1898  return UnableToLegalize;
1899  if (isPowerOf2_32(DstTy.getSizeInBits()))
1900  return UnableToLegalize; // Don't know what we're being asked to do.
1901 
1902  // Our strategy here is to generate anyextending loads for the smaller
1903  // types up to next power-2 result type, and then combine the two larger
1904  // result values together, before truncating back down to the non-pow-2
1905  // type.
1906  // E.g. v1 = i24 load =>
1907  // v2 = i32 load (2 byte)
1908  // v3 = i32 load (1 byte)
1909  // v4 = i32 shl v3, 16
1910  // v5 = i32 or v4, v2
1911  // v1 = i24 trunc v5
1912  // By doing this we generate the correct truncate which should get
1913  // combined away as an artifact with a matching extend.
1914  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
1915  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
1916 
1918  MachineMemOperand *LargeMMO =
1919  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
1920  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
1921  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
1922 
1923  LLT PtrTy = MRI.getType(PtrReg);
1924  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
1925  LLT AnyExtTy = LLT::scalar(AnyExtSize);
1926  Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
1927  Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
1928  auto LargeLoad =
1929  MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
1930 
1931  auto OffsetCst =
1932  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
1933  Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
1934  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
1935  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
1936  *SmallMMO);
1937 
1938  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
1939  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
1940  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
1941  MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
1942  MI.eraseFromParent();
1943  return Legalized;
1944  }
1945  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1946  MI.eraseFromParent();
1947  return Legalized;
1948  }
1949 
1950  if (DstTy.isScalar()) {
1951  Register TmpReg =
1952  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
1953  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1954  switch (MI.getOpcode()) {
1955  default:
1956  llvm_unreachable("Unexpected opcode");
1957  case TargetOpcode::G_LOAD:
1958  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1959  break;
1960  case TargetOpcode::G_SEXTLOAD:
1961  MIRBuilder.buildSExt(DstReg, TmpReg);
1962  break;
1963  case TargetOpcode::G_ZEXTLOAD:
1964  MIRBuilder.buildZExt(DstReg, TmpReg);
1965  break;
1966  }
1967  MI.eraseFromParent();
1968  return Legalized;
1969  }
1970 
1971  return UnableToLegalize;
1972  }
1973  case TargetOpcode::G_STORE: {
1974  // Lower a non-power of 2 store into multiple pow-2 stores.
1975  // E.g. split an i24 store into an i16 store + i8 store.
1976  // We do this by first extending the stored value to the next largest power
1977  // of 2 type, and then using truncating stores to store the components.
1978  // By doing this, likewise with G_LOAD, generate an extend that can be
1979  // artifact-combined away instead of leaving behind extracts.
1980  Register SrcReg = MI.getOperand(0).getReg();
1981  Register PtrReg = MI.getOperand(1).getReg();
1982  LLT SrcTy = MRI.getType(SrcReg);
1983  MachineMemOperand &MMO = **MI.memoperands_begin();
1984  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
1985  return UnableToLegalize;
1986  if (SrcTy.isVector())
1987  return UnableToLegalize;
1988  if (isPowerOf2_32(SrcTy.getSizeInBits()))
1989  return UnableToLegalize; // Don't know what we're being asked to do.
1990 
1991  // Extend to the next pow-2.
1992  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
1993  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
1994 
1995  // Obtain the smaller value by shifting away the larger value.
1996  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
1997  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
1998  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
1999  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2000 
2001  // Generate the GEP and truncating stores.
2002  LLT PtrTy = MRI.getType(PtrReg);
2003  auto OffsetCst =
2004  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2005  Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
2006  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
2007 
2009  MachineMemOperand *LargeMMO =
2010  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2011  MachineMemOperand *SmallMMO =
2012  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2013  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2014  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2015  MI.eraseFromParent();
2016  return Legalized;
2017  }
2018  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2019  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2020  case TargetOpcode::G_CTLZ:
2021  case TargetOpcode::G_CTTZ:
2022  case TargetOpcode::G_CTPOP:
2023  return lowerBitCount(MI, TypeIdx, Ty);
2024  case G_UADDO: {
2025  Register Res = MI.getOperand(0).getReg();
2026  Register CarryOut = MI.getOperand(1).getReg();
2027  Register LHS = MI.getOperand(2).getReg();
2028  Register RHS = MI.getOperand(3).getReg();
2029 
2030  MIRBuilder.buildAdd(Res, LHS, RHS);
2031  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2032 
2033  MI.eraseFromParent();
2034  return Legalized;
2035  }
2036  case G_UADDE: {
2037  Register Res = MI.getOperand(0).getReg();
2038  Register CarryOut = MI.getOperand(1).getReg();
2039  Register LHS = MI.getOperand(2).getReg();
2040  Register RHS = MI.getOperand(3).getReg();
2041  Register CarryIn = MI.getOperand(4).getReg();
2042 
2043  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2044  Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2045 
2046  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2047  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2048  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2049  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2050 
2051  MI.eraseFromParent();
2052  return Legalized;
2053  }
2054  case G_USUBO: {
2055  Register Res = MI.getOperand(0).getReg();
2056  Register BorrowOut = MI.getOperand(1).getReg();
2057  Register LHS = MI.getOperand(2).getReg();
2058  Register RHS = MI.getOperand(3).getReg();
2059 
2060  MIRBuilder.buildSub(Res, LHS, RHS);
2061  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2062 
2063  MI.eraseFromParent();
2064  return Legalized;
2065  }
2066  case G_USUBE: {
2067  Register Res = MI.getOperand(0).getReg();
2068  Register BorrowOut = MI.getOperand(1).getReg();
2069  Register LHS = MI.getOperand(2).getReg();
2070  Register RHS = MI.getOperand(3).getReg();
2071  Register BorrowIn = MI.getOperand(4).getReg();
2072 
2073  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2074  Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2075  Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2076  Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2077 
2078  MIRBuilder.buildSub(TmpRes, LHS, RHS);
2079  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2080  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2081  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2082  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
2083  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2084 
2085  MI.eraseFromParent();
2086  return Legalized;
2087  }
2088  case G_UITOFP:
2089  return lowerUITOFP(MI, TypeIdx, Ty);
2090  case G_SITOFP:
2091  return lowerSITOFP(MI, TypeIdx, Ty);
2092  case G_SMIN:
2093  case G_SMAX:
2094  case G_UMIN:
2095  case G_UMAX:
2096  return lowerMinMax(MI, TypeIdx, Ty);
2097  case G_FCOPYSIGN:
2098  return lowerFCopySign(MI, TypeIdx, Ty);
2099  case G_FMINNUM:
2100  case G_FMAXNUM:
2101  return lowerFMinNumMaxNum(MI);
2102  case G_UNMERGE_VALUES:
2103  return lowerUnmergeValues(MI);
2104  case TargetOpcode::G_SEXT_INREG: {
2105  assert(MI.getOperand(2).isImm() && "Expected immediate");
2106  int64_t SizeInBits = MI.getOperand(2).getImm();
2107 
2108  Register DstReg = MI.getOperand(0).getReg();
2109  Register SrcReg = MI.getOperand(1).getReg();
2110  LLT DstTy = MRI.getType(DstReg);
2111  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2112 
2113  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2114  MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
2115  MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
2116  MI.eraseFromParent();
2117  return Legalized;
2118  }
2119  case G_SHUFFLE_VECTOR:
2120  return lowerShuffleVector(MI);
2121  }
2122 }
2123 
2125  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
 // Split a vector G_IMPLICIT_DEF into NarrowTy-sized undef pieces and
 // reassemble them into the original wide destination register.
2126  SmallVector<Register, 2> DstRegs;
2127 
2128  unsigned NarrowSize = NarrowTy.getSizeInBits();
2129  Register DstReg = MI.getOperand(0).getReg();
2130  unsigned Size = MRI.getType(DstReg).getSizeInBits();
2131  int NumParts = Size / NarrowSize;
2132  // FIXME: Don't know how to handle the situation where the small vectors
2133  // aren't all the same size yet.
2134  if (Size % NarrowSize != 0)
2135  return UnableToLegalize;
2136 
 // Build one undef value of the narrow type per part.
2137  for (int i = 0; i < NumParts; ++i) {
2138  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2139  MIRBuilder.buildUndef(TmpReg);
2140  DstRegs.push_back(TmpReg);
2141  }
2142 
 // Vector pieces are concatenated back together; scalar pieces form a
 // G_BUILD_VECTOR instead.
2143  if (NarrowTy.isVector())
2144  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2145  else
2146  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2147 
2148  MI.eraseFromParent();
2149  return Legalized;
2150 }
2151 
2154  LLT NarrowTy) {
 // Split an elementwise operation (1-3 source operands, all sharing the
 // result's vector type) into NarrowTy-sized pieces, with special handling
 // for a single leftover element when the split is uneven.
2155  const unsigned Opc = MI.getOpcode();
 // Number of source operands: total operand count minus the single def.
2156  const unsigned NumOps = MI.getNumOperands() - 1;
2157  const unsigned NarrowSize = NarrowTy.getSizeInBits();
2158  const Register DstReg = MI.getOperand(0).getReg();
2159  const unsigned Flags = MI.getFlags();
2160  const LLT DstTy = MRI.getType(DstReg);
2161  const unsigned Size = DstTy.getSizeInBits();
2162  const int NumParts = Size / NarrowSize;
2163  const LLT EltTy = DstTy.getElementType();
2164  const unsigned EltSize = EltTy.getSizeInBits();
 // Bits covered by the evenly dividing NarrowTy-sized parts; anything beyond
 // this is the leftover.
2165  const unsigned BitsForNumParts = NarrowSize * NumParts;
2166 
2167  // Check if we have any leftovers. If we do, then only handle the case where
2168  // the leftover is one element.
2169  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
2170  return UnableToLegalize;
2171 
 // Uneven split: accumulate extract/op/insert pieces into an undef-seeded
 // register, then patch in the final single-element piece.
2172  if (BitsForNumParts != Size) {
2173  Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
2174  MIRBuilder.buildUndef(AccumDstReg);
2175 
2176  // Handle the pieces which evenly divide into the requested type with
2177  // extract/op/insert sequence.
2178  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
2179  SmallVector<SrcOp, 4> SrcOps;
2180  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2181  Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
2182  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
2183  SrcOps.push_back(PartOpReg);
2184  }
2185 
2186  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
2187  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2188 
2189  Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
2190  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
2191  AccumDstReg = PartInsertReg;
2192  }
2193 
2194  // Handle the remaining element sized leftover piece.
2195  SmallVector<SrcOp, 4> SrcOps;
2196  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2197  Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
2198  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
2199  BitsForNumParts);
2200  SrcOps.push_back(PartOpReg);
2201  }
2202 
2203  Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
2204  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
 // The last insert writes directly to the original destination register.
2205  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
2206  MI.eraseFromParent();
2207 
2208  return Legalized;
2209  }
2210 
 // Even split: break each source operand into NumParts registers, emit one
 // narrow instruction per part, then recombine the results.
2211  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2212 
2213  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
2214 
2215  if (NumOps >= 2)
2216  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
2217 
2218  if (NumOps >= 3)
2219  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
2220 
2221  for (int i = 0; i < NumParts; ++i) {
2222  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2223 
2224  if (NumOps == 1)
2225  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
2226  else if (NumOps == 2) {
2227  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
2228  } else if (NumOps == 3) {
2229  MIRBuilder.buildInstr(Opc, {DstReg},
2230  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
2231  }
2232 
2233  DstRegs.push_back(DstReg);
2234  }
2235 
2236  if (NarrowTy.isVector())
2237  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2238  else
2239  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2240 
2241  MI.eraseFromParent();
2242  return Legalized;
2243 }
2244 
2245 // Handle splitting vector operations which need to have the same number of
2246 // elements in each type index, but each type index may have a different element
2247 // type.
2248 //
2249 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2250 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2251 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2252 //
2253 // Also handles some irregular breakdown cases, e.g.
2254 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2255 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2256 // s64 = G_SHL s64, s32
2259  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
 // Split an operation whose type indices must keep the same number of
 // elements but may have different element types (see the comment block
 // above). Only the result type index is supported.
2260  if (TypeIdx != 0)
2261  return UnableToLegalize;
2262 
2263  const LLT NarrowTy0 = NarrowTyArg;
 // Elements per piece; a scalar NarrowTy counts as one element.
2264  const unsigned NewNumElts =
2265  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2266 
2267  const Register DstReg = MI.getOperand(0).getReg();
2268  LLT DstTy = MRI.getType(DstReg);
2269  LLT LeftoverTy0;
2270 
2271  // All of the operands need to have the same number of elements, so if we can
2272  // determine a type breakdown for the result type, we can for all of the
2273  // source types.
2274  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2275  if (NumParts < 0)
2276  return UnableToLegalize;
2277 
2279 
2280  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2281  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2282 
 // Walk all source operands; each is split with the same element counts but
 // with its own scalar type.
2283  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2284  LLT LeftoverTy;
2285  Register SrcReg = MI.getOperand(I).getReg();
2286  LLT SrcTyI = MRI.getType(SrcReg);
2287  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2288  LLT LeftoverTyI;
2289 
2290  // Split this operand into the requested typed registers, and any leftover
2291  // required to reproduce the original type.
2292  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2293  LeftoverRegs))
2294  return UnableToLegalize;
2295 
2296  if (I == 1) {
2297  // For the first operand, create an instruction for each part and setup
2298  // the result.
2299  for (Register PartReg : PartRegs) {
2300  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0)
2302  .addDef(PartDstReg)
2303  .addUse(PartReg));
2304  DstRegs.push_back(PartDstReg);
2305  }
2306 
 // Leftover pieces get their own (LeftoverTy0-typed) instruction, ordered
 // after the NarrowTy0-sized ones.
2307  for (Register LeftoverReg : LeftoverRegs) {
2308  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0)
2310  .addDef(PartDstReg)
2311  .addUse(LeftoverReg));
2312  LeftoverDstRegs.push_back(PartDstReg);
2313  }
2314  } else {
2315  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2316 
2317  // Add the newly created operand splits to the existing instructions. The
2318  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2319  // pieces.
2320  unsigned InstCount = 0;
2321  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2322  NewInsts[InstCount++].addUse(PartRegs[J]);
2323  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2324  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2325  }
2326 
 // Reuse the scratch vectors for the next operand.
2327  PartRegs.clear();
2328  LeftoverRegs.clear();
2329  }
2330 
2331  // Insert the newly built operations and rebuild the result register.
2332  for (auto &MIB : NewInsts)
2333  MIRBuilder.insertInstr(MIB);
2334 
2335  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2336 
2337  MI.eraseFromParent();
2338  return Legalized;
2339 }
2340 
2343  LLT NarrowTy) {
 // Split a single-source conversion/cast into one narrow cast per part.
 // Only splitting via the result type index (0) is handled.
2344  if (TypeIdx != 0)
2345  return UnableToLegalize;
2346 
2347  Register DstReg = MI.getOperand(0).getReg();
2348  Register SrcReg = MI.getOperand(1).getReg();
2349  LLT DstTy = MRI.getType(DstReg);
2350  LLT SrcTy = MRI.getType(SrcReg);
2351 
 // NarrowTy0: per-part result type. NarrowTy1: matching per-part source type,
 // derived from the source's element type.
2352  LLT NarrowTy0 = NarrowTy;
2353  LLT NarrowTy1;
2354  unsigned NumParts;
2355 
2356  if (NarrowTy.isVector()) {
2357  // Uneven breakdown not handled.
2358  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2359  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2360  return UnableToLegalize;
2361 
2362  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2363  } else {
 // Scalar pieces: one cast per element of the destination vector.
2364  NumParts = DstTy.getNumElements();
2365  NarrowTy1 = SrcTy.getElementType();
2366  }
2367 
2368  SmallVector<Register, 4> SrcRegs, DstRegs;
2369  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2370 
 // Emit one narrow instance of the original cast per part, preserving the
 // original instruction's flags.
2371  for (unsigned I = 0; I < NumParts; ++I) {
2372  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2373  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2374  .addDef(DstReg)
2375  .addUse(SrcRegs[I]);
2376 
2377  NewInst->setFlags(MI.getFlags());
2378  DstRegs.push_back(DstReg);
2379  }
2380 
2381  if (NarrowTy.isVector())
2382  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2383  else
2384  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2385 
2386  MI.eraseFromParent();
2387  return Legalized;
2388 }
2389 
2392  LLT NarrowTy) {
 // Split a vector G_ICMP/G_FCMP into narrow compares. The breakdown types
 // are computed for whichever type index (result or source) was requested.
2393  Register DstReg = MI.getOperand(0).getReg();
2394  Register Src0Reg = MI.getOperand(2).getReg();
2395  LLT DstTy = MRI.getType(DstReg);
2396  LLT SrcTy = MRI.getType(Src0Reg);
2397 
2398  unsigned NumParts;
 // NarrowTy0: per-part result type; NarrowTy1: per-part compare-operand type.
2399  LLT NarrowTy0, NarrowTy1;
2400 
2401  if (TypeIdx == 0) {
 // Narrowing was requested for the (boolean) result type; derive the
 // matching operand breakdown from the source type.
2402  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2403  unsigned OldElts = DstTy.getNumElements();
2404 
2405  NarrowTy0 = NarrowTy;
2406  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2407  NarrowTy1 = NarrowTy.isVector() ?
2408  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2409  SrcTy.getElementType();
2410 
2411  } else {
 // Narrowing was requested for the compared operands; derive the matching
 // result breakdown from the destination type.
2412  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2413  unsigned OldElts = SrcTy.getNumElements();
2414 
2415  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2416  NarrowTy.getNumElements();
2417  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2418  DstTy.getScalarSizeInBits());
2419  NarrowTy1 = NarrowTy;
2420  }
2421 
2422  // FIXME: Don't know how to handle the situation where the small vectors
2423  // aren't all the same size yet.
2424  if (NarrowTy1.isVector() &&
2425  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2426  return UnableToLegalize;
2427 
 // The predicate is carried over unchanged to every narrow compare.
2428  CmpInst::Predicate Pred
2429  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2430 
2431  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2432  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2433  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2434 
2435  for (unsigned I = 0; I < NumParts; ++I) {
2436  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2437  DstRegs.push_back(DstReg);
2438 
2439  if (MI.getOpcode() == TargetOpcode::G_ICMP)
2440  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2441  else {
 // FP compares additionally preserve the original fast-math flags.
2442  MachineInstr *NewCmp
2443  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2444  NewCmp->setFlags(MI.getFlags());
2445  }
2446  }
2447 
2448  if (NarrowTy1.isVector())
2449  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2450  else
2451  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2452 
2453  MI.eraseFromParent();
2454  return Legalized;
2455 }
2456 
2459  LLT NarrowTy) {
 // Split a G_SELECT into narrow selects. The condition may be a scalar
 // (shared by all parts) or a vector (split alongside the values).
2460  Register DstReg = MI.getOperand(0).getReg();
2461  Register CondReg = MI.getOperand(1).getReg();
2462 
2463  unsigned NumParts = 0;
 // NarrowTy0: per-part value type; NarrowTy1: per-part condition type.
2464  LLT NarrowTy0, NarrowTy1;
2465 
2466  LLT DstTy = MRI.getType(DstReg);
2467  LLT CondTy = MRI.getType(CondReg);
2468  unsigned Size = DstTy.getSizeInBits();
2469 
 // Splitting on the condition type index only makes sense for a vector
 // condition.
2470  assert(TypeIdx == 0 || CondTy.isVector());
2471 
2472  if (TypeIdx == 0) {
2473  NarrowTy0 = NarrowTy;
2474  NarrowTy1 = CondTy;
2475 
2476  unsigned NarrowSize = NarrowTy0.getSizeInBits();
2477  // FIXME: Don't know how to handle the situation where the small vectors
2478  // aren't all the same size yet.
2479  if (Size % NarrowSize != 0)
2480  return UnableToLegalize;
2481 
2482  NumParts = Size / NarrowSize;
2483 
2484  // Need to break down the condition type
2485  if (CondTy.isVector()) {
2486  if (CondTy.getNumElements() == NumParts)
2487  NarrowTy1 = CondTy.getElementType();
2488  else
2489  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2490  CondTy.getScalarSizeInBits());
2491  }
2492  } else {
2493  NumParts = CondTy.getNumElements();
2494  if (NarrowTy.isVector()) {
2495  // TODO: Handle uneven breakdown.
2496  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2497  return UnableToLegalize;
2498 
 // Vector condition pieces are not implemented yet (see TODO above).
2499  return UnableToLegalize;
2500  } else {
2501  NarrowTy0 = DstTy.getElementType();
2502  NarrowTy1 = NarrowTy;
2503  }
2504  }
2505 
2506  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
 // A vector condition is split per part; a scalar condition is reused as-is
 // for every narrow select below.
2507  if (CondTy.isVector())
2508  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2509 
2510  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2511  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2512 
2513  for (unsigned i = 0; i < NumParts; ++i) {
2514  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2515  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2516  Src1Regs[i], Src2Regs[i]);
2517  DstRegs.push_back(DstReg);
2518  }
2519 
2520  if (NarrowTy0.isVector())
2521  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2522  else
2523  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2524 
2525  MI.eraseFromParent();
2526  return Legalized;
2527 }
2528 
2531  LLT NarrowTy) {
 // Split a G_PHI into narrow phis: new phis are created at the head of the
 // result block, and the incoming values are split in each predecessor.
2532  const Register DstReg = MI.getOperand(0).getReg();
2533  LLT PhiTy = MRI.getType(DstReg);
2534  LLT LeftoverTy;
2535 
2536  // All of the operands need to have the same number of elements, so if we can
2537  // determine a type breakdown for the result type, we can for all of the
2538  // source types.
2539  int NumParts, NumLeftover;
2540  std::tie(NumParts, NumLeftover)
2541  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2542  if (NumParts < 0)
2543  return UnableToLegalize;
2544 
2545  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2547 
2548  const int TotalNumParts = NumParts + NumLeftover;
2549 
2550  // Insert the new phis in the result block first.
2551  for (int I = 0; I != TotalNumParts; ++I) {
 // The first NumParts phis use NarrowTy; the rest use the leftover type.
2552  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2553  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2554  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2555  .addDef(PartDstReg));
2556  if (I < NumParts)
2557  DstRegs.push_back(PartDstReg);
2558  else
2559  LeftoverDstRegs.push_back(PartDstReg);
2560  }
2561 
 // Recombine the narrow phi results into the original wide register. The
 // merge must be emitted after the phi group, hence getFirstNonPHI.
2562  MachineBasicBlock *MBB = MI.getParent();
2563  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2564  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2565 
2566  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2567 
2568  // Insert code to extract the incoming values in each predecessor block.
2569  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2570  PartRegs.clear();
2571  LeftoverRegs.clear();
2572 
 // Operands come in (value, predecessor-block) pairs. Splitting code is
 // emitted in the predecessor, before its terminator.
2573  Register SrcReg = MI.getOperand(I).getReg();
2574  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2575  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2576 
2577  LLT Unused;
2578  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2579  LeftoverRegs))
2580  return UnableToLegalize;
2581 
2582  // Add the newly created operand splits to the existing instructions. The
2583  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2584  // pieces.
2585  for (int J = 0; J != TotalNumParts; ++J) {
2586  MachineInstrBuilder MIB = NewInsts[J];
2587  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2588  MIB.addMBB(&OpMBB);
2589  }
2590  }
2591 
2592  MI.eraseFromParent();
2593  return Legalized;
2594 }
2595 
2598  unsigned TypeIdx,
2599  LLT NarrowTy) {
 // Narrow the source of a G_UNMERGE_VALUES: unmerge into GCD-typed pieces
 // first, then unmerge each piece into the original destinations.
2600  if (TypeIdx != 1)
2601  return UnableToLegalize;
2602 
 // The source is the last operand; everything before it is a def.
2603  const int NumDst = MI.getNumOperands() - 1;
2604  const Register SrcReg = MI.getOperand(NumDst).getReg();
2605  LLT SrcTy = MRI.getType(SrcReg);
2606 
2607  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2608 
2609  // TODO: Create sequence of extracts.
2610  if (DstTy == NarrowTy)
2611  return UnableToLegalize;
2612 
2613  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
2614  if (DstTy == GCDTy) {
2615  // This would just be a copy of the same unmerge.
2616  // TODO: Create extracts, pad with undef and create intermediate merges.
2617  return UnableToLegalize;
2618  }
2619 
 // First-level unmerge into GCD-typed pieces; each piece then feeds a
 // second-level unmerge producing PartsPerUnmerge of the original defs.
2620  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2621  const int NumUnmerge = Unmerge->getNumOperands() - 1;
2622  const int PartsPerUnmerge = NumDst / NumUnmerge;
2623 
2624  for (int I = 0; I != NumUnmerge; ++I) {
2625  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2626 
2627  for (int J = 0; J != PartsPerUnmerge; ++J)
2628  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
2629  MIB.addUse(Unmerge.getReg(I));
2630  }
2631 
2632  MI.eraseFromParent();
2633  return Legalized;
2634 }
2635 
2638  LLT NarrowTy) {
 // Split a wide G_LOAD/G_STORE into NarrowTy-sized memory accesses (plus a
 // leftover-sized access if the breakdown is uneven).
2639  // FIXME: Don't know how to handle secondary types yet.
2640  if (TypeIdx != 0)
2641  return UnableToLegalize;
2642 
2643  MachineMemOperand *MMO = *MI.memoperands_begin();
2644 
2645  // This implementation doesn't work for atomics. Give up instead of doing
2646  // something invalid.
2647  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2649  return UnableToLegalize;
2650 
2651  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2652  Register ValReg = MI.getOperand(0).getReg();
2653  Register AddrReg = MI.getOperand(1).getReg();
2654  LLT ValTy = MRI.getType(ValReg);
2655 
2656  int NumParts = -1;
2657  int NumLeftover = -1;
2658  LLT LeftoverTy;
2659  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
2660  if (IsLoad) {
 // Loads: only the breakdown counts are needed; registers are created as
 // the pieces are loaded below.
2661  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2662  } else {
 // Stores: split the value to be stored up front.
2663  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2664  NarrowLeftoverRegs)) {
2665  NumParts = NarrowRegs.size();
2666  NumLeftover = NarrowLeftoverRegs.size();
2667  }
2668  }
2669 
 // NumParts stays -1 if neither breakdown above succeeded.
2670  if (NumParts == -1)
2671  return UnableToLegalize;
2672 
 // Address arithmetic uses a scalar offset matching the pointer width.
2673  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2674 
2675  unsigned TotalSize = ValTy.getSizeInBits();
2676 
2677  // Split the load/store into PartTy sized pieces starting at Offset. If this
2678  // is a load, return the new registers in ValRegs. For a store, each elements
2679  // of ValRegs should be PartTy. Returns the next offset that needs to be
2680  // handled.
2681  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
2682  unsigned Offset) -> unsigned {
2684  unsigned PartSize = PartTy.getSizeInBits();
2685  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2686  Offset += PartSize, ++Idx) {
2687  unsigned ByteSize = PartSize / 8;
2688  unsigned ByteOffset = Offset / 8;
2689  Register NewAddrReg;
2690 
 // Compute the piece's address and derive a suitably offset/shrunk MMO.
2691  MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2692 
2693  MachineMemOperand *NewMMO =
2694  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2695 
2696  if (IsLoad) {
2697  Register Dst = MRI.createGenericVirtualRegister(PartTy);
2698  ValRegs.push_back(Dst);
2699  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2700  } else {
2701  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2702  }
2703  }
2704 
2705  return Offset;
2706  };
2707 
2708  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2709 
2710  // Handle the rest of the register if this isn't an even type breakdown.
2711  if (LeftoverTy.isValid())
2712  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2713 
 // For loads, merge the loaded pieces back into the original wide value.
2714  if (IsLoad) {
2715  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2716  LeftoverTy, NarrowLeftoverRegs);
2717  }
2718 
2719  MI.eraseFromParent();
2720  return Legalized;
2721 }
2722 
2725  LLT NarrowTy) {
 // Dispatch the "fewer elements" legalization action to the handler matching
 // the instruction's operand structure.
2726  using namespace TargetOpcode;
2727 
2728  MIRBuilder.setInstr(MI);
2729  switch (MI.getOpcode()) {
2730  case G_IMPLICIT_DEF:
2731  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
 // Elementwise operations whose sources all share the result type.
2732  case G_AND:
2733  case G_OR:
2734  case G_XOR:
2735  case G_ADD:
2736  case G_SUB:
2737  case G_MUL:
2738  case G_SMULH:
2739  case G_UMULH:
2740  case G_FADD:
2741  case G_FMUL:
2742  case G_FSUB:
2743  case G_FNEG:
2744  case G_FABS:
2745  case G_FCANONICALIZE:
2746  case G_FDIV:
2747  case G_FREM:
2748  case G_FMA:
2749  case G_FPOW:
2750  case G_FEXP:
2751  case G_FEXP2:
2752  case G_FLOG:
2753  case G_FLOG2:
2754  case G_FLOG10:
2755  case G_FNEARBYINT:
2756  case G_FCEIL:
2757  case G_FFLOOR:
2758  case G_FRINT:
2759  case G_INTRINSIC_ROUND:
2760  case G_INTRINSIC_TRUNC:
2761  case G_FCOS:
2762  case G_FSIN:
2763  case G_FSQRT:
2764  case G_BSWAP:
2765  case G_SDIV:
2766  case G_SMIN:
2767  case G_SMAX:
2768  case G_UMIN:
2769  case G_UMAX:
2770  case G_FMINNUM:
2771  case G_FMAXNUM:
2772  case G_FMINNUM_IEEE:
2773  case G_FMAXNUM_IEEE:
2774  case G_FMINIMUM:
2775  case G_FMAXIMUM:
2776  return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
 // Operations whose type indices share an element count but may differ in
 // element type (e.g. a shift's value and amount).
2777  case G_SHL:
2778  case G_LSHR:
2779  case G_ASHR:
2780  case G_CTLZ:
2781  case G_CTLZ_ZERO_UNDEF:
2782  case G_CTTZ:
2783  case G_CTTZ_ZERO_UNDEF:
2784  case G_CTPOP:
2785  case G_FCOPYSIGN:
2786  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
 // Single-source conversions.
2787  case G_ZEXT:
2788  case G_SEXT:
2789  case G_ANYEXT:
2790  case G_FPEXT:
2791  case G_FPTRUNC:
2792  case G_SITOFP:
2793  case G_UITOFP:
2794  case G_FPTOSI:
2795  case G_FPTOUI:
2796  case G_INTTOPTR:
2797  case G_PTRTOINT:
2798  case G_ADDRSPACE_CAST:
2799  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2800  case G_ICMP:
2801  case G_FCMP:
2802  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2803  case G_SELECT:
2804  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2805  case G_PHI:
2806  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
2807  case G_UNMERGE_VALUES:
2808  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
2809  case G_LOAD:
2810  case G_STORE:
2811  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2812  default:
2813  return UnableToLegalize;
2814  }
2815 }
2816 
                                               const LLT HalfTy,
                                               const LLT AmtTy) {

  // Split the wide input into low/high halves of type HalfTy.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // A shift by zero is the identity: just reassemble the halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shifting out the entire value: both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Crossing the half boundary: only (shifted) low-half bits survive,
      // and they land in the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly a half-width shift: the low half becomes the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // Lo = InL << Amt; Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Shifting out the entire value: both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Crossing the half boundary: only (shifted) high-half bits survive,
      // and they land in the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exactly a half-width shift: the high half becomes the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)); Hi = InH >> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR but shifting in copies of the sign bit, which is
    // the high half arithmetically shifted by NVTBits - 1.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)); Hi = InH a>> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
2904 
2905 // TODO: Optimize if constant shift amount.
2908  LLT RequestedTy) {
2909  if (TypeIdx == 1) {
2910  Observer.changingInstr(MI);
2911  narrowScalarSrc(MI, RequestedTy, 2);
2912  Observer.changedInstr(MI);
2913  return Legalized;
2914  }
2915 
2916  Register DstReg = MI.getOperand(0).getReg();
2917  LLT DstTy = MRI.getType(DstReg);
2918  if (DstTy.isVector())
2919  return UnableToLegalize;
2920 
2921  Register Amt = MI.getOperand(2).getReg();
2922  LLT ShiftAmtTy = MRI.getType(Amt);
2923  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2924  if (DstEltSize % 2 != 0)
2925  return UnableToLegalize;
2926 
2927  // Ignore the input type. We can only go to exactly half the size of the
2928  // input. If that isn't small enough, the resulting pieces will be further
2929  // legalized.
2930  const unsigned NewBitSize = DstEltSize / 2;
2931  const LLT HalfTy = LLT::scalar(NewBitSize);
2932  const LLT CondTy = LLT::scalar(1);
2933 
2934  if (const MachineInstr *KShiftAmt =
2935  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2937  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2938  }
2939 
2940  // TODO: Expand with known bits.
2941 
2942  // Handle the fully general expansion by an unknown amount.
2943  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2944 
2945  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2946  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2947  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2948 
2949  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2950  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2951 
2952  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2953  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2954  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2955 
2956  Register ResultRegs[2];
2957  switch (MI.getOpcode()) {
2958  case TargetOpcode::G_SHL: {
2959  // Short: ShAmt < NewBitSize
2960  auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2961 
2962  auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2963  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2964  auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2965 
2966  // Long: ShAmt >= NewBitSize
2967  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
2968  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2969 
2970  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2971  auto Hi = MIRBuilder.buildSelect(
2972  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2973 
2974  ResultRegs[0] = Lo.getReg(0);
2975  ResultRegs[1] = Hi.getReg(0);
2976  break;
2977  }
2978  case TargetOpcode::G_LSHR: {
2979  // Short: ShAmt < NewBitSize
2980  auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2981 
2982  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2983  auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2984  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2985 
2986  // Long: ShAmt >= NewBitSize
2987  auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
2988  auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2989 
2990  auto Lo = MIRBuilder.buildSelect(
2991  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2992  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2993 
2994  ResultRegs[0] = Lo.getReg(0);
2995  ResultRegs[1] = Hi.getReg(0);
2996  break;
2997  }
2998  case TargetOpcode::G_ASHR: {
2999  // Short: ShAmt < NewBitSize
3000  auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
3001 
3002  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3003  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
3004  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
3005 
3006  // Long: ShAmt >= NewBitSize
3007 
3008  // Sign of Hi part.
3009  auto HiL = MIRBuilder.buildAShr(
3010  HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
3011 
3012  auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
3013 
3014  auto Lo = MIRBuilder.buildSelect(
3015  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3016 
3017  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3018 
3019  ResultRegs[0] = Lo.getReg(0);
3020  ResultRegs[1] = Hi.getReg(0);
3021  break;
3022  }
3023  default:
3024  llvm_unreachable("not a shift");
3025  }
3026 
3027  MIRBuilder.buildMerge(DstReg, ResultRegs);
3028  MI.eraseFromParent();
3029  return Legalized;
3030 }
3031 
                                        LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  // Widen each incoming value in its predecessor block; the padding code is
  // emitted at that block's terminator so it dominates the CFG edge.
  // Operands come in (value, block) pairs starting at index 1.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  // Position the builder at the end of the PHI group so the code that
  // narrows the widened result does not get interleaved with the PHIs.
  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
3050 
                                     LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result type can be padded with extra elements.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Only the stored value can be padded.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Elementwise binary ops: pad both sources and the result; the extra
  // lanes are undef.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the source vector (type index 1) can be padded; the extract
    // offset still refers to the original elements.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    // Pad the container (source operand 1) and the result together; the
    // inserted value keeps its type.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // A vector condition would also need padding; not handled yet.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
3120 
3121 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3122  ArrayRef<Register> Src1Regs,
3123  ArrayRef<Register> Src2Regs,
3124  LLT NarrowTy) {
3126  unsigned SrcParts = Src1Regs.size();
3127  unsigned DstParts = DstRegs.size();
3128 
3129  unsigned DstIdx = 0; // Low bits of the result.
3130  Register FactorSum =
3131  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3132  DstRegs[DstIdx] = FactorSum;
3133 
3134  unsigned CarrySumPrevDstIdx;
3135  SmallVector<Register, 4> Factors;
3136 
3137  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3138  // Collect low parts of muls for DstIdx.
3139  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3140  i <= std::min(DstIdx, SrcParts - 1); ++i) {
3141  MachineInstrBuilder Mul =
3142  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3143  Factors.push_back(Mul.getReg(0));
3144  }
3145  // Collect high parts of muls from previous DstIdx.
3146  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3147  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3148  MachineInstrBuilder Umulh =
3149  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3150  Factors.push_back(Umulh.getReg(0));
3151  }
3152  // Add CarrySum from additons calculated for previous DstIdx.
3153  if (DstIdx != 1) {
3154  Factors.push_back(CarrySumPrevDstIdx);
3155  }
3156 
3157  Register CarrySum;
3158  // Add all factors and accumulate all carries into CarrySum.
3159  if (DstIdx != DstParts - 1) {
3160  MachineInstrBuilder Uaddo =
3161  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3162  FactorSum = Uaddo.getReg(0);
3163  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3164  for (unsigned i = 2; i < Factors.size(); ++i) {
3165  MachineInstrBuilder Uaddo =
3166  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3167  FactorSum = Uaddo.getReg(0);
3168  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3169  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3170  }
3171  } else {
3172  // Since value for the next index is not calculated, neither is CarrySum.
3173  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3174  for (unsigned i = 2; i < Factors.size(); ++i)
3175  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3176  }
3177 
3178  CarrySumPrevDstIdx = CarrySum;
3179  DstRegs[DstIdx] = FactorSum;
3180  Factors.clear();
3181  }
3182 }
3183 
  // Body of narrowScalarMul: split both multiplicands into NarrowTy pieces
  // and perform a long multiplication via multiplyRegisters().
  // NOTE(review): the signature line was lost in extraction — confirm
  // against the class declaration.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  // Sources and destination must each split evenly into NarrowTy parts.
  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
  unsigned DstSize = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumDstParts = DstSize / NarrowSize;
  unsigned NumSrcParts = SrcSize / NarrowSize;
  // For G_UMULH the full double-width product is computed and only its high
  // half is kept, so twice as many temporary parts are needed.
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  DstTmpRegs.resize(DstTmpParts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3218 
                                      LLT NarrowTy) {
  // Narrow the source (type index 1) of a G_EXTRACT by splitting it into
  // NarrowTy pieces and extracting only from the pieces that overlap the
  // requested bit range.
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the destination window being pulled out
  // of the wide source: its register, starting bit offset, and width.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The window begins before this piece: take the piece's prefix that
      // still lies within the window.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The window begins inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the extract's destination.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3283 
                                     LLT NarrowTy) {
  // Narrow a G_INSERT by splitting the container into NarrowTy pieces and
  // re-inserting the relevant slice of the inserted value into each piece
  // the insert range overlaps.
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the value being inserted: its register,
  // starting bit offset within the container, and width.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted value begins before this piece: skip its leading bits
      // and insert the remainder at the piece's start.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value begins inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  // Reassemble all (possibly updated) pieces into the wide destination.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3358 
                                    LLT NarrowTy) {
  // Narrow a two-source, one-destination operation where all three share a
  // type: split the sources into NarrowTy parts (plus optional leftover
  // parts of a smaller type) and re-emit the opcode piecewise.
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  // The second source has the same type, so the identical breakdown must
  // succeed if the first one did.
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  // Re-emit the operation on the NarrowTy parts...
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst->getOperand(0).getReg());
  }

  // ...and on the leftover parts.
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
  }

  // Reassemble all result pieces into the wide destination.
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
3399 
                                     LLT NarrowTy) {
  // Narrow a G_SELECT with a scalar condition: split both value operands
  // into NarrowTy parts (plus optional leftovers) and emit one select per
  // part, all sharing the original condition.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  // Both selected values share DstTy, so the identical breakdown must
  // succeed if the first one did.
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  // One select per NarrowTy part...
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select->getOperand(0).getReg());
  }

  // ...and per leftover part.
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
  }

  // Reassemble all result pieces into the wide destination.
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
3445 
  // Body of lowerBitCount: expand CTLZ/CTTZ/CTPOP-family operations in
  // terms of whatever bit-counting operations the target does support.
  // NOTE(review): the signature line was lost in extraction — confirm
  // against the class declaration.
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An operation is usable as an expansion target if it will be selectable
  // or handled elsewhere (legal, libcall, or custom).
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position, then count the
    // set bits: Len - popcount gives the number of leading zeros.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      // NOTE(review): the extracted listing appears to be missing a line
      // here — the opener of a builder call (presumably
      // "MIRBuilder.buildInstr(") for the G_SUB below; restore it before
      // compiling.
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse the original instruction as the CTPOP of the mask computed
    // above.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}
3558 
3559 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3560 // representation.
3563  Register Dst = MI.getOperand(0).getReg();
3564  Register Src = MI.getOperand(1).getReg();
3565  const LLT S64 = LLT::scalar(64);
3566  const LLT S32 = LLT::scalar(32);
3567  const LLT S1 = LLT::scalar(1);
3568 
3569  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3570 
3571  // unsigned cul2f(ulong u) {
3572  // uint lz = clz(u);
3573  // uint e = (u != 0) ? 127U + 63U - lz : 0;
3574  // u = (u << lz) & 0x7fffffffffffffffUL;
3575  // ulong t = u & 0xffffffffffUL;
3576  // uint v = (e << 23) | (uint)(u >> 40);
3577  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3578  // return as_float(v + r);
3579  // }
3580 
3581  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3582  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3583 
3584  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3585 
3586  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3587  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3588 
3589  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3590  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3591 
3592  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3593  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3594 
3595  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3596 
3597  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3598  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3599 
3600  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3601  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3602  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3603 
3604  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3605  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3606  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3607  auto One = MIRBuilder.buildConstant(S32, 1);
3608 
3609  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3610  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3611  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3612  MIRBuilder.buildAdd(Dst, V, R);
3613 
3614  return Legalized;
3615 }
3616 
3618 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3619  Register Dst = MI.getOperand(0).getReg();
3620  Register Src = MI.getOperand(1).getReg();
3621  LLT DstTy = MRI.getType(Dst);
3622  LLT SrcTy = MRI.getType(Src);
3623 
3624  if (SrcTy != LLT::scalar(64))
3625  return UnableToLegalize;
3626 
3627  if (DstTy == LLT::scalar(32)) {
3628  // TODO: SelectionDAG has several alternative expansions to port which may
3629  // be more reasonble depending on the available instructions. If a target
3630  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3631  // intermediate type, this is probably worse.
3632  return lowerU64ToF32BitOps(MI);
3633  }
3634 
3635  return UnableToLegalize;
3636 }
3637 
3639 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3640  Register Dst = MI.getOperand(0).getReg();
3641  Register Src = MI.getOperand(1).getReg();
3642  LLT DstTy = MRI.getType(Dst);
3643  LLT SrcTy = MRI.getType(Src);
3644 
3645  const LLT S64 = LLT::scalar(64);
3646  const LLT S32 = LLT::scalar(32);
3647  const LLT S1 = LLT::scalar(1);
3648 
3649  if (SrcTy != S64)
3650  return UnableToLegalize;
3651 
3652  if (DstTy == S32) {
3653  // signed cl2f(long l) {
3654  // long s = l >> 63;
3655  // float r = cul2f((l + s) ^ s);
3656  // return s ? -r : r;
3657  // }
3658  Register L = Src;
3659  auto SignBit = MIRBuilder.buildConstant(S64, 63);
3660  auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3661 
3662  auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3663  auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3664  auto R = MIRBuilder.buildUITOFP(S32, Xor);
3665 
3666  auto RNeg = MIRBuilder.buildFNeg(S32, R);
3667  auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3668  MIRBuilder.buildConstant(S64, 0));
3669  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3670  return Legalized;
3671  }
3672 
3673  return UnableToLegalize;
3674 }
3675 
3676 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
3677  switch (Opc) {
3678  case TargetOpcode::G_SMIN:
3679  return CmpInst::ICMP_SLT;
3680  case TargetOpcode::G_SMAX:
3681  return CmpInst::ICMP_SGT;
3682  case TargetOpcode::G_UMIN:
3683  return CmpInst::ICMP_ULT;
3684  case TargetOpcode::G_UMAX:
3685  return CmpInst::ICMP_UGT;
3686  default:
3687  llvm_unreachable("not in integer min/max");
3688  }
3689 }
3690 
3692 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3693  Register Dst = MI.getOperand(0).getReg();
3694  Register Src0 = MI.getOperand(1).getReg();
3695  Register Src1 = MI.getOperand(2).getReg();
3696 
3697  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
3698  LLT CmpType = MRI.getType(Dst).changeElementSize(1);
3699 
3700  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
3701  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
3702 
3703  MI.eraseFromParent();
3704  return Legalized;
3705 }
3706 
3709  Register Dst = MI.getOperand(0).getReg();
3710  Register Src0 = MI.getOperand(1).getReg();
3711  Register Src1 = MI.getOperand(2).getReg();
3712 
3713  const LLT Src0Ty = MRI.getType(Src0);
3714  const LLT Src1Ty = MRI.getType(Src1);
3715 
3716  const int Src0Size = Src0Ty.getScalarSizeInBits();
3717  const int Src1Size = Src1Ty.getScalarSizeInBits();
3718 
3719  auto SignBitMask = MIRBuilder.buildConstant(
3720  Src0Ty, APInt::getSignMask(Src0Size));
3721 
3722  auto NotSignBitMask = MIRBuilder.buildConstant(
3723  Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
3724 
3725  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
3726  MachineInstr *Or;
3727 
3728  if (Src0Ty == Src1Ty) {
3729  auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
3730  Or = MIRBuilder.buildOr(Dst, And0, And1);
3731  } else if (Src0Size > Src1Size) {
3732  auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
3733  auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
3734  auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
3735  auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
3736  Or = MIRBuilder.buildOr(Dst, And0, And1);
3737  } else {
3738  auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
3739  auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
3740  auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
3741  auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
3742  Or = MIRBuilder.buildOr(Dst, And0, And1);
3743  }
3744 
3745  // Be careful about setting nsz/nnan/ninf on every instruction, since the
3746  // constants are a nan and -0.0, but the final result should preserve
3747  // everything.
3748  if (unsigned Flags = MI.getFlags())
3749  Or->setFlags(Flags);
3750 
3751  MI.eraseFromParent();
3752  return Legalized;
3753 }
3754 
3757  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
3758  TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
3759 
3760  Register Dst = MI.getOperand(0).getReg();
3761  Register Src0 = MI.getOperand(1).getReg();
3762  Register Src1 = MI.getOperand(2).getReg();
3763  LLT Ty = MRI.getType(Dst);
3764 
3765  if (!MI.getFlag(MachineInstr::FmNoNans)) {
3766  // Insert canonicalizes if it's possible we need to quiet to get correct
3767  // sNaN behavior.
3768 
3769  // Note this must be done here, and not as an optimization combine in the
3770  // absence of a dedicate quiet-snan instruction as we're using an
3771  // omni-purpose G_FCANONICALIZE.
3772  if (!isKnownNeverSNaN(Src0, MRI))
3773  Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
3774 
3775  if (!isKnownNeverSNaN(Src1, MRI))
3776  Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
3777  }
3778 
3779  // If there are no nans, it's safe to simply replace this with the non-IEEE
3780  // version.
3781  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
3782  MI.eraseFromParent();
3783  return Legalized;
3784 }
3785 
3788  const unsigned NumDst = MI.getNumOperands() - 1;
3789  const Register SrcReg = MI.getOperand(NumDst).getReg();
3790  LLT SrcTy = MRI.getType(SrcReg);
3791 
3792  Register Dst0Reg = MI.getOperand(0).getReg();
3793  LLT DstTy = MRI.getType(Dst0Reg);
3794 
3795 
3796  // Expand scalarizing unmerge as bitcast to integer and shift.
3797  if (!DstTy.isVector() && SrcTy.isVector() &&
3798  SrcTy.getElementType() == DstTy) {
3799  LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
3800  Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
3801 
3802  MIRBuilder.buildTrunc(Dst0Reg, Cast);
3803 
3804  const unsigned DstSize = DstTy.getSizeInBits();
3805  unsigned Offset = DstSize;
3806  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
3807  auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
3808  auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
3809  MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
3810  }
3811 
3812  MI.eraseFromParent();
3813  return Legalized;
3814  }
3815 
3816  return UnableToLegalize;
3817 }
3818 
3821  Register DstReg = MI.getOperand(0).getReg();
3822  Register Src0Reg = MI.getOperand(1).getReg();
3823  Register Src1Reg = MI.getOperand(2).getReg();
3824  LLT Src0Ty = MRI.getType(Src0Reg);
3825  LLT DstTy = MRI.getType(DstReg);
3826  LLT IdxTy = LLT::scalar(32);
3827 
3828  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
3829 
3831  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
3832 
3833  if (DstTy.isScalar()) {
3834  if (Src0Ty.isVector())
3835  return UnableToLegalize;
3836 
3837  // This is just a SELECT.
3838  assert(Mask.size() == 1 && "Expected a single mask element");
3839  Register Val;
3840  if (Mask[0] < 0 || Mask[0] > 1)
3841  Val = MIRBuilder.buildUndef(DstTy).getReg(0);
3842  else
3843  Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
3844  MIRBuilder.buildCopy(DstReg, Val);
3845  MI.eraseFromParent();
3846  return Legalized;
3847  }
3848 
3849  Register Undef;
3850  SmallVector<Register, 32> BuildVec;
3851  LLT EltTy = DstTy.getElementType();
3852 
3853  for (int Idx : Mask) {
3854  if (Idx < 0) {
3855  if (!Undef.isValid())
3856  Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
3857  BuildVec.push_back(Undef);
3858  continue;
3859  }
3860 
3861  if (Src0Ty.isScalar()) {
3862  BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
3863  } else {
3864  int NumElts = Src0Ty.getNumElements();
3865  Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
3866  int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
3867  auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
3868  auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
3869  BuildVec.push_back(Extract.getReg(0));
3870  }
3871  }
3872 
3873  MIRBuilder.buildBuildVector(DstReg, BuildVec);
3874  MI.eraseFromParent();
3875  return Legalized;
3876 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
uint64_t CallInst * C
LegalizeResult fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
MachineOperand Callee
Destination of the call.
Definition: CallLowering.h:70
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1569
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:836
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineBasicBlock * getMBB() const
LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MachineInstrBuilder buildInsert(Register Res, Register Src, Register Op, unsigned Index)
iterator begin() const
Definition: ArrayRef.h:136
Register getReg(unsigned Idx) const
Get the register for the operand index.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a instruction with a vector type where each operand may have a different element type...
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
void setFPImm(const ConstantFP *CFP)
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
unsigned Reg
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
unsigned less than
Definition: InstrTypes.h:757
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:813
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:372
static uint32_t Concat[]
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Optional< MachineInstrBuilder > materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:163
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:902
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4483
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FNEG Op0.
LegalizeResult lowerShuffleVector(MachineInstr &MI)
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
uint64_t getSizeInBits() const
Return the size in bits of the memory reference.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:158
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
T greatestCommonDivisor(T A, T B)
Return the greatest common divisor of the values using Euclid&#39;s algorithm.
Definition: MathExtras.h:563
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
Return true if MI is either legal or has been legalized and false if not legal.
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
void setImm(int64_t immVal)
virtual const CallLowering * getCallLowering() const
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI)
Create a libcall to memcpy et al.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:442
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_GEP Op0, Op1.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Some kind of error has occurred and we could not legalize this instruction.
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
R600 Clause Merge
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:644
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:300
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
const Constant * getShuffleMask() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
signed greater than
Definition: InstrTypes.h:759
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:168
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
static LLT getGCDType(LLT OrigTy, LLT TargetTy)
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
const APFloat & getValueAPF() const
Definition: Constants.h:302
SmallVector< ArgInfo, 8 > OrigArgs
List of descriptors of the arguments passed to the function.
Definition: CallLowering.h:76
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:554
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:155
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emiting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:536
void setFlags(unsigned flags)
Definition: MachineInstr.h:306
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Intrinsic::ID getIntrinsicID() const
iterator end() const
Definition: ArrayRef.h:137
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:761
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
Promote Memory to Register
Definition: Mem2Reg.cpp:109
LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a simple vector instruction where all operands are the same type by splitting into multiple ...
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:640
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:703
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_FCMP PredOp0, Op1.
LegalizeResult fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Class for arbitrary precision integers.
Definition: APInt.h:69
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
Register getReg() const
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType)
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isValid() const
Definition: Register.h:115
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
Representation of each machine instruction.
Definition: MachineInstr.h:64
bool ugt(const APInt &RHS) const
Unsigned greather than comparison.
Definition: APInt.h:1254
Instruction has been legalized and the MachineFunction changed.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, Optional< unsigned > Flags=None)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:187
CallingConv::ID CallConv
Calling convention to be used for the call.
Definition: CallLowering.h:66
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
#define I(x, y, z)
Definition: MD5.cpp:58
static Constant * getZeroValueForNegation(Type *Ty)
Floating point negation must be implemented with f(x) = -0.0 - x.
Definition: Constants.cpp:789
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
uint32_t Size
Definition: Profile.cpp:46
void setCImm(const ConstantInt *CI)
const DataLayout & getDataLayout() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:292
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SmallVector< int, 16 > getShuffleMask() const
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:656
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
This file describes how to lower LLVM calls to machine code calls.
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:755
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register getReg() const
getReg - Returns the register number.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1229
const ConstantInt * getCImm() const
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr, Register CmpVal, Register NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:297
ArgInfo OrigRet
Descriptor for the return type of the function.
Definition: CallLowering.h:73
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LegalizeResult fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:663
This file describes how to lower LLVM code to machine code.
unsigned getPredicate() const
void resize(size_type N)
Definition: SmallVector.h:344