LLVM  9.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41  assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43  unsigned Size = OrigTy.getSizeInBits();
44  unsigned NarrowSize = NarrowTy.getSizeInBits();
45  unsigned NumParts = Size / NarrowSize;
46  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47  assert(Size > NarrowSize);
48 
49  if (LeftoverSize == 0)
50  return {NumParts, 0};
51 
52  if (NarrowTy.isVector()) {
53  unsigned EltSize = OrigTy.getScalarSizeInBits();
54  if (LeftoverSize % EltSize != 0)
55  return {-1, -1};
56  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57  } else {
58  LeftoverTy = LLT::scalar(LeftoverSize);
59  }
60 
61  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62  return std::make_pair(NumParts, NumLeftover);
63 }
64 
// NOTE(review): the opening line of this definition is absent from this
// listing (the embedded numbering jumps from 64 to 66). The member names
// suggest a LegalizerHelper constructor -- confirm against the full file.
// The visible part binds MIRBuilder to Builder, takes MRI from
// MF.getRegInfo() and LI from the subtarget's legalizer info, then points the
// builder at MF and installs Observer as the builder's change observer.
66  GISelChangeObserver &Observer,
67  MachineIRBuilder &Builder)
68  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70  MIRBuilder.setMF(MF);
71  MIRBuilder.setChangeObserver(Observer);
72 }
73 
// NOTE(review): listing lines 74 and 76 of this definition are absent here, so
// the declarations of MF, LI and B are not visible. Presumably a second
// LegalizerHelper constructor taking the legalizer info explicitly -- confirm.
// The visible part stores B, MF.getRegInfo(), LI and Observer in the matching
// members, then points the builder at MF and installs Observer on it.
75  GISelChangeObserver &Observer,
77  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78  MIRBuilder.setMF(MF);
79  MIRBuilder.setChangeObserver(Observer);
80 }
// NOTE(review): the signature of this definition (listing lines 81-82) and
// line 111 (the tail of the Custom case's return expression) are absent from
// this listing.
// Asks LI for the legalization action that applies to MI and forwards to the
// helper implementing that action, passing the type index and new type from
// the returned step. Each branch logs its decision under LLVM_DEBUG.
83  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84 
85  auto Step = LI.getAction(MI, MRI);
86  switch (Step.Action) {
87  case Legal:
    // Nothing to do for an instruction that is already legal.
88  LLVM_DEBUG(dbgs() << ".. Already legal\n");
89  return AlreadyLegal;
90  case Libcall:
91  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
92  return libcall(MI);
93  case NarrowScalar:
94  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
95  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
96  case WidenScalar:
97  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
98  return widenScalar(MI, Step.TypeIdx, Step.NewType);
99  case Lower:
100  LLVM_DEBUG(dbgs() << ".. Lower\n");
101  return lower(MI, Step.TypeIdx, Step.NewType);
102  case FewerElements:
103  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
104  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
105  case MoreElements:
106  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
107  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
108  case Custom:
    // Custom legalization is delegated to LI; the false arm of this
    // conditional is on the line missing from the listing.
109  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
110  return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
112  default:
113  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
114  return UnableToLegalize;
115  }
116 }
117 
// Splits Reg into NumParts pieces with a single unmerge whose results are
// collected in VRegs.
// NOTE(review): listing line 121 (the body of the for loop) is absent from this
// listing; presumably it appends one new register of type Ty to VRegs per
// iteration -- confirm against the full file. As listed, the unmerge below
// would syntactically become the loop body.
118 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
119  SmallVectorImpl<Register> &VRegs) {
120  for (int i = 0; i < NumParts; ++i)
122  MIRBuilder.buildUnmerge(VRegs, Reg);
123 }
124 
// Splits Reg (of type RegTy) into pieces of type MainTy, plus pieces of a
// smaller leftover type when MainTy does not divide RegTy evenly.
// The MainTy pieces are appended to VRegs and the leftover pieces to
// LeftoverRegs; LeftoverTy is an out argument that receives the leftover type
// and stays invalid when the split is exact.
// Returns false when MainTy is a vector and the leftover bits are not a whole
// number of its elements; returns true otherwise.
// NOTE(review): listing line 127 is absent from this listing; presumably it
// declares the VRegs parameter used below -- confirm against the full file.
125 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
126  LLT MainTy, LLT &LeftoverTy,
128  SmallVectorImpl<Register> &LeftoverRegs) {
129  assert(!LeftoverTy.isValid() && "this is an out argument");
130 
131  unsigned RegSize = RegTy.getSizeInBits();
132  unsigned MainSize = MainTy.getSizeInBits();
133  unsigned NumParts = RegSize / MainSize;
134  unsigned LeftoverSize = RegSize - NumParts * MainSize;
135 
136  // Use an unmerge when possible.
137  if (LeftoverSize == 0) {
138  for (unsigned I = 0; I < NumParts; ++I)
139  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
140  MIRBuilder.buildUnmerge(VRegs, Reg);
141  return true;
142  }
143 
    // Pick the leftover type: whole elements of MainTy for vectors, otherwise a
    // scalar that is exactly as wide as the remaining bits.
144  if (MainTy.isVector()) {
145  unsigned EltSize = MainTy.getScalarSizeInBits();
146  if (LeftoverSize % EltSize != 0)
147  return false;
148  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
149  } else {
150  LeftoverTy = LLT::scalar(LeftoverSize);
151  }
152 
153  // For irregular sizes, extract the individual parts.
154  for (unsigned I = 0; I != NumParts; ++I) {
155  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
156  VRegs.push_back(NewReg);
157  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
158  }
159 
    // Extract what remains after the last full-size part in LeftoverTy pieces.
160  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
161  Offset += LeftoverSize) {
162  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
163  LeftoverRegs.push_back(NewReg);
164  MIRBuilder.buildExtract(NewReg, Reg, Offset);
165  }
166 
167  return true;
168 }
169 
170 void LegalizerHelper::insertParts(Register DstReg,
171  LLT ResultTy, LLT PartTy,
172  ArrayRef<Register> PartRegs,
173  LLT LeftoverTy,
174  ArrayRef<Register> LeftoverRegs) {
175  if (!LeftoverTy.isValid()) {
176  assert(LeftoverRegs.empty());
177 
178  if (!ResultTy.isVector()) {
179  MIRBuilder.buildMerge(DstReg, PartRegs);
180  return;
181  }
182 
183  if (PartTy.isVector())
184  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
185  else
186  MIRBuilder.buildBuildVector(DstReg, PartRegs);
187  return;
188  }
189 
190  unsigned PartSize = PartTy.getSizeInBits();
191  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
192 
193  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
194  MIRBuilder.buildUndef(CurResultReg);
195 
196  unsigned Offset = 0;
197  for (unsigned PartReg : PartRegs) {
198  unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
199  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
200  CurResultReg = NewResultReg;
201  Offset += PartSize;
202  }
203 
204  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
205  // Use the original output register for the final insert to avoid a copy.
206  unsigned NewResultReg = (I + 1 == E) ?
207  DstReg : MRI.createGenericVirtualRegister(ResultTy);
208 
209  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
210  CurResultReg = NewResultReg;
211  Offset += LeftoverPartSize;
212  }
213 }
214 
215 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
216  switch (Opcode) {
217  case TargetOpcode::G_SDIV:
218  assert((Size == 32 || Size == 64) && "Unsupported size");
219  return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
220  case TargetOpcode::G_UDIV:
221  assert((Size == 32 || Size == 64) && "Unsupported size");
222  return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
223  case TargetOpcode::G_SREM:
224  assert((Size == 32 || Size == 64) && "Unsupported size");
225  return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
226  case TargetOpcode::G_UREM:
227  assert((Size == 32 || Size == 64) && "Unsupported size");
228  return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
229  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
230  assert(Size == 32 && "Unsupported size");
231  return RTLIB::CTLZ_I32;
232  case TargetOpcode::G_FADD:
233  assert((Size == 32 || Size == 64) && "Unsupported size");
234  return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
235  case TargetOpcode::G_FSUB:
236  assert((Size == 32 || Size == 64) && "Unsupported size");
237  return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
238  case TargetOpcode::G_FMUL:
239  assert((Size == 32 || Size == 64) && "Unsupported size");
240  return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
241  case TargetOpcode::G_FDIV:
242  assert((Size == 32 || Size == 64) && "Unsupported size");
243  return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
244  case TargetOpcode::G_FEXP:
245  assert((Size == 32 || Size == 64) && "Unsupported size");
246  return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
247  case TargetOpcode::G_FEXP2:
248  assert((Size == 32 || Size == 64) && "Unsupported size");
249  return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
250  case TargetOpcode::G_FREM:
251  return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
252  case TargetOpcode::G_FPOW:
253  return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
254  case TargetOpcode::G_FMA:
255  assert((Size == 32 || Size == 64) && "Unsupported size");
256  return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
257  case TargetOpcode::G_FSIN:
258  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
259  return Size == 128 ? RTLIB::SIN_F128
260  : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
261  case TargetOpcode::G_FCOS:
262  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263  return Size == 128 ? RTLIB::COS_F128
264  : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
265  case TargetOpcode::G_FLOG10:
266  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267  return Size == 128 ? RTLIB::LOG10_F128
268  : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
269  case TargetOpcode::G_FLOG:
270  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271  return Size == 128 ? RTLIB::LOG_F128
272  : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
273  case TargetOpcode::G_FLOG2:
274  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275  return Size == 128 ? RTLIB::LOG2_F128
276  : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
277  case TargetOpcode::G_FCEIL:
278  assert((Size == 32 || Size == 64) && "Unsupported size");
279  return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
280  case TargetOpcode::G_FFLOOR:
281  assert((Size == 32 || Size == 64) && "Unsupported size");
282  return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
283  }
284  llvm_unreachable("Unknown libcall function");
285 }
286 
// NOTE(review): several lines of this definition are absent from this listing
// (287-288, 290, 298 and 300), including the start of the signature and both
// return statements. Callers in this file refer to it as createLibcall.
// The visible part looks up the subtarget's call lowering and target lowering,
// fetches the symbol name for Libcall, marks the function's frame info as
// having calls, and asks the call lowering to emit a call to that external
// symbol using the libcall's calling convention, Result and Args.
289  const CallLowering::ArgInfo &Result,
291  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
292  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
293  const char *Name = TLI.getLibcallName(Libcall);
294 
295  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
296  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
297  MachineOperand::CreateES(Name), Result, Args))
299 
301 }
302 
303 // Useful for libcalls where all operands have the same type.
// NOTE(review): listing lines 304-305 (start of the signature) and 309
// (presumably the declaration of Args) are absent from this listing.
// The visible part picks the libcall for MI's opcode and Size, passes every
// operand of MI from index 1 onward as an argument of type OpType, and uses
// operand 0 with the same type as the call result.
306  Type *OpType) {
307  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
308 
310  for (unsigned i = 1; i < MI.getNumOperands(); i++)
311  Args.push_back({MI.getOperand(i).getReg(), OpType});
312  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
313  Args);
314 }
315 
316 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
317  Type *FromType) {
318  auto ToMVT = MVT::getVT(ToType);
319  auto FromMVT = MVT::getVT(FromType);
320 
321  switch (Opcode) {
322  case TargetOpcode::G_FPEXT:
323  return RTLIB::getFPEXT(FromMVT, ToMVT);
324  case TargetOpcode::G_FPTRUNC:
325  return RTLIB::getFPROUND(FromMVT, ToMVT);
326  case TargetOpcode::G_FPTOSI:
327  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
328  case TargetOpcode::G_FPTOUI:
329  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
330  case TargetOpcode::G_SITOFP:
331  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
332  case TargetOpcode::G_UITOFP:
333  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
334  }
335  llvm_unreachable("Unsupported libcall function");
336 }
337 
// NOTE(review): listing lines 338-339 (start of the signature) and 341
// (presumably the definition of Libcall) are absent from this listing, so the
// function's name is not visible here.
// The visible part emits the libcall with operand 0 of MI as the result, typed
// ToType, and operand 1 as the only argument, typed FromType.
340  Type *FromType) {
342  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
343  {{MI.getOperand(1).getReg(), FromType}});
344 }
345 
// NOTE(review): the signature of this definition (listing lines 346-347) and
// the lines that start the conversion calls (400-401, 412-413, 425, 440) are
// absent from this listing; legalizeInstrStep refers to it as libcall(MI).
// Replaces MI with a runtime library call chosen from its opcode and operand
// sizes. Returns UnableToLegalize for unhandled opcodes or unsupported sizes;
// on success MI is erased and Legalized is returned.
348  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
349  unsigned Size = LLTy.getSizeInBits();
350  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
351 
352  MIRBuilder.setInstr(MI);
353 
354  switch (MI.getOpcode()) {
355  default:
356  return UnableToLegalize;
    // Integer operations: all operands share one integer type of Size bits.
357  case TargetOpcode::G_SDIV:
358  case TargetOpcode::G_UDIV:
359  case TargetOpcode::G_SREM:
360  case TargetOpcode::G_UREM:
361  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
362  Type *HLTy = IntegerType::get(Ctx, Size);
363  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
364  if (Status != Legalized)
365  return Status;
366  break;
367  }
    // Floating point operations: only sizes up to 64 bits are accepted, and
    // any size other than 64 is treated as float.
368  case TargetOpcode::G_FADD:
369  case TargetOpcode::G_FSUB:
370  case TargetOpcode::G_FMUL:
371  case TargetOpcode::G_FDIV:
372  case TargetOpcode::G_FMA:
373  case TargetOpcode::G_FPOW:
374  case TargetOpcode::G_FREM:
375  case TargetOpcode::G_FCOS:
376  case TargetOpcode::G_FSIN:
377  case TargetOpcode::G_FLOG10:
378  case TargetOpcode::G_FLOG:
379  case TargetOpcode::G_FLOG2:
380  case TargetOpcode::G_FEXP:
381  case TargetOpcode::G_FEXP2:
382  case TargetOpcode::G_FCEIL:
383  case TargetOpcode::G_FFLOOR: {
384  if (Size > 64) {
385  LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
386  return UnableToLegalize;
387  }
388  Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
389  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
390  if (Status != Legalized)
391  return Status;
392  break;
393  }
394  case TargetOpcode::G_FPEXT: {
395  // FIXME: Support other floating point types (half, fp128 etc)
396  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
397  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
398  if (ToSize != 64 || FromSize != 32)
399  return UnableToLegalize;
    // NOTE(review): the statement defining Status (lines 400-401) is missing
    // from this listing.
402  if (Status != Legalized)
403  return Status;
404  break;
405  }
406  case TargetOpcode::G_FPTRUNC: {
407  // FIXME: Support other floating point types (half, fp128 etc)
408  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
409  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
410  if (ToSize != 32 || FromSize != 64)
411  return UnableToLegalize;
    // NOTE(review): the statement defining Status (lines 412-413) is missing
    // from this listing.
414  if (Status != Legalized)
415  return Status;
416  break;
417  }
418  case TargetOpcode::G_FPTOSI:
419  case TargetOpcode::G_FPTOUI: {
420  // FIXME: Support other types
421  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
422  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
423  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
424  return UnableToLegalize;
    // NOTE(review): the line that opens this call (425) is missing from this
    // listing. The visible arguments pass the integer result type first and
    // the floating point source type second.
426  MI, MIRBuilder,
427  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
428  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
429  if (Status != Legalized)
430  return Status;
431  break;
432  }
433  case TargetOpcode::G_SITOFP:
434  case TargetOpcode::G_UITOFP: {
435  // FIXME: Support other types
436  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
437  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
438  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
439  return UnableToLegalize;
    // NOTE(review): the line that opens this call (440) is missing from this
    // listing. The visible arguments pass the floating point result type first
    // and the integer source type second.
441  MI, MIRBuilder,
442  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
443  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
444  if (Status != Legalized)
445  return Status;
446  break;
447  }
448  }
449 
    // Every case that breaks out of the switch has emitted its call, so the
    // original instruction can be removed.
450  MI.eraseFromParent();
451  return Legalized;
452 }
453 
// NOTE(review): the first line of this definition (listing line 454) and lines
// 529, 534 and 563 (presumably the declarations of CarryIn, CarryOut and
// BorrowOut) are absent from this listing; legalizeInstrStep refers to it as
// narrowScalar(MI, TypeIdx, NewType).
// Rewrites MI so that it operates on values of the narrower type NarrowTy.
// TypeIdx selects which type index of MI is being narrowed. Returns
// UnableToLegalize for opcodes or shapes that are not handled; otherwise the
// replacement is built at MI and Legalized is returned.
455  unsigned TypeIdx,
456  LLT NarrowTy) {
457  MIRBuilder.setInstr(MI);
458 
459  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
460  uint64_t NarrowSize = NarrowTy.getSizeInBits();
461 
462  switch (MI.getOpcode()) {
463  default:
464  return UnableToLegalize;
465  case TargetOpcode::G_IMPLICIT_DEF: {
466  // FIXME: add support for when SizeOp0 isn't an exact multiple of
467  // NarrowSize.
468  if (SizeOp0 % NarrowSize != 0)
469  return UnableToLegalize;
470  int NumParts = SizeOp0 / NarrowSize;
471 
    // Build one narrow undef per part and recombine them into the result.
472  SmallVector<Register, 2> DstRegs;
473  for (int i = 0; i < NumParts; ++i)
474  DstRegs.push_back(
475  MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
476 
477  unsigned DstReg = MI.getOperand(0).getReg();
478  if(MRI.getType(DstReg).isVector())
479  MIRBuilder.buildBuildVector(DstReg, DstRegs);
480  else
481  MIRBuilder.buildMerge(DstReg, DstRegs);
482  MI.eraseFromParent();
483  return Legalized;
484  }
485  case TargetOpcode::G_CONSTANT: {
486  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
487  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
488  unsigned TotalSize = Ty.getSizeInBits();
489  unsigned NarrowSize = NarrowTy.getSizeInBits();
490  int NumParts = TotalSize / NarrowSize;
491 
    // Emit one narrow constant per full part, taking bits from the low end of
    // the value upward.
492  SmallVector<Register, 4> PartRegs;
493  for (int I = 0; I != NumParts; ++I) {
494  unsigned Offset = I * NarrowSize;
495  auto K = MIRBuilder.buildConstant(NarrowTy,
496  Val.lshr(Offset).trunc(NarrowSize));
497  PartRegs.push_back(K.getReg(0));
498  }
499 
    // Any bits above the last full part become a single leftover constant.
500  LLT LeftoverTy;
501  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
502  SmallVector<Register, 1> LeftoverRegs;
503  if (LeftoverBits != 0) {
504  LeftoverTy = LLT::scalar(LeftoverBits);
505  auto K = MIRBuilder.buildConstant(
506  LeftoverTy,
507  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
508  LeftoverRegs.push_back(K.getReg(0));
509  }
510 
511  insertParts(MI.getOperand(0).getReg(),
512  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
513 
514  MI.eraseFromParent();
515  return Legalized;
516  }
517  case TargetOpcode::G_ADD: {
518  // FIXME: add support for when SizeOp0 isn't an exact multiple of
519  // NarrowSize.
520  if (SizeOp0 % NarrowSize != 0)
521  return UnableToLegalize;
522  // Expand in terms of carry-setting/consuming G_ADDE instructions.
523  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
524 
525  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
526  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
527  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
528 
    // NOTE(review): the declaration of CarryIn (line 529) is missing from this
    // listing. The initial carry is the constant 0.
530  MIRBuilder.buildConstant(CarryIn, 0);
531 
532  for (int i = 0; i < NumParts; ++i) {
533  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    // NOTE(review): the declaration of CarryOut (line 534) is missing from this
    // listing.
535 
536  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
537  Src2Regs[i], CarryIn);
538 
539  DstRegs.push_back(DstReg);
    // The carry out of this part feeds the next part.
540  CarryIn = CarryOut;
541  }
542  Register DstReg = MI.getOperand(0).getReg();
543  if(MRI.getType(DstReg).isVector())
544  MIRBuilder.buildBuildVector(DstReg, DstRegs);
545  else
546  MIRBuilder.buildMerge(DstReg, DstRegs);
547  MI.eraseFromParent();
548  return Legalized;
549  }
550  case TargetOpcode::G_SUB: {
551  // FIXME: add support for when SizeOp0 isn't an exact multiple of
552  // NarrowSize.
553  if (SizeOp0 % NarrowSize != 0)
554  return UnableToLegalize;
555 
556  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
557 
558  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
559  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
560  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
561 
    // The lowest part uses G_USUBO (no incoming borrow); every later part uses
    // G_USUBE and consumes the borrow produced by the previous part.
    // NOTE(review): the declaration of BorrowOut (line 563) is missing from
    // this listing.
562  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
564  MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
565  {Src1Regs[0], Src2Regs[0]});
566  DstRegs.push_back(DstReg);
567  Register BorrowIn = BorrowOut;
568  for (int i = 1; i < NumParts; ++i) {
569  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
570  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
571 
572  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
573  {Src1Regs[i], Src2Regs[i], BorrowIn});
574 
575  DstRegs.push_back(DstReg);
576  BorrowIn = BorrowOut;
577  }
578  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
579  MI.eraseFromParent();
580  return Legalized;
581  }
582  case TargetOpcode::G_MUL:
583  case TargetOpcode::G_UMULH:
584  return narrowScalarMul(MI, NarrowTy);
585  case TargetOpcode::G_EXTRACT:
586  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
587  case TargetOpcode::G_INSERT:
588  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
589  case TargetOpcode::G_LOAD: {
590  const auto &MMO = **MI.memoperands_begin();
591  Register DstReg = MI.getOperand(0).getReg();
592  LLT DstTy = MRI.getType(DstReg);
593  if (DstTy.isVector())
594  return UnableToLegalize;
595 
    // When the memory access size differs from the result size, load into a
    // NarrowTy register and any-extend that into the original result.
    // NOTE(review): the inner MMO below shadows the outer one declared at the
    // top of this case; both refer to the first memory operand.
596  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
597  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
598  auto &MMO = **MI.memoperands_begin();
599  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
600  MIRBuilder.buildAnyExt(DstReg, TmpReg);
601  MI.eraseFromParent();
602  return Legalized;
603  }
604 
605  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
606  }
607  case TargetOpcode::G_ZEXTLOAD:
608  case TargetOpcode::G_SEXTLOAD: {
609  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
610  Register DstReg = MI.getOperand(0).getReg();
611  Register PtrReg = MI.getOperand(1).getReg();
612 
    // Load into a NarrowTy register: a plain load when the memory size equals
    // NarrowSize, otherwise an extending load of the same kind as MI.
613  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
614  auto &MMO = **MI.memoperands_begin();
615  if (MMO.getSizeInBits() == NarrowSize) {
616  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
617  } else {
618  unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
619  : TargetOpcode::G_SEXTLOAD;
620  MIRBuilder.buildInstr(ExtLoad)
621  .addDef(TmpReg)
622  .addUse(PtrReg)
623  .addMemOperand(&MMO);
624  }
625 
    // Extend the narrow value to the original result with the matching
    // extension.
626  if (ZExt)
627  MIRBuilder.buildZExt(DstReg, TmpReg);
628  else
629  MIRBuilder.buildSExt(DstReg, TmpReg);
630 
631  MI.eraseFromParent();
632  return Legalized;
633  }
634  case TargetOpcode::G_STORE: {
635  const auto &MMO = **MI.memoperands_begin();
636 
637  Register SrcReg = MI.getOperand(0).getReg();
638  LLT SrcTy = MRI.getType(SrcReg);
639  if (SrcTy.isVector())
640  return UnableToLegalize;
641 
642  int NumParts = SizeOp0 / NarrowSize;
643  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
644  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): vector sources have already been rejected above, so the
    // condition below can never be true.
645  if (SrcTy.isVector() && LeftoverBits != 0)
646  return UnableToLegalize;
647 
    // When the memory access size differs from the source size, truncate the
    // source to NarrowTy and store that instead.
    // NOTE(review): the inner MMO below shadows the outer one declared at the
    // top of this case; both refer to the first memory operand.
648  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
649  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
650  auto &MMO = **MI.memoperands_begin();
651  MIRBuilder.buildTrunc(TmpReg, SrcReg);
652  MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
653  MI.eraseFromParent();
654  return Legalized;
655  }
656 
657  return reduceLoadStoreWidth(MI, 0, NarrowTy);
658  }
659  case TargetOpcode::G_SELECT:
660  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
661  case TargetOpcode::G_AND:
662  case TargetOpcode::G_OR:
663  case TargetOpcode::G_XOR: {
664  // Legalize bitwise operation:
665  // A = BinOp<Ty> B, C
666  // into:
667  // B1, ..., BN = G_UNMERGE_VALUES B
668  // C1, ..., CN = G_UNMERGE_VALUES C
669  // A1 = BinOp<Ty/N> B1, C1
670  // ...
671  // AN = BinOp<Ty/N> BN, CN
672  // A = G_MERGE_VALUES A1, ..., AN
673  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
674  }
675  case TargetOpcode::G_SHL:
676  case TargetOpcode::G_LSHR:
677  case TargetOpcode::G_ASHR:
678  return narrowScalarShift(MI, TypeIdx, NarrowTy);
679  case TargetOpcode::G_CTLZ:
680  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
681  case TargetOpcode::G_CTTZ:
682  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
683  case TargetOpcode::G_CTPOP:
684  if (TypeIdx != 0)
685  return UnableToLegalize; // TODO
686 
    // Only the result is narrowed: MI is changed to define a NarrowTy value
    // and the original result is rebuilt from it with G_ZEXT.
687  Observer.changingInstr(MI);
688  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
689  Observer.changedInstr(MI);
690  return Legalized;
691  case TargetOpcode::G_INTTOPTR:
692  if (TypeIdx != 1)
693  return UnableToLegalize;
694 
    // Only the integer source (operand 1) is narrowed, via a truncate.
695  Observer.changingInstr(MI);
696  narrowScalarSrc(MI, NarrowTy, 1);
697  Observer.changedInstr(MI);
698  return Legalized;
699  case TargetOpcode::G_PTRTOINT:
700  if (TypeIdx != 0)
701  return UnableToLegalize;
702 
    // Only the integer result (operand 0) is narrowed; the original result is
    // rebuilt from the narrow value with G_ZEXT.
703  Observer.changingInstr(MI);
704  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
705  Observer.changedInstr(MI);
706  return Legalized;
707  }
708 }
709 
710 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
711  unsigned OpIdx, unsigned ExtOpcode) {
712  MachineOperand &MO = MI.getOperand(OpIdx);
713  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
714  MO.setReg(ExtB->getOperand(0).getReg());
715 }
716 
717 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
718  unsigned OpIdx) {
719  MachineOperand &MO = MI.getOperand(OpIdx);
720  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
721  {MO.getReg()});
722  MO.setReg(ExtB->getOperand(0).getReg());
723 }
724 
// Makes MI define a new register of type WideTy in operand OpIdx, and rebuilds
// the operand's original register from that wide value with TruncOpcode.
// NOTE(review): listing line 729 is absent from this listing; presumably it
// moves the builder's insertion point so the new instruction lands after MI --
// confirm against the full file.
725 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
726  unsigned OpIdx, unsigned TruncOpcode) {
727  MachineOperand &MO = MI.getOperand(OpIdx);
728  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
730  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
731  MO.setReg(DstExt);
732 }
733 
// Makes MI define a new register of type NarrowTy in operand OpIdx, and
// rebuilds the operand's original register from that narrow value with
// ExtOpcode.
// NOTE(review): listing line 738 is absent from this listing; presumably it
// moves the builder's insertion point so the new instruction lands after MI --
// confirm against the full file.
734 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
735  unsigned OpIdx, unsigned ExtOpcode) {
736  MachineOperand &MO = MI.getOperand(OpIdx);
737  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
739  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
740  MO.setReg(DstTrunc);
741 }
742 
// Makes MI define a new register of type WideTy in operand OpIdx, and rebuilds
// the operand's original register by extracting from that value at bit
// offset 0.
// NOTE(review): listing line 747 is absent from this listing; presumably it
// moves the builder's insertion point so the extract lands after MI -- confirm
// against the full file.
743 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
744  unsigned OpIdx) {
745  MachineOperand &MO = MI.getOperand(OpIdx);
746  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
748  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
749  MO.setReg(DstExt);
750 }
751 
// Replaces operand OpIdx of MI with a value of the wider vector type MoreTy
// whose leading elements are the operand's original value and whose remaining
// elements are undefined.
// NOTE(review): listing line 764 is absent from this listing; presumably it
// declares the Parts container used below -- confirm against the full file.
752 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
753  unsigned OpIdx) {
754  MachineOperand &MO = MI.getOperand(OpIdx);
755 
756  LLT OldTy = MRI.getType(MO.getReg());
757  unsigned OldElts = OldTy.getNumElements();
758  unsigned NewElts = MoreTy.getNumElements();
759 
760  unsigned NumParts = NewElts / OldElts;
761 
762  // Use concat_vectors if the result is a multiple of the number of elements.
763  if (NumParts * OldElts == NewElts) {
765  Parts.push_back(MO.getReg());
766 
    // One undef of the old type is shared by all padding slots.
767  unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
768  for (unsigned I = 1; I != NumParts; ++I)
769  Parts.push_back(ImpDef);
770 
771  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
772  MO.setReg(Concat.getReg(0));
773  return;
774  }
775 
    // Otherwise insert the original value at bit offset 0 of an undef of the
    // wider type.
776  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
777  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
778  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
779  MO.setReg(MoreReg);
780 }
781 
783 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
784  LLT WideTy) {
785  if (TypeIdx != 1)
786  return UnableToLegalize;
787 
788  Register DstReg = MI.getOperand(0).getReg();
789  LLT DstTy = MRI.getType(DstReg);
790  if (!DstTy.isScalar())
791  return UnableToLegalize;
792 
793  unsigned NumOps = MI.getNumOperands();
794  unsigned NumSrc = MI.getNumOperands() - 1;
795  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
796 
797  Register Src1 = MI.getOperand(1).getReg();
798  Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();
799 
800  for (unsigned I = 2; I != NumOps; ++I) {
801  const unsigned Offset = (I - 1) * PartSize;
802 
803  Register SrcReg = MI.getOperand(I).getReg();
804  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
805 
806  auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
807 
808  Register NextResult = I + 1 == NumOps ? DstReg :
809  MRI.createGenericVirtualRegister(DstTy);
810 
811  auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
812  auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
813  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
814  ResultReg = NextResult;
815  }
816 
817  MI.eraseFromParent();
818  return Legalized;
819 }
820 
822 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
823  LLT WideTy) {
824  if (TypeIdx != 0)
825  return UnableToLegalize;
826 
827  unsigned NumDst = MI.getNumOperands() - 1;
828  Register SrcReg = MI.getOperand(NumDst).getReg();
829  LLT SrcTy = MRI.getType(SrcReg);
830  if (!SrcTy.isScalar())
831  return UnableToLegalize;
832 
833  Register Dst0Reg = MI.getOperand(0).getReg();
834  LLT DstTy = MRI.getType(Dst0Reg);
835  if (!DstTy.isScalar())
836  return UnableToLegalize;
837 
838  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
839  LLT NewSrcTy = LLT::scalar(NewSrcSize);
840  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
841 
842  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
843 
844  for (unsigned I = 1; I != NumDst; ++I) {
845  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
846  auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
847  WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
848  }
849 
850  Observer.changingInstr(MI);
851 
852  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
853  for (unsigned I = 0; I != NumDst; ++I)
854  widenScalarDst(MI, WideTy, I);
855 
856  Observer.changedInstr(MI);
857 
858  return Legalized;
859 }
860 
// NOTE(review): the line before this one (listing line 861, presumably the
// return type) and line 880 (presumably the condition guarding the return on
// line 881) are absent from this listing.
// Widens one type of an extract instruction to WideTy.
// For TypeIdx 0 the extract is replaced by an optional shift right by the
// extract offset followed by a truncate to the original result, and MI is
// erased. For any other TypeIdx the source is any-extended in place; for
// vector sources the offset is rescaled and the result is widened to the
// scalar type of WideTy as well.
862 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
863  LLT WideTy) {
864  Register DstReg = MI.getOperand(0).getReg();
865  Register SrcReg = MI.getOperand(1).getReg();
866  LLT SrcTy = MRI.getType(SrcReg);
867 
868  LLT DstTy = MRI.getType(DstReg);
869  unsigned Offset = MI.getOperand(2).getImm();
870 
871  if (TypeIdx == 0) {
872  if (SrcTy.isVector() || DstTy.isVector())
873  return UnableToLegalize;
874 
875  SrcOp Src(SrcReg);
876  if (SrcTy.isPointer()) {
877  // Extracts from pointers can be handled only if they are really just
878  // simple integers.
879  const DataLayout &DL = MIRBuilder.getDataLayout();
    // NOTE(review): the condition for this return (line 880) is missing from
    // this listing; as listed the return looks unconditional.
881  return UnableToLegalize;
882 
    // Reinterpret the pointer as an integer of the same width.
883  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
884  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
885  SrcTy = SrcAsIntTy;
886  }
887 
888  if (DstTy.isPointer())
889  return UnableToLegalize;
890 
891  if (Offset == 0) {
892  // Avoid a shift in the degenerate case.
893  MIRBuilder.buildTrunc(DstReg,
894  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
895  MI.eraseFromParent();
896  return Legalized;
897  }
898 
899  // Do a shift in the source type.
900  LLT ShiftTy = SrcTy;
    // NOTE(review): the else-if below repeats the condition of the if, so its
    // return can never execute. Either the branch is dead or a different
    // comparison was intended -- confirm against upstream.
901  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
902  Src = MIRBuilder.buildAnyExt(WideTy, Src);
903  ShiftTy = WideTy;
904  } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
905  return UnableToLegalize;
906 
907  auto LShr = MIRBuilder.buildLShr(
908  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
909  MIRBuilder.buildTrunc(DstReg, LShr);
910  MI.eraseFromParent();
911  return Legalized;
912  }
913 
    // From here on the source type is the one being widened.
914  if (SrcTy.isScalar()) {
915  Observer.changingInstr(MI);
916  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
917  Observer.changedInstr(MI);
918  return Legalized;
919  }
920 
921  if (!SrcTy.isVector())
922  return UnableToLegalize;
923 
    // Vector sources are handled only when a single element is extracted at an
    // element-aligned offset.
924  if (DstTy != SrcTy.getElementType())
925  return UnableToLegalize;
926 
927  if (Offset % SrcTy.getScalarSizeInBits() != 0)
928  return UnableToLegalize;
929 
930  Observer.changingInstr(MI);
931  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
932 
    // Scale the bit offset by the ratio of the wide and original source sizes.
933  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
934  Offset);
935  widenScalarDst(MI, WideTy.getScalarType(), 0);
936  Observer.changedInstr(MI);
937  return Legalized;
938 }
939 
941 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
942  LLT WideTy) {
943  if (TypeIdx != 0)
944  return UnableToLegalize;
945  Observer.changingInstr(MI);
946  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
947  widenScalarDst(MI, WideTy);
948  Observer.changedInstr(MI);
949  return Legalized;
950 }
951 
/// Legalize \p MI by widening the scalar type at index \p TypeIdx to
/// \p WideTy.
///
/// Dispatches on the opcode of \p MI. Most cases rewrite the instruction in
/// place with the widenScalarSrc / widenScalarDst helpers, bracketed by
/// Observer.changingInstr / Observer.changedInstr; a few cases build a
/// replacement sequence with MIRBuilder and erase \p MI instead.
///
/// \returns Legalized on success, or UnableToLegalize for opcodes and type
/// indices that are not handled.
///
/// NOTE(review): the line carrying the return type is not visible in this
/// listing; other definitions in the file spell it
/// LegalizerHelper::LegalizeResult -- confirm against the original source.
953 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
954  MIRBuilder.setInstr(MI);
955 
956  switch (MI.getOpcode()) {
957  default:
958  return UnableToLegalize;
     // Opcodes with a dedicated widening helper.
959  case TargetOpcode::G_EXTRACT:
960  return widenScalarExtract(MI, TypeIdx, WideTy);
961  case TargetOpcode::G_INSERT:
962  return widenScalarInsert(MI, TypeIdx, WideTy);
963  case TargetOpcode::G_MERGE_VALUES:
964  return widenScalarMergeValues(MI, TypeIdx, WideTy);
965  case TargetOpcode::G_UNMERGE_VALUES:
966  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
     // Unsigned add/sub with overflow: zero-extend both inputs, do a plain
     // G_ADD/G_SUB in WideTy, and derive the overflow bit by comparing the wide
     // result with its masked copy.
967  case TargetOpcode::G_UADDO:
968  case TargetOpcode::G_USUBO: {
969  if (TypeIdx == 1)
970  return UnableToLegalize; // TODO
971  auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
972  {MI.getOperand(2).getReg()});
973  auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
974  {MI.getOperand(3).getReg()});
975  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
976  ? TargetOpcode::G_ADD
977  : TargetOpcode::G_SUB;
978  // Do the arithmetic in the larger type.
979  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
980  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
     // NOTE(review): `Mask` is used below but its declaration is not visible in
     // this listing; presumably a mask covering OrigTy's bits -- confirm.
982  auto AndOp = MIRBuilder.buildInstr(
983  TargetOpcode::G_AND, {WideTy},
984  {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
985  // There is no overflow if the AndOp is the same as NewOp.
     // NOTE(review): the head of the statement ending in `AndOp);` is missing
     // from this listing; presumably the compare that defines operand 1.
987  AndOp);
988  // Now trunc the NewOp to the original result.
989  MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
990  MI.eraseFromParent();
991  return Legalized;
992  }
     // Bit counting. Type index 0 is the result (only the destination is
     // widened); otherwise the source is widened and the count is corrected.
993  case TargetOpcode::G_CTTZ:
994  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
995  case TargetOpcode::G_CTLZ:
996  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
997  case TargetOpcode::G_CTPOP: {
998  if (TypeIdx == 0) {
999  Observer.changingInstr(MI);
1000  widenScalarDst(MI, WideTy, 0);
1001  Observer.changedInstr(MI);
1002  return Legalized;
1003  }
1004 
1005  Register SrcReg = MI.getOperand(1).getReg();
1006 
1007  // First ZEXT the input.
1008  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1009  LLT CurTy = MRI.getType(SrcReg);
1010  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1011  // The count is the same in the larger type except if the original
1012  // value was zero. This can be handled by setting the bit just off
1013  // the top of the original type.
1014  auto TopBit =
1015  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1016  MIBSrc = MIRBuilder.buildOr(
1017  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1018  }
1019 
1020  // Perform the operation at the larger size.
1021  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1022  // This is already the correct result for CTPOP and CTTZs
1023  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1024  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1025  // The correct result is NewOp - (Difference in widety and current ty).
1026  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1027  MIBNewOp = MIRBuilder.buildInstr(
1028  TargetOpcode::G_SUB, {WideTy},
1029  {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1030  }
1031 
     // Convert the count to the type of the original result operand.
1032  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1033  MI.eraseFromParent();
1034  return Legalized;
1035  }
     // Byte swap: MI is retargeted to produce a WideTy value in DstExt; that
     // value is shifted right by the width difference and truncated into the
     // original destination register.
1036  case TargetOpcode::G_BSWAP: {
1037  Observer.changingInstr(MI);
1038  Register DstReg = MI.getOperand(0).getReg();
1039 
1040  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1041  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1042  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1043  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1044 
1045  MI.getOperand(0).setReg(DstExt);
1046 
     // NOTE(review): a line appears to be missing from this listing here
     // (the numbering skips one line) -- confirm against the original source.
1048 
1049  LLT Ty = MRI.getType(DstReg);
1050  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1051  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1052  MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1053  .addDef(ShrReg)
1054  .addUse(DstExt)
1055  .addUse(ShiftAmtReg);
1056 
1057  MIRBuilder.buildTrunc(DstReg, ShrReg);
1058  Observer.changedInstr(MI);
1059  return Legalized;
1060  }
1061  case TargetOpcode::G_ADD:
1062  case TargetOpcode::G_AND:
1063  case TargetOpcode::G_MUL:
1064  case TargetOpcode::G_OR:
1065  case TargetOpcode::G_XOR:
1066  case TargetOpcode::G_SUB:
1067  // Perform operation at larger width (any extension is fine here, high bits
1068  // don't affect the result) and then truncate the result back to the
1069  // original type.
1070  Observer.changingInstr(MI);
1071  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1072  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1073  widenScalarDst(MI, WideTy);
1074  Observer.changedInstr(MI);
1075  return Legalized;
1076 
     // Left shift: type index 0 is the shifted value/result, type index 1 is
     // the shift amount.
1077  case TargetOpcode::G_SHL:
1078  Observer.changingInstr(MI);
1079 
1080  if (TypeIdx == 0) {
1081  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1082  widenScalarDst(MI, WideTy);
1083  } else {
1084  assert(TypeIdx == 1);
1085  // The "number of bits to shift" operand must preserve its value as an
1086  // unsigned integer:
1087  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1088  }
1089 
1090  Observer.changedInstr(MI);
1091  return Legalized;
1092 
     // Signed binary operations: both inputs are sign-extended.
1093  case TargetOpcode::G_SDIV:
1094  case TargetOpcode::G_SREM:
1095  case TargetOpcode::G_SMIN:
1096  case TargetOpcode::G_SMAX:
1097  Observer.changingInstr(MI);
1098  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1099  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1100  widenScalarDst(MI, WideTy);
1101  Observer.changedInstr(MI);
1102  return Legalized;
1103 
     // Right shifts: the shifted value is extended to match the shift kind
     // (sign-extend for G_ASHR, zero-extend for G_LSHR).
1104  case TargetOpcode::G_ASHR:
1105  case TargetOpcode::G_LSHR:
1106  Observer.changingInstr(MI);
1107 
1108  if (TypeIdx == 0) {
1109  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1110  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1111 
1112  widenScalarSrc(MI, WideTy, 1, CvtOp);
1113  widenScalarDst(MI, WideTy);
1114  } else {
1115  assert(TypeIdx == 1);
1116  // The "number of bits to shift" operand must preserve its value as an
1117  // unsigned integer:
1118  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1119  }
1120 
1121  Observer.changedInstr(MI);
1122  return Legalized;
     // Unsigned binary operations: both inputs are zero-extended.
1123  case TargetOpcode::G_UDIV:
1124  case TargetOpcode::G_UREM:
1125  case TargetOpcode::G_UMIN:
1126  case TargetOpcode::G_UMAX:
1127  Observer.changingInstr(MI);
1128  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1129  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1130  widenScalarDst(MI, WideTy);
1131  Observer.changedInstr(MI);
1132  return Legalized;
1133 
     // Select: type index 0 covers the two selected values and the result;
     // any other index widens the condition (operand 1).
1134  case TargetOpcode::G_SELECT:
1135  Observer.changingInstr(MI);
1136  if (TypeIdx == 0) {
1137  // Perform operation at larger width (any extension is fine here, high
1138  // bits don't affect the result) and then truncate the result back to the
1139  // original type.
1140  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1141  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1142  widenScalarDst(MI, WideTy);
1143  } else {
1144  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1145  // Explicit extension is required here since high bits affect the result.
1146  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1147  }
1148  Observer.changedInstr(MI);
1149  return Legalized;
1150 
     // FP-to-integer conversions: only the integer result (index 0) is widened.
1151  case TargetOpcode::G_FPTOSI:
1152  case TargetOpcode::G_FPTOUI:
1153  if (TypeIdx != 0)
1154  return UnableToLegalize;
1155  Observer.changingInstr(MI);
1156  widenScalarDst(MI, WideTy);
1157  Observer.changedInstr(MI);
1158  return Legalized;
1159 
     // Signed integer-to-FP: only the integer source (index 1) is widened,
     // with sign extension.
1160  case TargetOpcode::G_SITOFP:
1161  if (TypeIdx != 1)
1162  return UnableToLegalize;
1163  Observer.changingInstr(MI);
1164  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1165  Observer.changedInstr(MI);
1166  return Legalized;
1167 
     // Unsigned integer-to-FP: only the integer source (index 1) is widened,
     // with zero extension.
1168  case TargetOpcode::G_UITOFP:
1169  if (TypeIdx != 1)
1170  return UnableToLegalize;
1171  Observer.changingInstr(MI);
1172  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1173  Observer.changedInstr(MI);
1174  return Legalized;
1175 
     // Loads: only the loaded value's register is widened.
1176  case TargetOpcode::G_LOAD:
1177  case TargetOpcode::G_SEXTLOAD:
1178  case TargetOpcode::G_ZEXTLOAD:
1179  Observer.changingInstr(MI);
1180  widenScalarDst(MI, WideTy);
1181  Observer.changedInstr(MI);
1182  return Legalized;
1183 
     // Stores: only the stored value (index 0) with a power-of-two bit size is
     // handled. A 1-bit value is zero-extended; anything else is any-extended.
1184  case TargetOpcode::G_STORE: {
1185  if (TypeIdx != 0)
1186  return UnableToLegalize;
1187 
1188  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1189  if (!isPowerOf2_32(Ty.getSizeInBits()))
1190  return UnableToLegalize;
1191 
1192  Observer.changingInstr(MI);
1193 
1194  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1195  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1196  widenScalarSrc(MI, WideTy, 0, ExtType);
1197 
1198  Observer.changedInstr(MI);
1199  return Legalized;
1200  }
     // Integer constant: replace the immediate with its sign-extension to
     // WideTy's width and widen the destination.
1201  case TargetOpcode::G_CONSTANT: {
1202  MachineOperand &SrcMO = MI.getOperand(1);
     // NOTE(review): `Ctx` is used below but its declaration is not visible in
     // this listing -- confirm against the original source.
1204  const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1205  Observer.changingInstr(MI);
1206  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1207 
1208  widenScalarDst(MI, WideTy);
1209  Observer.changedInstr(MI);
1210  return Legalized;
1211  }
     // FP constant: only 32- and 64-bit wide types are supported; the
     // destination is widened with G_FPTRUNC as the narrowing opcode.
1212  case TargetOpcode::G_FCONSTANT: {
1213  MachineOperand &SrcMO = MI.getOperand(1);
     // NOTE(review): `Ctx` is used below but its declaration is not visible in
     // this listing -- confirm against the original source.
1215  APFloat Val = SrcMO.getFPImm()->getValueAPF();
1216  bool LosesInfo;
1217  switch (WideTy.getSizeInBits()) {
1218  case 32:
     // NOTE(review): the head of this statement (ending in `&LosesInfo);`) is
     // missing from the listing; presumably it converts Val to a 32-bit format.
1220  &LosesInfo);
1221  break;
1222  case 64:
     // NOTE(review): as above, presumably the 64-bit conversion of Val.
1224  &LosesInfo);
1225  break;
1226  default:
1227  return UnableToLegalize;
1228  }
1229 
1230  assert(!LosesInfo && "extend should always be lossless");
1231 
1232  Observer.changingInstr(MI);
1233  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1234 
1235  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1236  Observer.changedInstr(MI);
1237  return Legalized;
1238  }
1239  case TargetOpcode::G_IMPLICIT_DEF: {
1240  Observer.changingInstr(MI);
1241  widenScalarDst(MI, WideTy);
1242  Observer.changedInstr(MI);
1243  return Legalized;
1244  }
     // Conditional branch: extend the condition (operand 0) with the boolean
     // extension opcode chosen by MIRBuilder.getBoolExtOp.
1245  case TargetOpcode::G_BRCOND:
1246  Observer.changingInstr(MI);
1247  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1248  Observer.changedInstr(MI);
1249  return Legalized;
1250 
     // FP compare: index 0 is the result; otherwise both compared values
     // (operands 2 and 3) are FP-extended.
1251  case TargetOpcode::G_FCMP:
1252  Observer.changingInstr(MI);
1253  if (TypeIdx == 0)
1254  widenScalarDst(MI, WideTy);
1255  else {
1256  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1257  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1258  }
1259  Observer.changedInstr(MI);
1260  return Legalized;
1261 
     // Integer compare: index 0 is the result; otherwise both compared values
     // are sign- or zero-extended according to the predicate's signedness.
1262  case TargetOpcode::G_ICMP:
1263  Observer.changingInstr(MI);
1264  if (TypeIdx == 0)
1265  widenScalarDst(MI, WideTy);
1266  else {
1267  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1268  MI.getOperand(1).getPredicate()))
1269  ? TargetOpcode::G_SEXT
1270  : TargetOpcode::G_ZEXT;
1271  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1272  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1273  }
1274  Observer.changedInstr(MI);
1275  return Legalized;
1276 
     // GEP: only the offset (operand 2, type index 1) can be widened; it is
     // sign-extended.
1277  case TargetOpcode::G_GEP:
1278  assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1279  Observer.changingInstr(MI);
1280  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1281  Observer.changedInstr(MI);
1282  return Legalized;
1283 
     // PHI: operands come in (value, block) pairs starting at operand 1. Each
     // incoming value is extended at the first terminator of its own block;
     // the builder is then repositioned in the PHI's block before the
     // destination is widened.
1284  case TargetOpcode::G_PHI: {
1285  assert(TypeIdx == 0 && "Expecting only Idx 0");
1286 
1287  Observer.changingInstr(MI);
1288  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1289  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1290  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1291  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1292  }
1293 
1294  MachineBasicBlock &MBB = *MI.getParent();
1295  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1296  widenScalarDst(MI, WideTy);
1297  Observer.changedInstr(MI);
1298  return Legalized;
1299  }
     // Vector element extract: index 0 widens the element type (the whole
     // source vector is sign-extended to WideTy-sized elements); index 2 widens
     // the element index operand.
1300  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1301  if (TypeIdx == 0) {
1302  Register VecReg = MI.getOperand(1).getReg();
1303  LLT VecTy = MRI.getType(VecReg);
1304  Observer.changingInstr(MI);
1305 
1306  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1307  WideTy.getSizeInBits()),
1308  1, TargetOpcode::G_SEXT);
1309 
1310  widenScalarDst(MI, WideTy, 0);
1311  Observer.changedInstr(MI);
1312  return Legalized;
1313  }
1314 
1315  if (TypeIdx != 2)
1316  return UnableToLegalize;
1317  Observer.changingInstr(MI);
1318  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1319  Observer.changedInstr(MI);
1320  return Legalized;
1321  }
     // Floating-point operations: FP-extend every source operand and widen the
     // result with G_FPTRUNC as the narrowing opcode.
1322  case TargetOpcode::G_FADD:
1323  case TargetOpcode::G_FMUL:
1324  case TargetOpcode::G_FSUB:
1325  case TargetOpcode::G_FMA:
1326  case TargetOpcode::G_FNEG:
1327  case TargetOpcode::G_FABS:
1328  case TargetOpcode::G_FCANONICALIZE:
1329  case TargetOpcode::G_FDIV:
1330  case TargetOpcode::G_FREM:
1331  case TargetOpcode::G_FCEIL:
1332  case TargetOpcode::G_FFLOOR:
1333  case TargetOpcode::G_FCOS:
1334  case TargetOpcode::G_FSIN:
1335  case TargetOpcode::G_FLOG10:
1336  case TargetOpcode::G_FLOG:
1337  case TargetOpcode::G_FLOG2:
1338  case TargetOpcode::G_FRINT:
1339  case TargetOpcode::G_FNEARBYINT:
1340  case TargetOpcode::G_FSQRT:
1341  case TargetOpcode::G_FEXP:
1342  case TargetOpcode::G_FEXP2:
1343  case TargetOpcode::G_FPOW:
1344  case TargetOpcode::G_INTRINSIC_TRUNC:
1345  case TargetOpcode::G_INTRINSIC_ROUND:
1346  assert(TypeIdx == 0);
1347  Observer.changingInstr(MI);
1348 
1349  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1350  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1351 
1352  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1353  Observer.changedInstr(MI);
1354  return Legalized;
     // Integer-to-pointer: only the integer source (index 1) is widened, with
     // zero extension.
1355  case TargetOpcode::G_INTTOPTR:
1356  if (TypeIdx != 1)
1357  return UnableToLegalize;
1358 
1359  Observer.changingInstr(MI);
1360  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1361  Observer.changedInstr(MI);
1362  return Legalized;
     // Pointer-to-integer: only the integer result (index 0) is widened.
1363  case TargetOpcode::G_PTRTOINT:
1364  if (TypeIdx != 0)
1365  return UnableToLegalize;
1366 
1367  Observer.changingInstr(MI);
1368  widenScalarDst(MI, WideTy, 0);
1369  Observer.changedInstr(MI);
1370  return Legalized;
1371  }
1372 }
1373 
1375 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1376  using namespace TargetOpcode;
1377  MIRBuilder.setInstr(MI);
1378 
1379  switch(MI.getOpcode()) {
1380  default:
1381  return UnableToLegalize;
1382  case TargetOpcode::G_SREM:
1383  case TargetOpcode::G_UREM: {
1384  Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1385  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1386  .addDef(QuotReg)
1387  .addUse(MI.getOperand(1).getReg())
1388  .addUse(MI.getOperand(2).getReg());
1389 
1390  Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1391  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1393  ProdReg);
1394  MI.eraseFromParent();
1395  return Legalized;
1396  }
1397  case TargetOpcode::G_SMULO:
1398  case TargetOpcode::G_UMULO: {
1399  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1400  // result.
1401  Register Res = MI.getOperand(0).getReg();
1402  Register Overflow = MI.getOperand(1).getReg();
1403  Register LHS = MI.getOperand(2).getReg();
1404  Register RHS = MI.getOperand(3).getReg();
1405 
1406  MIRBuilder.buildMul(Res, LHS, RHS);
1407 
1408  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1409  ? TargetOpcode::G_SMULH
1410  : TargetOpcode::G_UMULH;
1411 
1412  Register HiPart = MRI.createGenericVirtualRegister(Ty);
1413  MIRBuilder.buildInstr(Opcode)
1414  .addDef(HiPart)
1415  .addUse(LHS)
1416  .addUse(RHS);
1417 
1418  Register Zero = MRI.createGenericVirtualRegister(Ty);
1419  MIRBuilder.buildConstant(Zero, 0);
1420 
1421  // For *signed* multiply, overflow is detected by checking:
1422  // (hi != (lo >> bitwidth-1))
1423  if (Opcode == TargetOpcode::G_SMULH) {
1424  Register Shifted = MRI.createGenericVirtualRegister(Ty);
1425  Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1426  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1427  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1428  .addDef(Shifted)
1429  .addUse(Res)
1430  .addUse(ShiftAmt);
1431  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1432  } else {
1433  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1434  }
1435  MI.eraseFromParent();
1436  return Legalized;
1437  }
1438  case TargetOpcode::G_FNEG: {
1439  // TODO: Handle vector types once we are able to
1440  // represent them.
1441  if (Ty.isVector())
1442  return UnableToLegalize;
1443  Register Res = MI.getOperand(0).getReg();
1444  Type *ZeroTy;
1446  switch (Ty.getSizeInBits()) {
1447  case 16:
1448  ZeroTy = Type::getHalfTy(Ctx);
1449  break;
1450  case 32:
1451  ZeroTy = Type::getFloatTy(Ctx);
1452  break;
1453  case 64:
1454  ZeroTy = Type::getDoubleTy(Ctx);
1455  break;
1456  case 128:
1457  ZeroTy = Type::getFP128Ty(Ctx);
1458  break;
1459  default:
1460  llvm_unreachable("unexpected floating-point type");
1461  }
1462  ConstantFP &ZeroForNegation =
1463  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1464  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1465  Register SubByReg = MI.getOperand(1).getReg();
1466  Register ZeroReg = Zero->getOperand(0).getReg();
1467  MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1468  MI.getFlags());
1469  MI.eraseFromParent();
1470  return Legalized;
1471  }
1472  case TargetOpcode::G_FSUB: {
1473  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1474  // First, check if G_FNEG is marked as Lower. If so, we may
1475  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1476  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1477  return UnableToLegalize;
1478  Register Res = MI.getOperand(0).getReg();
1479  Register LHS = MI.getOperand(1).getReg();
1480  Register RHS = MI.getOperand(2).getReg();
1481  Register Neg = MRI.createGenericVirtualRegister(Ty);
1482  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1483  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
1484  MI.eraseFromParent();
1485  return Legalized;
1486  }
1487  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1488  Register OldValRes = MI.getOperand(0).getReg();
1489  Register SuccessRes = MI.getOperand(1).getReg();
1490  Register Addr = MI.getOperand(2).getReg();
1491  Register CmpVal = MI.getOperand(3).getReg();
1492  Register NewVal = MI.getOperand(4).getReg();
1493  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1494  **MI.memoperands_begin());
1495  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1496  MI.eraseFromParent();
1497  return Legalized;
1498  }
1499  case TargetOpcode::G_LOAD:
1500  case TargetOpcode::G_SEXTLOAD:
1501  case TargetOpcode::G_ZEXTLOAD: {
1502  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1503  Register DstReg = MI.getOperand(0).getReg();
1504  Register PtrReg = MI.getOperand(1).getReg();
1505  LLT DstTy = MRI.getType(DstReg);
1506  auto &MMO = **MI.memoperands_begin();
1507 
1508  if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1509  // In the case of G_LOAD, this was a non-extending load already and we're
1510  // about to lower to the same instruction.
1511  if (MI.getOpcode() == TargetOpcode::G_LOAD)
1512  return UnableToLegalize;
1513  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1514  MI.eraseFromParent();
1515  return Legalized;
1516  }
1517 
1518  if (DstTy.isScalar()) {
1519  Register TmpReg =
1520  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
1521  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1522  switch (MI.getOpcode()) {
1523  default:
1524  llvm_unreachable("Unexpected opcode");
1525  case TargetOpcode::G_LOAD:
1526  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1527  break;
1528  case TargetOpcode::G_SEXTLOAD:
1529  MIRBuilder.buildSExt(DstReg, TmpReg);
1530  break;
1531  case TargetOpcode::G_ZEXTLOAD:
1532  MIRBuilder.buildZExt(DstReg, TmpReg);
1533  break;
1534  }
1535  MI.eraseFromParent();
1536  return Legalized;
1537  }
1538 
1539  return UnableToLegalize;
1540  }
1541  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1542  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1543  case TargetOpcode::G_CTLZ:
1544  case TargetOpcode::G_CTTZ:
1545  case TargetOpcode::G_CTPOP:
1546  return lowerBitCount(MI, TypeIdx, Ty);
1547  case G_UADDO: {
1548  Register Res = MI.getOperand(0).getReg();
1549  Register CarryOut = MI.getOperand(1).getReg();
1550  Register LHS = MI.getOperand(2).getReg();
1551  Register RHS = MI.getOperand(3).getReg();
1552 
1553  MIRBuilder.buildAdd(Res, LHS, RHS);
1554  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
1555 
1556  MI.eraseFromParent();
1557  return Legalized;
1558  }
1559  case G_UADDE: {
1560  Register Res = MI.getOperand(0).getReg();
1561  Register CarryOut = MI.getOperand(1).getReg();
1562  Register LHS = MI.getOperand(2).getReg();
1563  Register RHS = MI.getOperand(3).getReg();
1564  Register CarryIn = MI.getOperand(4).getReg();
1565 
1566  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
1567  Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1568 
1569  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1570  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1571  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1572  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1573 
1574  MI.eraseFromParent();
1575  return Legalized;
1576  }
1577  case G_USUBO: {
1578  Register Res = MI.getOperand(0).getReg();
1579  Register BorrowOut = MI.getOperand(1).getReg();
1580  Register LHS = MI.getOperand(2).getReg();
1581  Register RHS = MI.getOperand(3).getReg();
1582 
1583  MIRBuilder.buildSub(Res, LHS, RHS);
1584  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1585 
1586  MI.eraseFromParent();
1587  return Legalized;
1588  }
1589  case G_USUBE: {
1590  Register Res = MI.getOperand(0).getReg();
1591  Register BorrowOut = MI.getOperand(1).getReg();
1592  Register LHS = MI.getOperand(2).getReg();
1593  Register RHS = MI.getOperand(3).getReg();
1594  Register BorrowIn = MI.getOperand(4).getReg();
1595 
1596  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
1597  Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1598  Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1599  Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1600 
1601  MIRBuilder.buildSub(TmpRes, LHS, RHS);
1602  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1603  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1604  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1605  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1606  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1607 
1608  MI.eraseFromParent();
1609  return Legalized;
1610  }
1611  case G_UITOFP:
1612  return lowerUITOFP(MI, TypeIdx, Ty);
1613  case G_SITOFP:
1614  return lowerSITOFP(MI, TypeIdx, Ty);
1615  }
1616 }
1617 
1618 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1619  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1620  SmallVector<Register, 2> DstRegs;
1621 
1622  unsigned NarrowSize = NarrowTy.getSizeInBits();
1623  Register DstReg = MI.getOperand(0).getReg();
1624  unsigned Size = MRI.getType(DstReg).getSizeInBits();
1625  int NumParts = Size / NarrowSize;
1626  // FIXME: Don't know how to handle the situation where the small vectors
1627  // aren't all the same size yet.
1628  if (Size % NarrowSize != 0)
1629  return UnableToLegalize;
1630 
1631  for (int i = 0; i < NumParts; ++i) {
1632  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1633  MIRBuilder.buildUndef(TmpReg);
1634  DstRegs.push_back(TmpReg);
1635  }
1636 
1637  if (NarrowTy.isVector())
1638  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1639  else
1640  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1641 
1642  MI.eraseFromParent();
1643  return Legalized;
1644 }
1645 
1647 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1648  LLT NarrowTy) {
1649  const unsigned Opc = MI.getOpcode();
1650  const unsigned NumOps = MI.getNumOperands() - 1;
1651  const unsigned NarrowSize = NarrowTy.getSizeInBits();
1652  const unsigned DstReg = MI.getOperand(0).getReg();
1653  const unsigned Flags = MI.getFlags();
1654  const LLT DstTy = MRI.getType(DstReg);
1655  const unsigned Size = DstTy.getSizeInBits();
1656  const int NumParts = Size / NarrowSize;
1657  const LLT EltTy = DstTy.getElementType();
1658  const unsigned EltSize = EltTy.getSizeInBits();
1659  const unsigned BitsForNumParts = NarrowSize * NumParts;
1660 
1661  // Check if we have any leftovers. If we do, then only handle the case where
1662  // the leftover is one element.
1663  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1664  return UnableToLegalize;
1665 
1666  if (BitsForNumParts != Size) {
1667  Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1668  MIRBuilder.buildUndef(AccumDstReg);
1669 
1670  // Handle the pieces which evenly divide into the requested type with
1671  // extract/op/insert sequence.
1672  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1673  SmallVector<SrcOp, 4> SrcOps;
1674  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1675  Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1676  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1677  SrcOps.push_back(PartOpReg);
1678  }
1679 
1680  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1681  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1682 
1683  Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1684  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1685  AccumDstReg = PartInsertReg;
1686  }
1687 
1688  // Handle the remaining element sized leftover piece.
1689  SmallVector<SrcOp, 4> SrcOps;
1690  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1691  Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1692  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1693  BitsForNumParts);
1694  SrcOps.push_back(PartOpReg);
1695  }
1696 
1697  Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1698  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1699  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1700  MI.eraseFromParent();
1701 
1702  return Legalized;
1703  }
1704 
1705  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1706 
1707  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1708 
1709  if (NumOps >= 2)
1710  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1711 
1712  if (NumOps >= 3)
1713  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1714 
1715  for (int i = 0; i < NumParts; ++i) {
1716  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1717 
1718  if (NumOps == 1)
1719  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1720  else if (NumOps == 2) {
1721  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1722  } else if (NumOps == 3) {
1723  MIRBuilder.buildInstr(Opc, {DstReg},
1724  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1725  }
1726 
1727  DstRegs.push_back(DstReg);
1728  }
1729 
1730  if (NarrowTy.isVector())
1731  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1732  else
1733  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1734 
1735  MI.eraseFromParent();
1736  return Legalized;
1737 }
1738 
1739 // Handle splitting vector operations which need to have the same number of
1740 // elements in each type index, but each type index may have a different element
1741 // type.
1742 //
1743 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1744 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1745 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1746 //
1747 // Also handles some irregular breakdown cases,
1748 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1749 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1750 // s64 = G_SHL s64, s32
//
// Only type index 0 is supported. Returns UnableToLegalize when the result
// type cannot be broken down into NarrowTyArg pieces or when a source operand
// cannot be split; otherwise MI is erased and Legalized is returned.
//
// NOTE(review): the line carrying the return type is not visible in this
// listing; other definitions in the file spell it
// LegalizerHelper::LegalizeResult -- confirm against the original source.
1752 LegalizerHelper::fewerElementsVectorMultiEltType(
1753  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1754  if (TypeIdx != 0)
1755  return UnableToLegalize;
1756 
1757  const LLT NarrowTy0 = NarrowTyArg;
     // Number of elements each piece carries (1 when narrowing to a scalar).
1758  const unsigned NewNumElts =
1759  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
1760 
1761  const Register DstReg = MI.getOperand(0).getReg();
1762  LLT DstTy = MRI.getType(DstReg);
1763  LLT LeftoverTy0;
1764 
1765  int NumParts, NumLeftover;
1766  // All of the operands need to have the same number of elements, so if we can
1767  // determine a type breakdown for the result type, we can for all of the
1768  // source types.
1769  std::tie(NumParts, NumLeftover)
1770  = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
1771  if (NumParts < 0)
1772  return UnableToLegalize;
1773 
     // NOTE(review): `NewInsts` is used below but its declaration is not
     // visible in this listing -- confirm against the original source.
1775 
1776  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
1777  SmallVector<Register, 4> PartRegs, LeftoverRegs;
1778 
     // Walk the source operands; PartRegs / LeftoverRegs are scratch lists that
     // are refilled for each operand and cleared at the end of the iteration.
1779  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
     // NOTE(review): LeftoverTy appears unused in the visible body.
1780  LLT LeftoverTy;
1781  Register SrcReg = MI.getOperand(I).getReg();
1782  LLT SrcTyI = MRI.getType(SrcReg);
     // Same element count as the narrowed result, but this operand's own
     // element type.
1783  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
1784  LLT LeftoverTyI;
1785 
1786  // Split this operand into the requested typed registers, and any leftover
1787  // required to reproduce the original type.
1788  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
1789  LeftoverRegs))
1790  return UnableToLegalize;
1791 
1792  if (I == 1) {
1793  // For the first operand, create an instruction for each part and setup
1794  // the result.
1795  for (Register PartReg : PartRegs) {
1796  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
     // NOTE(review): the head of this statement is missing from the listing;
     // presumably it records a new, not yet inserted instruction in NewInsts.
1798  .addDef(PartDstReg)
1799  .addUse(PartReg));
1800  DstRegs.push_back(PartDstReg);
1801  }
1802 
1803  for (Register LeftoverReg : LeftoverRegs) {
1804  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
     // NOTE(review): statement head missing from the listing, as above.
1806  .addDef(PartDstReg)
1807  .addUse(LeftoverReg));
1808  LeftoverDstRegs.push_back(PartDstReg);
1809  }
1810  } else {
1811  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
1812 
1813  // Add the newly created operand splits to the existing instructions. The
1814  // odd-sized pieces are ordered after the requested NarrowTyArg sized
1815  // pieces.
1816  unsigned InstCount = 0;
1817  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
1818  NewInsts[InstCount++].addUse(PartRegs[J]);
1819  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
1820  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
1821  }
1822 
1823  PartRegs.clear();
1824  LeftoverRegs.clear();
1825  }
1826 
1827  // Insert the newly built operations and rebuild the result register.
1828  for (auto &MIB : NewInsts)
1829  MIRBuilder.insertInstr(MIB);
1830 
1831  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
1832 
1833  MI.eraseFromParent();
1834  return Legalized;
1835 }
1836 
1838 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
1839  LLT NarrowTy) {
1840  if (TypeIdx != 0)
1841  return UnableToLegalize;
1842 
1843  Register DstReg = MI.getOperand(0).getReg();
1844  Register SrcReg = MI.getOperand(1).getReg();
1845  LLT DstTy = MRI.getType(DstReg);
1846  LLT SrcTy = MRI.getType(SrcReg);
1847 
1848  LLT NarrowTy0 = NarrowTy;
1849  LLT NarrowTy1;
1850  unsigned NumParts;
1851 
1852  if (NarrowTy.isVector()) {
1853  // Uneven breakdown not handled.
1854  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
1855  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
1856  return UnableToLegalize;
1857 
1858  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
1859  } else {
1860  NumParts = DstTy.getNumElements();
1861  NarrowTy1 = SrcTy.getElementType();
1862  }
1863 
1864  SmallVector<Register, 4> SrcRegs, DstRegs;
1865  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
1866 
1867  for (unsigned I = 0; I < NumParts; ++I) {
1868  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1869  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
1870  .addDef(DstReg)
1871  .addUse(SrcRegs[I]);
1872 
1873  NewInst->setFlags(MI.getFlags());
1874  DstRegs.push_back(DstReg);
1875  }
1876 
1877  if (NarrowTy.isVector())
1878  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1879  else
1880  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1881 
1882  MI.eraseFromParent();
1883  return Legalized;
1884 }
1885 
1887 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
1888  LLT NarrowTy) {
1889  Register DstReg = MI.getOperand(0).getReg();
1890  Register Src0Reg = MI.getOperand(2).getReg();
1891  LLT DstTy = MRI.getType(DstReg);
1892  LLT SrcTy = MRI.getType(Src0Reg);
1893 
1894  unsigned NumParts;
1895  LLT NarrowTy0, NarrowTy1;
1896 
1897  if (TypeIdx == 0) {
1898  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1899  unsigned OldElts = DstTy.getNumElements();
1900 
1901  NarrowTy0 = NarrowTy;
1902  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
1903  NarrowTy1 = NarrowTy.isVector() ?
1904  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
1905  SrcTy.getElementType();
1906 
1907  } else {
1908  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1909  unsigned OldElts = SrcTy.getNumElements();
1910 
1911  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
1912  NarrowTy.getNumElements();
1913  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
1914  DstTy.getScalarSizeInBits());
1915  NarrowTy1 = NarrowTy;
1916  }
1917 
1918  // FIXME: Don't know how to handle the situation where the small vectors
1919  // aren't all the same size yet.
1920  if (NarrowTy1.isVector() &&
1921  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
1922  return UnableToLegalize;
1923 
1924  CmpInst::Predicate Pred
1925  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1926 
1927  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
1928  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
1929  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
1930 
1931  for (unsigned I = 0; I < NumParts; ++I) {
1932  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1933  DstRegs.push_back(DstReg);
1934 
1935  if (MI.getOpcode() == TargetOpcode::G_ICMP)
1936  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1937  else {
1938  MachineInstr *NewCmp
1939  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1940  NewCmp->setFlags(MI.getFlags());
1941  }
1942  }
1943 
1944  if (NarrowTy1.isVector())
1945  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1946  else
1947  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1948 
1949  MI.eraseFromParent();
1950  return Legalized;
1951 }
1952 
1954 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
1955  LLT NarrowTy) {
1956  Register DstReg = MI.getOperand(0).getReg();
1957  Register CondReg = MI.getOperand(1).getReg();
1958 
1959  unsigned NumParts = 0;
1960  LLT NarrowTy0, NarrowTy1;
1961 
1962  LLT DstTy = MRI.getType(DstReg);
1963  LLT CondTy = MRI.getType(CondReg);
1964  unsigned Size = DstTy.getSizeInBits();
1965 
1966  assert(TypeIdx == 0 || CondTy.isVector());
1967 
1968  if (TypeIdx == 0) {
1969  NarrowTy0 = NarrowTy;
1970  NarrowTy1 = CondTy;
1971 
1972  unsigned NarrowSize = NarrowTy0.getSizeInBits();
1973  // FIXME: Don't know how to handle the situation where the small vectors
1974  // aren't all the same size yet.
1975  if (Size % NarrowSize != 0)
1976  return UnableToLegalize;
1977 
1978  NumParts = Size / NarrowSize;
1979 
1980  // Need to break down the condition type
1981  if (CondTy.isVector()) {
1982  if (CondTy.getNumElements() == NumParts)
1983  NarrowTy1 = CondTy.getElementType();
1984  else
1985  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
1986  CondTy.getScalarSizeInBits());
1987  }
1988  } else {
1989  NumParts = CondTy.getNumElements();
1990  if (NarrowTy.isVector()) {
1991  // TODO: Handle uneven breakdown.
1992  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
1993  return UnableToLegalize;
1994 
1995  return UnableToLegalize;
1996  } else {
1997  NarrowTy0 = DstTy.getElementType();
1998  NarrowTy1 = NarrowTy;
1999  }
2000  }
2001 
2002  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2003  if (CondTy.isVector())
2004  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2005 
2006  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2007  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2008 
2009  for (unsigned i = 0; i < NumParts; ++i) {
2010  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2011  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2012  Src1Regs[i], Src2Regs[i]);
2013  DstRegs.push_back(DstReg);
2014  }
2015 
2016  if (NarrowTy0.isVector())
2017  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2018  else
2019  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2020 
2021  MI.eraseFromParent();
2022  return Legalized;
2023 }
2024 
/// Split a G_PHI into one G_PHI per \p NarrowTy piece of its result type,
/// plus one per leftover piece when the breakdown is uneven.
///
/// The new phis are built first, the original result is reassembled with
/// insertParts at the first non-PHI position of the phi's block, and each
/// incoming value is split with extractParts before the first terminator of
/// its predecessor block. \p TypeIdx is not read in the visible body.
///
/// Returns UnableToLegalize if the result type has no valid breakdown or an
/// incoming value cannot be split; otherwise erases \p MI and returns
/// Legalized.
2026 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2027  LLT NarrowTy) {
2028  const Register DstReg = MI.getOperand(0).getReg();
2029  LLT PhiTy = MRI.getType(DstReg);
2030  LLT LeftoverTy;
2031 
2032  // All of the operands need to have the same number of elements, so if we can
2033  // determine a type breakdown for the result type, we can for all of the
2034  // source types.
2035  int NumParts, NumLeftover;
2036  std::tie(NumParts, NumLeftover)
2037  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2038  if (NumParts < 0)
2039  return UnableToLegalize;
2040 
2041  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  // NOTE(review): NewInsts is used below but its declaration is not visible
  // in this listing (a line appears to be missing here) - confirm against the
  // original file.
2043 
2044  const int TotalNumParts = NumParts + NumLeftover;
2045 
2046  // Insert the new phis in the result block first.
  // Indices [0, NumParts) are NarrowTy pieces; the rest are LeftoverTy pieces.
2047  for (int I = 0; I != TotalNumParts; ++I) {
2048  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2049  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2050  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2051  .addDef(PartDstReg));
2052  if (I < NumParts)
2053  DstRegs.push_back(PartDstReg);
2054  else
2055  LeftoverDstRegs.push_back(PartDstReg);
2056  }
2057 
  // Rebuild the original result register from the pieces, after all phis of
  // the block.
2058  MachineBasicBlock *MBB = MI.getParent();
2059  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2060  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2061 
2062  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2063 
2064  // Insert code to extract the incoming values in each predecessor block.
  // Phi operands come in (value, block) pairs starting at operand 1.
2065  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2066  PartRegs.clear();
2067  LeftoverRegs.clear();
2068 
2069  Register SrcReg = MI.getOperand(I).getReg();
2070  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2071  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2072 
  // The leftover type was already computed above; the out-parameter here is
  // only needed to satisfy extractParts' interface.
2073  LLT Unused;
2074  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2075  LeftoverRegs))
2076  return UnableToLegalize;
2077 
2078  // Add the newly created operand splits to the existing instructions. The
2079  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2080  // pieces.
2081  for (int J = 0; J != TotalNumParts; ++J) {
2082  MachineInstrBuilder MIB = NewInsts[J];
2083  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2084  MIB.addMBB(&OpMBB);
2085  }
2086  }
2087 
2088  MI.eraseFromParent();
2089  return Legalized;
2090 }
2091 
/// Split a G_LOAD or G_STORE into several narrower memory accesses of
/// \p NarrowTy, followed by accesses of a leftover type when the value type
/// is not an even multiple of \p NarrowTy.
///
/// Only \p TypeIdx 0 is handled, and atomic accesses are rejected. For a
/// load, the loaded pieces are recombined into the original value register
/// with insertParts. Returns UnableToLegalize when the request cannot be
/// handled; otherwise erases \p MI and returns Legalized.
2093 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2094  LLT NarrowTy) {
2095  // FIXME: Don't know how to handle secondary types yet.
2096  if (TypeIdx != 0)
2097  return UnableToLegalize;
2098 
  // Only the first memory operand of the instruction is consulted.
2099  MachineMemOperand *MMO = *MI.memoperands_begin();
2100 
2101  // This implementation doesn't work for atomics. Give up instead of doing
2102  // something invalid.
  // NOTE(review): the second operand of this '||' is not visible in this
  // listing (a line appears to be missing) - confirm against the original.
2103  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2105  return UnableToLegalize;
2106 
2107  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2108  Register ValReg = MI.getOperand(0).getReg();
2109  Register AddrReg = MI.getOperand(1).getReg();
2110  LLT ValTy = MRI.getType(ValReg);
2111 
  // -1 marks "no valid breakdown found".
2112  int NumParts = -1;
2113  int NumLeftover = -1;
2114  LLT LeftoverTy;
2115  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  // A load only needs the piece counts (registers are created as pieces are
  // loaded); a store needs the value split into registers up front.
2116  if (IsLoad) {
2117  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2118  } else {
2119  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2120  NarrowLeftoverRegs)) {
2121  NumParts = NarrowRegs.size();
2122  NumLeftover = NarrowLeftoverRegs.size();
2123  }
2124  }
2125 
2126  if (NumParts == -1)
2127  return UnableToLegalize;
2128 
  // Offsets are materialized as scalars as wide as the address type.
2129  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2130 
2131  unsigned TotalSize = ValTy.getSizeInBits();
2132 
2133  // Split the load/store into PartTy sized pieces starting at Offset. If this
2134  // is a load, return the new registers in ValRegs. For a store, each element
2135  // of ValRegs should be PartTy. Returns the next offset that needs to be
2136  // handled.
  // Offset and TotalSize are in bits; they are converted to bytes for the
  // address computation and the new memory operand.
2137  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
2138  unsigned Offset) -> unsigned {
  // NOTE(review): MF is used below but its declaration is not visible in this
  // listing (a line appears to be missing here) - confirm against the
  // original file.
2140  unsigned PartSize = PartTy.getSizeInBits();
  // The loop stops after NumParts pieces or once the whole value is covered,
  // whichever comes first.
2141  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2142  Offset += PartSize, ++Idx) {
2143  unsigned ByteSize = PartSize / 8;
2144  unsigned ByteOffset = Offset / 8;
2145  Register NewAddrReg;
2146 
2147  MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2148 
2149  MachineMemOperand *NewMMO =
2150  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2151 
2152  if (IsLoad) {
2153  Register Dst = MRI.createGenericVirtualRegister(PartTy);
2154  ValRegs.push_back(Dst);
2155  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2156  } else {
2157  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2158  }
2159  }
2160 
2161  return Offset;
2162  };
2163 
2164  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2165 
2166  // Handle the rest of the register if this isn't an even type breakdown.
2167  if (LeftoverTy.isValid())
2168  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2169 
  // For a load, reassemble the original value from the loaded pieces.
2170  if (IsLoad) {
2171  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2172  LeftoverTy, NarrowLeftoverRegs);
2173  }
2174 
2175  MI.eraseFromParent();
2176  return Legalized;
2177 }
2178 
2181  LLT NarrowTy) {
  // NOTE(review): the start of this definition's signature is not visible in
  // this listing; the body below uses MI, TypeIdx and NarrowTy.
  //
  // Dispatches on MI's opcode to the helper that knows how to split that
  // kind of instruction into NarrowTy pieces. Opcodes without a case return
  // UnableToLegalize.
2182  using namespace TargetOpcode;
2183 
  // New instructions built by the helpers are positioned relative to MI.
2184  MIRBuilder.setInstr(MI);
2185  switch (MI.getOpcode()) {
2186  case G_IMPLICIT_DEF:
2187  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Opcodes routed to fewerElementsVectorBasic.
2188  case G_AND:
2189  case G_OR:
2190  case G_XOR:
2191  case G_ADD:
2192  case G_SUB:
2193  case G_MUL:
2194  case G_SMULH:
2195  case G_UMULH:
2196  case G_FADD:
2197  case G_FMUL:
2198  case G_FSUB:
2199  case G_FNEG:
2200  case G_FABS:
2201  case G_FCANONICALIZE:
2202  case G_FDIV:
2203  case G_FREM:
2204  case G_FMA:
2205  case G_FPOW:
2206  case G_FEXP:
2207  case G_FEXP2:
2208  case G_FLOG:
2209  case G_FLOG2:
2210  case G_FLOG10:
2211  case G_FNEARBYINT:
2212  case G_FCEIL:
2213  case G_FFLOOR:
2214  case G_FRINT:
2215  case G_INTRINSIC_ROUND:
2216  case G_INTRINSIC_TRUNC:
2217  case G_FCOS:
2218  case G_FSIN:
2219  case G_FSQRT:
2220  case G_BSWAP:
2221  case G_SDIV:
2222  case G_SMIN:
2223  case G_SMAX:
2224  case G_UMIN:
2225  case G_UMAX:
2226  return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Opcodes routed to fewerElementsVectorMultiEltType.
2227  case G_SHL:
2228  case G_LSHR:
2229  case G_ASHR:
2230  case G_CTLZ:
2231  case G_CTLZ_ZERO_UNDEF:
2232  case G_CTTZ:
2233  case G_CTTZ_ZERO_UNDEF:
2234  case G_CTPOP:
2235  case G_FCOPYSIGN:
2236  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions, routed to fewerElementsVectorCasts.
2237  case G_ZEXT:
2238  case G_SEXT:
2239  case G_ANYEXT:
2240  case G_FPEXT:
2241  case G_FPTRUNC:
2242  case G_SITOFP:
2243  case G_UITOFP:
2244  case G_FPTOSI:
2245  case G_FPTOUI:
2246  case G_INTTOPTR:
2247  case G_PTRTOINT:
2248  case G_ADDRSPACE_CAST:
2249  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2250  case G_ICMP:
2251  case G_FCMP:
2252  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2253  case G_SELECT:
2254  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2255  case G_PHI:
2256  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  // Memory accesses share the generic load/store width reduction.
2257  case G_LOAD:
2258  case G_STORE:
2259  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2260  default:
2261  return UnableToLegalize;
2262  }
2263 }
2264 
2266 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
2267  const LLT HalfTy, const LLT AmtTy) {
2268 
2269  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2270  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2271  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2272 
2273  if (Amt.isNullValue()) {
2274  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2275  MI.eraseFromParent();
2276  return Legalized;
2277  }
2278 
2279  LLT NVT = HalfTy;
2280  unsigned NVTBits = HalfTy.getSizeInBits();
2281  unsigned VTBits = 2 * NVTBits;
2282 
2283  SrcOp Lo(Register(0)), Hi(Register(0));
2284  if (MI.getOpcode() == TargetOpcode::G_SHL) {
2285  if (Amt.ugt(VTBits)) {
2286  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2287  } else if (Amt.ugt(NVTBits)) {
2288  Lo = MIRBuilder.buildConstant(NVT, 0);
2289  Hi = MIRBuilder.buildShl(NVT, InL,
2290  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2291  } else if (Amt == NVTBits) {
2292  Lo = MIRBuilder.buildConstant(NVT, 0);
2293  Hi = InL;
2294  } else {
2295  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2296  auto OrLHS =
2297  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2298  auto OrRHS = MIRBuilder.buildLShr(
2299  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2300  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2301  }
2302  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2303  if (Amt.ugt(VTBits)) {
2304  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2305  } else if (Amt.ugt(NVTBits)) {
2306  Lo = MIRBuilder.buildLShr(NVT, InH,
2307  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2308  Hi = MIRBuilder.buildConstant(NVT, 0);
2309  } else if (Amt == NVTBits) {
2310  Lo = InH;
2311  Hi = MIRBuilder.buildConstant(NVT, 0);
2312  } else {
2313  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2314 
2315  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2316  auto OrRHS = MIRBuilder.buildShl(
2317  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2318 
2319  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2320  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2321  }
2322  } else {
2323  if (Amt.ugt(VTBits)) {
2324  Hi = Lo = MIRBuilder.buildAShr(
2325  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2326  } else if (Amt.ugt(NVTBits)) {
2327  Lo = MIRBuilder.buildAShr(NVT, InH,
2328  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2329  Hi = MIRBuilder.buildAShr(NVT, InH,
2330  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2331  } else if (Amt == NVTBits) {
2332  Lo = InH;
2333  Hi = MIRBuilder.buildAShr(NVT, InH,
2334  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2335  } else {
2336  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2337 
2338  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2339  auto OrRHS = MIRBuilder.buildShl(
2340  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2341 
2342  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2343  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2344  }
2345  }
2346 
2347  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2348  MI.eraseFromParent();
2349 
2350  return Legalized;
2351 }
2352 
2353 // TODO: Optimize if constant shift amount.
2355 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2356  LLT RequestedTy) {
2357  if (TypeIdx == 1) {
2358  Observer.changingInstr(MI);
2359  narrowScalarSrc(MI, RequestedTy, 2);
2360  Observer.changedInstr(MI);
2361  return Legalized;
2362  }
2363 
2364  Register DstReg = MI.getOperand(0).getReg();
2365  LLT DstTy = MRI.getType(DstReg);
2366  if (DstTy.isVector())
2367  return UnableToLegalize;
2368 
2369  Register Amt = MI.getOperand(2).getReg();
2370  LLT ShiftAmtTy = MRI.getType(Amt);
2371  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2372  if (DstEltSize % 2 != 0)
2373  return UnableToLegalize;
2374 
2375  // Ignore the input type. We can only go to exactly half the size of the
2376  // input. If that isn't small enough, the resulting pieces will be further
2377  // legalized.
2378  const unsigned NewBitSize = DstEltSize / 2;
2379  const LLT HalfTy = LLT::scalar(NewBitSize);
2380  const LLT CondTy = LLT::scalar(1);
2381 
2382  if (const MachineInstr *KShiftAmt =
2383  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2384  return narrowScalarShiftByConstant(
2385  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2386  }
2387 
2388  // TODO: Expand with known bits.
2389 
2390  // Handle the fully general expansion by an unknown amount.
2391  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2392 
2393  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2394  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2395  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2396 
2397  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2398  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2399 
2400  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2401  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2402  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2403 
2404  Register ResultRegs[2];
2405  switch (MI.getOpcode()) {
2406  case TargetOpcode::G_SHL: {
2407  // Short: ShAmt < NewBitSize
2408  auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2409 
2410  auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2411  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2412  auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2413 
2414  // Long: ShAmt >= NewBitSize
2415  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
2416  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2417 
2418  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2419  auto Hi = MIRBuilder.buildSelect(
2420  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2421 
2422  ResultRegs[0] = Lo.getReg(0);
2423  ResultRegs[1] = Hi.getReg(0);
2424  break;
2425  }
2426  case TargetOpcode::G_LSHR: {
2427  // Short: ShAmt < NewBitSize
2428  auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2429 
2430  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2431  auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2432  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2433 
2434  // Long: ShAmt >= NewBitSize
2435  auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
2436  auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2437 
2438  auto Lo = MIRBuilder.buildSelect(
2439  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2440  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2441 
2442  ResultRegs[0] = Lo.getReg(0);
2443  ResultRegs[1] = Hi.getReg(0);
2444  break;
2445  }
2446  case TargetOpcode::G_ASHR: {
2447  // Short: ShAmt < NewBitSize
2448  auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2449 
2450  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2451  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2452  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2453 
2454  // Long: ShAmt >= NewBitSize
2455 
2456  // Sign of Hi part.
2457  auto HiL = MIRBuilder.buildAShr(
2458  HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2459 
2460  auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2461 
2462  auto Lo = MIRBuilder.buildSelect(
2463  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2464 
2465  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2466 
2467  ResultRegs[0] = Lo.getReg(0);
2468  ResultRegs[1] = Hi.getReg(0);
2469  break;
2470  }
2471  default:
2472  llvm_unreachable("not a shift");
2473  }
2474 
2475  MIRBuilder.buildMerge(DstReg, ResultRegs);
2476  MI.eraseFromParent();
2477  return Legalized;
2478 }
2479 
2481 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2482  LLT MoreTy) {
2483  assert(TypeIdx == 0 && "Expecting only Idx 0");
2484 
2485  Observer.changingInstr(MI);
2486  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2487  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2488  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2489  moreElementsVectorSrc(MI, MoreTy, I);
2490  }
2491 
2492  MachineBasicBlock &MBB = *MI.getParent();
2493  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2494  moreElementsVectorDst(MI, MoreTy, 0);
2495  Observer.changedInstr(MI);
2496  return Legalized;
2497 }
2498 
2501  LLT MoreTy) {
  // NOTE(review): the start of this definition's signature is not visible in
  // this listing; the body below uses MI, TypeIdx and MoreTy.
  //
  // Widens the vector operands of MI selected by the opcode (and, for some
  // opcodes, by TypeIdx) to MoreTy. Each in-place change is bracketed by
  // Observer.changingInstr / Observer.changedInstr. Opcodes or type indices
  // without handling return UnableToLegalize.
2502  MIRBuilder.setInstr(MI);
2503  unsigned Opc = MI.getOpcode();
2504  switch (Opc) {
2505  case TargetOpcode::G_IMPLICIT_DEF: {
2506  Observer.changingInstr(MI);
2507  moreElementsVectorDst(MI, MoreTy, 0);
2508  Observer.changedInstr(MI);
2509  return Legalized;
2510  }
  // Two-source operations: both sources and the result are widened.
2511  case TargetOpcode::G_AND:
2512  case TargetOpcode::G_OR:
2513  case TargetOpcode::G_XOR:
2514  case TargetOpcode::G_SMIN:
2515  case TargetOpcode::G_SMAX:
2516  case TargetOpcode::G_UMIN:
2517  case TargetOpcode::G_UMAX: {
2518  Observer.changingInstr(MI);
2519  moreElementsVectorSrc(MI, MoreTy, 1);
2520  moreElementsVectorSrc(MI, MoreTy, 2);
2521  moreElementsVectorDst(MI, MoreTy, 0);
2522  Observer.changedInstr(MI);
2523  return Legalized;
2524  }
  // G_EXTRACT: only the source (type index 1) can be widened.
2525  case TargetOpcode::G_EXTRACT:
2526  if (TypeIdx != 1)
2527  return UnableToLegalize;
2528  Observer.changingInstr(MI);
2529  moreElementsVectorSrc(MI, MoreTy, 1);
2530  Observer.changedInstr(MI);
2531  return Legalized;
  // G_INSERT: operand 1 and the result are widened; operand 2 is left alone.
2532  case TargetOpcode::G_INSERT:
2533  if (TypeIdx != 0)
2534  return UnableToLegalize;
2535  Observer.changingInstr(MI);
2536  moreElementsVectorSrc(MI, MoreTy, 1);
2537  moreElementsVectorDst(MI, MoreTy, 0);
2538  Observer.changedInstr(MI);
2539  return Legalized;
  // G_SELECT: handled only for type index 0 and a non-vector condition; the
  // two selected values and the result are widened.
2540  case TargetOpcode::G_SELECT:
2541  if (TypeIdx != 0)
2542  return UnableToLegalize;
2543  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2544  return UnableToLegalize;
2545 
2546  Observer.changingInstr(MI);
2547  moreElementsVectorSrc(MI, MoreTy, 2);
2548  moreElementsVectorSrc(MI, MoreTy, 3);
2549  moreElementsVectorDst(MI, MoreTy, 0);
2550  Observer.changedInstr(MI);
2551  return Legalized;
2552  case TargetOpcode::G_PHI:
2553  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
2554  default:
2555  return UnableToLegalize;
2556  }
2557 }
2558 
/// Multiply two values given as lists of \p NarrowTy registers
/// (\p Src1Regs and \p Src2Regs, indexed from the low part upwards) and store
/// the product's parts into \p DstRegs.
///
/// \p DstRegs must already have its final size: that size determines how many
/// result parts are computed, and each entry is overwritten. Src1Regs.size()
/// is used as the part count for both sources.
2559 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
2560  ArrayRef<Register> Src1Regs,
2561  ArrayRef<Register> Src2Regs,
2562  LLT NarrowTy) {
  // NOTE(review): the builder B used below is not declared in this listing (a
  // line appears to be missing here) - confirm against the original file.
2564  unsigned SrcParts = Src1Regs.size();
2565  unsigned DstParts = DstRegs.size();
2566 
  // The lowest result part is just the low product of the lowest source
  // parts; nothing carries into it.
2567  unsigned DstIdx = 0; // Low bits of the result.
2568  Register FactorSum =
2569  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
2570  DstRegs[DstIdx] = FactorSum;
2571 
  // Carry accumulated while summing the previous result part. It is first
  // read when DstIdx == 2, after being assigned at the end of DstIdx == 1.
2572  unsigned CarrySumPrevDstIdx;
2573  SmallVector<Register, 4> Factors;
2574 
2575  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
2576  // Collect low parts of muls for DstIdx.
  // The bounds keep both DstIdx - i and i inside [0, SrcParts).
2577  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
2578  i <= std::min(DstIdx, SrcParts - 1); ++i) {
2579  MachineInstrBuilder Mul =
2580  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
2581  Factors.push_back(Mul.getReg(0));
2582  }
2583  // Collect high parts of muls from previous DstIdx.
2584  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
2585  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
2586  MachineInstrBuilder Umulh =
2587  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
2588  Factors.push_back(Umulh.getReg(0));
2589  }
2590  // Add CarrySum from additions calculated for previous DstIdx.
2591  if (DstIdx != 1) {
2592  Factors.push_back(CarrySumPrevDstIdx);
2593  }
2594 
2595  Register CarrySum;
2596  // Add all factors and accumulate all carries into CarrySum.
  // Both branches read Factors[0] and Factors[1] unconditionally.
2597  if (DstIdx != DstParts - 1) {
2598  MachineInstrBuilder Uaddo =
2599  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
2600  FactorSum = Uaddo.getReg(0);
2601  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
2602  for (unsigned i = 2; i < Factors.size(); ++i) {
2603  MachineInstrBuilder Uaddo =
2604  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
2605  FactorSum = Uaddo.getReg(0);
2606  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
2607  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
2608  }
2609  } else {
2610  // Since value for the next index is not calculated, neither is CarrySum.
2611  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
2612  for (unsigned i = 2; i < Factors.size(); ++i)
2613  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
2614  }
2615 
2616  CarrySumPrevDstIdx = CarrySum;
2617  DstRegs[DstIdx] = FactorSum;
2618  Factors.clear();
2619  }
2620 }
2621 
2623 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
2624  Register DstReg = MI.getOperand(0).getReg();
2625  Register Src1 = MI.getOperand(1).getReg();
2626  Register Src2 = MI.getOperand(2).getReg();
2627 
2628  LLT Ty = MRI.getType(DstReg);
2629  if (Ty.isVector())
2630  return UnableToLegalize;
2631 
2632  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
2633  unsigned DstSize = Ty.getSizeInBits();
2634  unsigned NarrowSize = NarrowTy.getSizeInBits();
2635  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
2636  return UnableToLegalize;
2637 
2638  unsigned NumDstParts = DstSize / NarrowSize;
2639  unsigned NumSrcParts = SrcSize / NarrowSize;
2640  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
2641  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
2642 
2643  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
2644  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
2645  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
2646  DstTmpRegs.resize(DstTmpParts);
2647  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
2648 
2649  // Take only high half of registers if this is high mul.
2650  ArrayRef<Register> DstRegs(
2651  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
2652  MIRBuilder.buildMerge(DstReg, DstRegs);
2653  MI.eraseFromParent();
2654  return Legalized;
2655 }
2656 
2658 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2659  LLT NarrowTy) {
2660  if (TypeIdx != 1)
2661  return UnableToLegalize;
2662 
2663  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2664 
2665  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2666  // FIXME: add support for when SizeOp1 isn't an exact multiple of
2667  // NarrowSize.
2668  if (SizeOp1 % NarrowSize != 0)
2669  return UnableToLegalize;
2670  int NumParts = SizeOp1 / NarrowSize;
2671 
2672  SmallVector<Register, 2> SrcRegs, DstRegs;
2673  SmallVector<uint64_t, 2> Indexes;
2674  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2675 
2676  Register OpReg = MI.getOperand(0).getReg();
2677  uint64_t OpStart = MI.getOperand(2).getImm();
2678  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2679  for (int i = 0; i < NumParts; ++i) {
2680  unsigned SrcStart = i * NarrowSize;
2681 
2682  if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2683  // No part of the extract uses this subregister, ignore it.
2684  continue;
2685  } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2686  // The entire subregister is extracted, forward the value.
2687  DstRegs.push_back(SrcRegs[i]);
2688  continue;
2689  }
2690 
2691  // OpSegStart is where this destination segment would start in OpReg if it
2692  // extended infinitely in both directions.
2693  int64_t ExtractOffset;
2694  uint64_t SegSize;
2695  if (OpStart < SrcStart) {
2696  ExtractOffset = 0;
2697  SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2698  } else {
2699  ExtractOffset = OpStart - SrcStart;
2700  SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2701  }
2702 
2703  Register SegReg = SrcRegs[i];
2704  if (ExtractOffset != 0 || SegSize != NarrowSize) {
2705  // A genuine extract is needed.
2706  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2707  MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2708  }
2709 
2710  DstRegs.push_back(SegReg);
2711  }
2712 
2713  Register DstReg = MI.getOperand(0).getReg();
2714  if(MRI.getType(DstReg).isVector())
2715  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2716  else
2717  MIRBuilder.buildMerge(DstReg, DstRegs);
2718  MI.eraseFromParent();
2719  return Legalized;
2720 }
2721 
2723 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2724  LLT NarrowTy) {
2725  // FIXME: Don't know how to handle secondary types yet.
2726  if (TypeIdx != 0)
2727  return UnableToLegalize;
2728 
2729  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2730  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2731 
2732  // FIXME: add support for when SizeOp0 isn't an exact multiple of
2733  // NarrowSize.
2734  if (SizeOp0 % NarrowSize != 0)
2735  return UnableToLegalize;
2736 
2737  int NumParts = SizeOp0 / NarrowSize;
2738 
2739  SmallVector<Register, 2> SrcRegs, DstRegs;
2740  SmallVector<uint64_t, 2> Indexes;
2741  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2742 
2743  Register OpReg = MI.getOperand(2).getReg();
2744  uint64_t OpStart = MI.getOperand(3).getImm();
2745  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2746  for (int i = 0; i < NumParts; ++i) {
2747  unsigned DstStart = i * NarrowSize;
2748 
2749  if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
2750  // No part of the insert affects this subregister, forward the original.
2751  DstRegs.push_back(SrcRegs[i]);
2752  continue;
2753  } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2754  // The entire subregister is defined by this insert, forward the new
2755  // value.
2756  DstRegs.push_back(OpReg);
2757  continue;
2758  }
2759 
2760  // OpSegStart is where this destination segment would start in OpReg if it
2761  // extended infinitely in both directions.
2762  int64_t ExtractOffset, InsertOffset;
2763  uint64_t SegSize;
2764  if (OpStart < DstStart) {
2765  InsertOffset = 0;
2766  ExtractOffset = DstStart - OpStart;
2767  SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
2768  } else {
2769  InsertOffset = OpStart - DstStart;
2770  ExtractOffset = 0;
2771  SegSize =
2772  std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
2773  }
2774 
2775  Register SegReg = OpReg;
2776  if (ExtractOffset != 0 || SegSize != OpSize) {
2777  // A genuine extract is needed.
2778  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2779  MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
2780  }
2781 
2782  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2783  MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
2784  DstRegs.push_back(DstReg);
2785  }
2786 
2787  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
2788  Register DstReg = MI.getOperand(0).getReg();
2789  if(MRI.getType(DstReg).isVector())
2790  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2791  else
2792  MIRBuilder.buildMerge(DstReg, DstRegs);
2793  MI.eraseFromParent();
2794  return Legalized;
2795 }
2796 
2798 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
2799  LLT NarrowTy) {
2800  Register DstReg = MI.getOperand(0).getReg();
2801  LLT DstTy = MRI.getType(DstReg);
2802 
2803  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
2804 
2805  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
2806  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
2807  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
2808  LLT LeftoverTy;
2809  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
2810  Src0Regs, Src0LeftoverRegs))
2811  return UnableToLegalize;
2812 
2813  LLT Unused;
2814  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
2815  Src1Regs, Src1LeftoverRegs))
2816  llvm_unreachable("inconsistent extractParts result");
2817 
2818  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2819  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2820  {Src0Regs[I], Src1Regs[I]});
2821  DstRegs.push_back(Inst->getOperand(0).getReg());
2822  }
2823 
2824  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2825  auto Inst = MIRBuilder.buildInstr(
2826  MI.getOpcode(),
2827  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
2828  DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
2829  }
2830 
2831  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2832  LeftoverTy, DstLeftoverRegs);
2833 
2834  MI.eraseFromParent();
2835  return Legalized;
2836 }
2837 
2839 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
2840  LLT NarrowTy) {
2841  if (TypeIdx != 0)
2842  return UnableToLegalize;
2843 
2844  Register CondReg = MI.getOperand(1).getReg();
2845  LLT CondTy = MRI.getType(CondReg);
2846  if (CondTy.isVector()) // TODO: Handle vselect
2847  return UnableToLegalize;
2848 
2849  Register DstReg = MI.getOperand(0).getReg();
2850  LLT DstTy = MRI.getType(DstReg);
2851 
2852  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
2853  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
2854  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
2855  LLT LeftoverTy;
2856  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
2857  Src1Regs, Src1LeftoverRegs))
2858  return UnableToLegalize;
2859 
2860  LLT Unused;
2861  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
2862  Src2Regs, Src2LeftoverRegs))
2863  llvm_unreachable("inconsistent extractParts result");
2864 
2865  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2866  auto Select = MIRBuilder.buildSelect(NarrowTy,
2867  CondReg, Src1Regs[I], Src2Regs[I]);
2868  DstRegs.push_back(Select->getOperand(0).getReg());
2869  }
2870 
2871  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2872  auto Select = MIRBuilder.buildSelect(
2873  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
2874  DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
2875  }
2876 
2877  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2878  LeftoverTy, DstLeftoverRegs);
2879 
2880  MI.eraseFromParent();
2881  return Legalized;
2882 }
2883 
2885 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2886  unsigned Opc = MI.getOpcode();
2887  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2888  auto isSupported = [this](const LegalityQuery &Q) {
2889  auto QAction = LI.getAction(Q).Action;
2890  return QAction == Legal || QAction == Libcall || QAction == Custom;
2891  };
2892  switch (Opc) {
2893  default:
2894  return UnableToLegalize;
2895  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
2896  // This trivially expands to CTLZ.
2897  Observer.changingInstr(MI);
2898  MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
2899  Observer.changedInstr(MI);
2900  return Legalized;
2901  }
2902  case TargetOpcode::G_CTLZ: {
2903  Register SrcReg = MI.getOperand(1).getReg();
2904  unsigned Len = Ty.getSizeInBits();
2905  if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
2906  // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2907  auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
2908  {Ty}, {SrcReg});
2909  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2910  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2911  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2912  SrcReg, MIBZero);
2913  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2914  MIBCtlzZU);
2915  MI.eraseFromParent();
2916  return Legalized;
2917  }
2918  // for now, we do this:
2919  // NewLen = NextPowerOf2(Len);
2920  // x = x | (x >> 1);
2921  // x = x | (x >> 2);
2922  // ...
2923  // x = x | (x >>16);
2924  // x = x | (x >>32); // for 64-bit input
2925  // Upto NewLen/2
2926  // return Len - popcount(x);
2927  //
2928  // Ref: "Hacker's Delight" by Henry Warren
2929  Register Op = SrcReg;
2930  unsigned NewLen = PowerOf2Ceil(Len);
2931  for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
2932  auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
2933  auto MIBOp = MIRBuilder.buildInstr(
2934  TargetOpcode::G_OR, {Ty},
2935  {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
2936  {Op, MIBShiftAmt})});
2937  Op = MIBOp->getOperand(0).getReg();
2938  }
2939  auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
2940  MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2941  {MIRBuilder.buildConstant(Ty, Len), MIBPop});
2942  MI.eraseFromParent();
2943  return Legalized;
2944  }
2945  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
2946  // This trivially expands to CTTZ.
2947  Observer.changingInstr(MI);
2948  MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
2949  Observer.changedInstr(MI);
2950  return Legalized;
2951  }
2952  case TargetOpcode::G_CTTZ: {
2953  Register SrcReg = MI.getOperand(1).getReg();
2954  unsigned Len = Ty.getSizeInBits();
2955  if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
2956  // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
2957  // zero.
2958  auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
2959  {Ty}, {SrcReg});
2960  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2961  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2962  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2963  SrcReg, MIBZero);
2964  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2965  MIBCttzZU);
2966  MI.eraseFromParent();
2967  return Legalized;
2968  }
2969  // for now, we use: { return popcount(~x & (x - 1)); }
2970  // unless the target has ctlz but not ctpop, in which case we use:
2971  // { return 32 - nlz(~x & (x-1)); }
2972  // Ref: "Hacker's Delight" by Henry Warren
2973  auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
2974  auto MIBNot =
2975  MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
2976  auto MIBTmp = MIRBuilder.buildInstr(
2977  TargetOpcode::G_AND, {Ty},
2978  {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
2979  {SrcReg, MIBCstNeg1})});
2980  if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
2981  isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
2982  auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
2984  TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2985  {MIBCstLen,
2986  MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
2987  MI.eraseFromParent();
2988  return Legalized;
2989  }
2990  MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
2991  MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
2992  return Legalized;
2993  }
2994  }
2995 }
2996 
2997 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
2998 // representation.
3000 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3001  Register Dst = MI.getOperand(0).getReg();
3002  Register Src = MI.getOperand(1).getReg();
3003  const LLT S64 = LLT::scalar(64);
3004  const LLT S32 = LLT::scalar(32);
3005  const LLT S1 = LLT::scalar(1);
3006 
3007  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3008 
3009  // unsigned cul2f(ulong u) {
3010  // uint lz = clz(u);
3011  // uint e = (u != 0) ? 127U + 63U - lz : 0;
3012  // u = (u << lz) & 0x7fffffffffffffffUL;
3013  // ulong t = u & 0xffffffffffUL;
3014  // uint v = (e << 23) | (uint)(u >> 40);
3015  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3016  // return as_float(v + r);
3017  // }
3018 
3019  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3020  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3021 
3022  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3023 
3024  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3025  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3026 
3027  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3028  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3029 
3030  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3031  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3032 
3033  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3034 
3035  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3036  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3037 
3038  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3039  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3040  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3041 
3042  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3043  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3044  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3045  auto One = MIRBuilder.buildConstant(S32, 1);
3046 
3047  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3048  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3049  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3050  MIRBuilder.buildAdd(Dst, V, R);
3051 
3052  return Legalized;
3053 }
3054 
3056 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3057  Register Dst = MI.getOperand(0).getReg();
3058  Register Src = MI.getOperand(1).getReg();
3059  LLT DstTy = MRI.getType(Dst);
3060  LLT SrcTy = MRI.getType(Src);
3061 
3062  if (SrcTy != LLT::scalar(64))
3063  return UnableToLegalize;
3064 
3065  if (DstTy == LLT::scalar(32)) {
3066  // TODO: SelectionDAG has several alternative expansions to port which may
3067  // be more reasonble depending on the available instructions. If a target
3068  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3069  // intermediate type, this is probably worse.
3070  return lowerU64ToF32BitOps(MI);
3071  }
3072 
3073  return UnableToLegalize;
3074 }
3075 
3077 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3078  Register Dst = MI.getOperand(0).getReg();
3079  Register Src = MI.getOperand(1).getReg();
3080  LLT DstTy = MRI.getType(Dst);
3081  LLT SrcTy = MRI.getType(Src);
3082 
3083  const LLT S64 = LLT::scalar(64);
3084  const LLT S32 = LLT::scalar(32);
3085  const LLT S1 = LLT::scalar(1);
3086 
3087  if (SrcTy != S64)
3088  return UnableToLegalize;
3089 
3090  if (DstTy == S32) {
3091  // signed cl2f(long l) {
3092  // long s = l >> 63;
3093  // float r = cul2f((l + s) ^ s);
3094  // return s ? -r : r;
3095  // }
3096  Register L = Src;
3097  auto SignBit = MIRBuilder.buildConstant(S64, 63);
3098  auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3099 
3100  auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3101  auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3102  auto R = MIRBuilder.buildUITOFP(S32, Xor);
3103 
3104  auto RNeg = MIRBuilder.buildFNeg(S32, R);
3105  auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3106  MIRBuilder.buildConstant(S64, 0));
3107  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3108  return Legalized;
3109  }
3110 
3111  return UnableToLegalize;
3112 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
uint64_t CallInst * C
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:833
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineBasicBlock * getMBB() const
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MachineInstrBuilder buildInsert(Register Res, Register Src, Register Op, unsigned Index)
Register getReg(unsigned Idx) const
Get the register for the operand index.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
void setFPImm(const ConstantFP *CFP)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned Reg
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
unsigned less than
Definition: InstrTypes.h:757
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:810
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:371
static uint32_t Concat[]
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Optional< MachineInstrBuilder > materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:902
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:413
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4483
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:158
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
MachineInstr * getOpcodeDef(unsigned Opcode, unsigned Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:284
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
void setImm(int64_t immVal)
virtual const CallLowering * getCallLowering() const
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:288
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
Some kind of error has occurred and we could not legalize this instruction.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
size_t size() const
Definition: SmallVector.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:168
const APFloat & getValueAPF() const
Definition: Constants.h:302
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:155
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emitting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:535
void setFlags(unsigned flags)
Definition: MachineInstr.h:305
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Promote Memory to Register
Definition: Mem2Reg.cpp:109
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static MachineOperand CreateES(const char *SymName, unsigned char TargetFlags=0)
Register getReg() const
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType)
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
Representation of each machine instruction.
Definition: MachineInstr.h:63
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
Instruction has been legalized and the MachineFunction changed.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_FCMP PredOp0, Op1.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
static Constant * getZeroValueForNegation(Type *Ty)
Floating point negation must be implemented with f(x) = -0.0 - x.
Definition: Constants.cpp:780
uint32_t Size
Definition: Profile.cpp:46
void setCImm(const ConstantInt *CI)
const DataLayout & getDataLayout() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:291
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
This file describes how to lower LLVM calls to machine code calls.
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:755
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register getReg() const
getReg - Returns the register number.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
const ConstantInt * getCImm() const
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
Wrapper class representing virtual and physical registers.
Definition: Register.h:18
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:658
This file describes how to lower LLVM code to machine code.
unsigned getPredicate() const