LLVM  9.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41  assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43  unsigned Size = OrigTy.getSizeInBits();
44  unsigned NarrowSize = NarrowTy.getSizeInBits();
45  unsigned NumParts = Size / NarrowSize;
46  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47  assert(Size > NarrowSize);
48 
49  if (LeftoverSize == 0)
50  return {NumParts, 0};
51 
52  if (NarrowTy.isVector()) {
53  unsigned EltSize = OrigTy.getScalarSizeInBits();
54  if (LeftoverSize % EltSize != 0)
55  return {-1, -1};
56  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57  } else {
58  LeftoverTy = LLT::scalar(LeftoverSize);
59  }
60 
61  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62  return std::make_pair(NumParts, NumLeftover);
63 }
64 
66  GISelChangeObserver &Observer,
67  MachineIRBuilder &Builder)
68  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70  MIRBuilder.setMF(MF);
71  MIRBuilder.setChangeObserver(Observer);
72 }
73 
75  GISelChangeObserver &Observer,
77  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78  MIRBuilder.setMF(MF);
79  MIRBuilder.setChangeObserver(Observer);
80 }
// Dispatch a single instruction to the legalization strategy selected by the
// target's LegalizerInfo: each case logs the chosen action and forwards to
// the matching helper, returning its LegalizeResult.
// NOTE(review): the function signature (doxygen lines 81-82, presumably
// `LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(
// MachineInstr &MI) {`) was dropped during extraction.
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  // Ask the target how this exact instruction, with its current types,
  // should be handled.
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    // NOTE(review): the false-arm of this conditional (doxygen line 111,
    // presumably `: UnableToLegalize;`) was dropped during extraction.
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
117 
118 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
119  SmallVectorImpl<unsigned> &VRegs) {
120  for (int i = 0; i < NumParts; ++i)
122  MIRBuilder.buildUnmerge(VRegs, Reg);
123 }
124 
// Leftover-aware split of \p Reg (type \p RegTy): produce as many \p MainTy
// pieces as fit, plus leftover pieces covering the remaining bits.  Writes
// the computed leftover type to \p LeftoverTy and returns false when the
// leftover bits cannot be expressed in whole vector elements.
// NOTE(review): doxygen line 127 (presumably the parameter line
// `SmallVectorImpl<unsigned> &VRegs,`) was dropped during extraction.
bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    // Leftover bits must form a whole number of elements of the same
    // element type, otherwise the breakdown is unsatisfiable.
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Then extract the leftover pieces from the tail of the register.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
169 
170 void LegalizerHelper::insertParts(unsigned DstReg,
171  LLT ResultTy, LLT PartTy,
172  ArrayRef<unsigned> PartRegs,
173  LLT LeftoverTy,
174  ArrayRef<unsigned> LeftoverRegs) {
175  if (!LeftoverTy.isValid()) {
176  assert(LeftoverRegs.empty());
177 
178  if (!ResultTy.isVector()) {
179  MIRBuilder.buildMerge(DstReg, PartRegs);
180  return;
181  }
182 
183  if (PartTy.isVector())
184  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
185  else
186  MIRBuilder.buildBuildVector(DstReg, PartRegs);
187  return;
188  }
189 
190  unsigned PartSize = PartTy.getSizeInBits();
191  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
192 
193  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
194  MIRBuilder.buildUndef(CurResultReg);
195 
196  unsigned Offset = 0;
197  for (unsigned PartReg : PartRegs) {
198  unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
199  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
200  CurResultReg = NewResultReg;
201  Offset += PartSize;
202  }
203 
204  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
205  // Use the original output register for the final insert to avoid a copy.
206  unsigned NewResultReg = (I + 1 == E) ?
207  DstReg : MRI.createGenericVirtualRegister(ResultTy);
208 
209  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
210  CurResultReg = NewResultReg;
211  Offset += LeftoverPartSize;
212  }
213 }
214 
215 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
216  switch (Opcode) {
217  case TargetOpcode::G_SDIV:
218  assert((Size == 32 || Size == 64) && "Unsupported size");
219  return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
220  case TargetOpcode::G_UDIV:
221  assert((Size == 32 || Size == 64) && "Unsupported size");
222  return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
223  case TargetOpcode::G_SREM:
224  assert((Size == 32 || Size == 64) && "Unsupported size");
225  return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
226  case TargetOpcode::G_UREM:
227  assert((Size == 32 || Size == 64) && "Unsupported size");
228  return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
229  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
230  assert(Size == 32 && "Unsupported size");
231  return RTLIB::CTLZ_I32;
232  case TargetOpcode::G_FADD:
233  assert((Size == 32 || Size == 64) && "Unsupported size");
234  return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
235  case TargetOpcode::G_FSUB:
236  assert((Size == 32 || Size == 64) && "Unsupported size");
237  return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
238  case TargetOpcode::G_FMUL:
239  assert((Size == 32 || Size == 64) && "Unsupported size");
240  return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
241  case TargetOpcode::G_FDIV:
242  assert((Size == 32 || Size == 64) && "Unsupported size");
243  return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
244  case TargetOpcode::G_FEXP:
245  assert((Size == 32 || Size == 64) && "Unsupported size");
246  return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
247  case TargetOpcode::G_FEXP2:
248  assert((Size == 32 || Size == 64) && "Unsupported size");
249  return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
250  case TargetOpcode::G_FREM:
251  return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
252  case TargetOpcode::G_FPOW:
253  return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
254  case TargetOpcode::G_FMA:
255  assert((Size == 32 || Size == 64) && "Unsupported size");
256  return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
257  case TargetOpcode::G_FSIN:
258  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
259  return Size == 128 ? RTLIB::SIN_F128
260  : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
261  case TargetOpcode::G_FCOS:
262  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263  return Size == 128 ? RTLIB::COS_F128
264  : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
265  case TargetOpcode::G_FLOG10:
266  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267  return Size == 128 ? RTLIB::LOG10_F128
268  : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
269  case TargetOpcode::G_FLOG:
270  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271  return Size == 128 ? RTLIB::LOG_F128
272  : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
273  case TargetOpcode::G_FLOG2:
274  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275  return Size == 128 ? RTLIB::LOG2_F128
276  : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
277  }
278  llvm_unreachable("Unknown libcall function");
279 }
280 
// Emit a call to runtime routine \p Libcall via the target's CallLowering,
// marking the frame as containing calls.
// NOTE(review): extraction dropped doxygen lines 281-284 (the function
// signature, presumably `LegalizerHelper::LegalizeResult llvm::createLibcall(
// MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, ... ArrayRef<
// CallLowering::ArgInfo> Args)`), line 292 (presumably `return
// LegalizerHelper::UnableToLegalize;` for the failed-lowering branch) and
// line 294 (presumably the final `return LegalizerHelper::Legalized;`).
                              const CallLowering::ArgInfo &Result,
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  // Resolve the external symbol name for this libcall from TargetLowering.
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))

}
296 
// Useful for libcalls where all operands have the same type.
// Builds the argument list from operands 1..N (all of IR type \p OpType),
// with operand 0 as the result, and forwards to createLibcall.
// NOTE(review): extraction dropped doxygen lines 298-299 (the signature,
// presumably `static LegalizerHelper::LegalizeResult simpleLibcall(
// MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,`) and line
// 303 (presumably `SmallVector<CallLowering::ArgInfo, 3> Args;`).
                       Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}
309 
310 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
311  Type *FromType) {
312  auto ToMVT = MVT::getVT(ToType);
313  auto FromMVT = MVT::getVT(FromType);
314 
315  switch (Opcode) {
316  case TargetOpcode::G_FPEXT:
317  return RTLIB::getFPEXT(FromMVT, ToMVT);
318  case TargetOpcode::G_FPTRUNC:
319  return RTLIB::getFPROUND(FromMVT, ToMVT);
320  case TargetOpcode::G_FPTOSI:
321  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
322  case TargetOpcode::G_FPTOUI:
323  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
324  case TargetOpcode::G_SITOFP:
325  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
326  case TargetOpcode::G_UITOFP:
327  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
328  }
329  llvm_unreachable("Unsupported libcall function");
330 }
331 
// Lower a type-conversion instruction to a libcall: operand 0 is the result
// (of \p ToType), operand 1 the source (of \p FromType).
// NOTE(review): extraction dropped doxygen lines 332-333 (the signature,
// presumably `static LegalizerHelper::LegalizeResult conversionLibcall(
// MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,`) and line
// 335 (presumably the `getConvRTLibDesc(...)` call initializing `Libcall`).
                  Type *FromType) {
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}
339 
// Replace MI with a call to a runtime library routine of matching semantics,
// then erase MI.  Each case validates the supported type combinations first.
// NOTE(review): extraction dropped doxygen lines 340-341 (the signature,
// presumably `LegalizerHelper::LegalizeResult LegalizerHelper::libcall(
// MachineInstr &MI) {`).
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: call the routine with an IR integer type of equal width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2: {
    // Only float/double runtime routines are wired up on this path.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    // NOTE(review): extraction dropped doxygen lines 392-393, presumably the
    // `conversionLibcall(MI, MIRBuilder, double, float)` call that defines
    // `Status` used below.
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    // NOTE(review): extraction dropped doxygen lines 404-405, presumably the
    // `conversionLibcall(MI, MIRBuilder, float, double)` call defining
    // `Status`.
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    // NOTE(review): extraction dropped doxygen line 417, presumably
    // `LegalizeResult Status = conversionLibcall(` starting this call.
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    // NOTE(review): extraction dropped doxygen line 431, presumably
    // `LegalizeResult Status = conversionLibcall(` starting this call.
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The original instruction is fully replaced by the emitted call sequence.
  MI.eraseFromParent();
  return Legalized;
}
444 
// Legalize MI by splitting the scalar type at \p TypeIdx into pieces of type
// \p NarrowTy, dispatching per-opcode to either inline expansion or a
// dedicated narrowScalar* helper.
// NOTE(review): extraction dropped doxygen line 445 (the signature's first
// line, presumably `LegalizerHelper::LegalizeResult
// LegalizerHelper::narrowScalar(MachineInstr &MI,`).
                                                     unsigned TypeIdx,
                                                     LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of the wide type is just a merge/build of narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    // Materialize each NarrowSize-bit slice of the constant separately.
    SmallVector<unsigned, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    // Any remaining bits become a single odd-sized leftover constant.
    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<unsigned, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
        LeftoverTy,
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with zero, then ripple it through the parts.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Lowest part uses G_USUBO to produce the initial borrow, the rest
    // consume and regenerate it via G_USUBE.
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Extending load in disguise: load the memory width, then any-extend.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): this inner MMO shadows the outer one above.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSizeInBits() == NarrowSize) {
      // Memory width matches NarrowTy: a plain load, then extend in regs.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      // Otherwise keep the extending load, just at the narrow type.
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
        : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
        .addDef(TmpReg)
        .addUse(PtrReg)
        .addMemOperand(&MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    unsigned SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): SrcTy.isVector() is always false here (checked above),
    // so this condition is dead as written.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store in disguise: truncate, then store the memory width.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): this inner MMO shadows the outer one above.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx != 0)
      return UnableToLegalize; // TODO

    // Narrow only the result; the zext preserves the count value.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
700 
701 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
702  unsigned OpIdx, unsigned ExtOpcode) {
703  MachineOperand &MO = MI.getOperand(OpIdx);
704  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
705  MO.setReg(ExtB->getOperand(0).getReg());
706 }
707 
708 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
709  unsigned OpIdx) {
710  MachineOperand &MO = MI.getOperand(OpIdx);
711  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
712  {MO.getReg()});
713  MO.setReg(ExtB->getOperand(0).getReg());
714 }
715 
// Widen def operand \p OpIdx of \p MI to \p WideTy: MI now defines a fresh
// wide vreg, and \p TruncOpcode converts it back to the original register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // NOTE(review): doxygen line 720 was dropped by extraction here —
  // presumably the insert-point adjustment placing the truncate after MI;
  // verify against upstream before relying on this text.
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}
724 
// Narrow def operand \p OpIdx of \p MI to \p NarrowTy: MI now defines a fresh
// narrow vreg, and \p ExtOpcode extends it back to the original register.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  // NOTE(review): doxygen line 729 was dropped by extraction here —
  // presumably the insert-point adjustment placing the extension after MI;
  // verify against upstream before relying on this text.
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}
733 
// Grow def operand \p OpIdx of \p MI to the wider vector type \p WideTy: MI
// defines a fresh wide vreg, and a G_EXTRACT at offset 0 recovers the
// original narrow value.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // NOTE(review): doxygen line 738 was dropped by extraction here —
  // presumably the insert-point adjustment placing the extract after MI;
  // verify against upstream before relying on this text.
  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
  MO.setReg(DstExt);
}
742 
743 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
744  unsigned OpIdx) {
745  MachineOperand &MO = MI.getOperand(OpIdx);
746 
747  LLT OldTy = MRI.getType(MO.getReg());
748  unsigned OldElts = OldTy.getNumElements();
749  unsigned NewElts = MoreTy.getNumElements();
750 
751  unsigned NumParts = NewElts / OldElts;
752 
753  // Use concat_vectors if the result is a multiple of the number of elements.
754  if (NumParts * OldElts == NewElts) {
756  Parts.push_back(MO.getReg());
757 
758  unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
759  for (unsigned I = 1; I != NumParts; ++I)
760  Parts.push_back(ImpDef);
761 
762  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
763  MO.setReg(Concat.getReg(0));
764  return;
765  }
766 
767  unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy);
768  unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
769  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
770  MO.setReg(MoreReg);
771 }
772 
// Widen G_MERGE_VALUES (TypeIdx 1 = source type) by zero-extending each
// source into the destination type, shifting it to its bit position and
// OR-ing the pieces together.
// NOTE(review): doxygen line 773 (the return-type line, presumably
// `LegalizerHelper::LegalizeResult`) was dropped by extraction.
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  // The first source needs no shift; it seeds the OR chain.
  unsigned Src1 = MI.getOperand(1).getReg();
  unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    unsigned SrcReg = MI.getOperand(I).getReg();
    assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

    auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);

    // The final OR writes straight into DstReg to avoid a trailing copy.
    unsigned NextResult = I + 1 == NumOps ? DstReg :
      MRI.createGenericVirtualRegister(DstTy);

    auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
    auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  MI.eraseFromParent();
  return Legalized;
}
811 
// Widen G_UNMERGE_VALUES (TypeIdx 0 = destination type): build a widened
// source value whose layout matches NumDst results of WideTy, retarget the
// source operand at it, and widen every destination.
// NOTE(review): doxygen line 812 (the return-type line, presumably
// `LegalizerHelper::LegalizeResult`) was dropped by extraction.
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned NumDst = MI.getNumOperands() - 1;
  unsigned SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  unsigned Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // The new source must hold NumDst pieces of the widened destination type.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // Re-space the original pieces at WideTy-sized strides by shifting and
  // OR-ing the accumulated value.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
851 
// Widen G_EXTRACT.  TypeIdx 0 widens a scalar extract by lowering it to a
// shift-and-truncate in the (possibly widened) source type; TypeIdx 1 widens
// a vector-element extract by any-extending the source vector and scaling
// the byte offset.
// NOTE(review): doxygen line 852 (the return-type line, presumably
// `LegalizerHelper::LegalizeResult`) was dropped by extraction.
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      // NOTE(review): doxygen line 871 (presumably the `if (...)` condition
      // guarding this return, testing DL for a non-integral address space)
      // was dropped by extraction.
      return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    // NOTE(review): this else-if repeats the condition above and is
    // therefore unreachable as written — presumably the comparison was
    // meant to be `<`; confirm against upstream.
    } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize;

    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the element offset to the widened vector's layout.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
923 
// Widen G_INSERT (TypeIdx 0 only): any-extend the container operand and
// widen the result to match.
// NOTE(review): doxygen line 924 (the return-type line, presumably
// `LegalizerHelper::LegalizeResult`) was dropped by extraction.
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
935 
// Legalize MI by performing its operation in the wider type WideTy,
// extending the sources and truncating/extending the result as each opcode
// requires. TypeIdx selects which of the instruction's types is being
// widened. Returns Legalized on success, UnableToLegalize for unsupported
// opcode/TypeIdx combinations.
937 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
938  MIRBuilder.setInstr(MI);
939 
940  switch (MI.getOpcode()) {
941  default:
942  return UnableToLegalize;
943  case TargetOpcode::G_EXTRACT:
944  return widenScalarExtract(MI, TypeIdx, WideTy);
945  case TargetOpcode::G_INSERT:
946  return widenScalarInsert(MI, TypeIdx, WideTy);
947  case TargetOpcode::G_MERGE_VALUES:
948  return widenScalarMergeValues(MI, TypeIdx, WideTy);
949  case TargetOpcode::G_UNMERGE_VALUES:
950  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
951  case TargetOpcode::G_UADDO:
952  case TargetOpcode::G_USUBO: {
953  if (TypeIdx == 1)
954  return UnableToLegalize; // TODO
 // Zero-extend both operands so unsigned wrap-around is observable in the
 // high bits of the wide result.
955  auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
956  {MI.getOperand(2).getReg()});
957  auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
958  {MI.getOperand(3).getReg()});
959  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
960  ? TargetOpcode::G_ADD
961  : TargetOpcode::G_SUB;
962  // Do the arithmetic in the larger type.
963  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
964  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
 // NOTE(review): the declaration of `Mask` (presumably an all-ones APInt of
 // OrigTy's width) falls on a source line not visible in this rendering —
 // confirm against the full file.
966  auto AndOp = MIRBuilder.buildInstr(
967  TargetOpcode::G_AND, {WideTy},
968  {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
969  // There is no overflow if the AndOp is the same as NewOp.
 // NOTE(review): the compare that defines the overflow result (operand 1)
 // is on a line missing from this rendering; only its continuation
 // `AndOp);` is visible below.
971  AndOp);
972  // Now trunc the NewOp to the original result.
973  MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
974  MI.eraseFromParent();
975  return Legalized;
976  }
977  case TargetOpcode::G_CTTZ:
978  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
979  case TargetOpcode::G_CTLZ:
980  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
981  case TargetOpcode::G_CTPOP: {
 // Widening only the result type is a simple destination widen.
982  if (TypeIdx == 0) {
983  Observer.changingInstr(MI);
984  widenScalarDst(MI, WideTy, 0);
985  Observer.changedInstr(MI);
986  return Legalized;
987  }
988 
989  unsigned SrcReg = MI.getOperand(1).getReg();
990 
991  // First ZEXT the input.
992  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
993  LLT CurTy = MRI.getType(SrcReg);
994  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
995  // The count is the same in the larger type except if the original
996  // value was zero. This can be handled by setting the bit just off
997  // the top of the original type.
998  auto TopBit =
 // NOTE(review): the initializer of `TopBit` (presumably a single bit set
 // at CurTy's width) is on a source line not visible in this rendering.
1000  MIBSrc = MIRBuilder.buildOr(
1001  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit))
1002  }
1003 
1004  // Perform the operation at the larger size.
1005  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1006  // This is already the correct result for CTPOP and CTTZs
1007  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1008  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1009  // The correct result is NewOp - (Difference in widety and current ty).
1010  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1011  MIBNewOp = MIRBuilder.buildInstr(
1012  TargetOpcode::G_SUB, {WideTy},
1013  {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1014  }
1015 
1016  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1017  MI.eraseFromParent();
1018  return Legalized;
1019  }
1020  case TargetOpcode::G_BSWAP: {
1021  Observer.changingInstr(MI);
1022  unsigned DstReg = MI.getOperand(0).getReg();
1023 
1024  unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
1025  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
1026  unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1027  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1028 
 // The widened bswap puts the meaningful bytes at the top of the wide
 // register; redirect MI to define DstExt, then shift the bytes back down.
1029  MI.getOperand(0).setReg(DstExt);
1030 
 // NOTE(review): a line is missing from this rendering here — presumably
 // the insert-point adjustment so the fix-up code below is emitted after
 // the (mutated) G_BSWAP; confirm against the full file.
1032 
1033  LLT Ty = MRI.getType(DstReg);
1034  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1035  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1036  MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1037  .addDef(ShrReg)
1038  .addUse(DstExt)
1039  .addUse(ShiftAmtReg);
1040 
1041  MIRBuilder.buildTrunc(DstReg, ShrReg);
1042  Observer.changedInstr(MI);
1043  return Legalized;
1044  }
1045  case TargetOpcode::G_ADD:
1046  case TargetOpcode::G_AND:
1047  case TargetOpcode::G_MUL:
1048  case TargetOpcode::G_OR:
1049  case TargetOpcode::G_XOR:
1050  case TargetOpcode::G_SUB:
1051  // Perform operation at larger width (any extension is fine here, high bits
1052  // don't affect the result) and then truncate the result back to the
1053  // original type.
1054  Observer.changingInstr(MI);
1055  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1056  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1057  widenScalarDst(MI, WideTy);
1058  Observer.changedInstr(MI);
1059  return Legalized;
1060 
1061  case TargetOpcode::G_SHL:
1062  Observer.changingInstr(MI);
1063 
1064  if (TypeIdx == 0) {
1065  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1066  widenScalarDst(MI, WideTy);
1067  } else {
1068  assert(TypeIdx == 1);
1069  // The "number of bits to shift" operand must preserve its value as an
1070  // unsigned integer:
1071  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1072  }
1073 
1074  Observer.changedInstr(MI);
1075  return Legalized;
1076 
1077  case TargetOpcode::G_SDIV:
1078  case TargetOpcode::G_SREM:
 // Signed division/remainder need the sign preserved in the high bits.
1079  Observer.changingInstr(MI);
1080  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1081  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1082  widenScalarDst(MI, WideTy);
1083  Observer.changedInstr(MI);
1084  return Legalized;
1085 
1086  case TargetOpcode::G_ASHR:
1087  case TargetOpcode::G_LSHR:
1088  Observer.changingInstr(MI);
1089 
1090  if (TypeIdx == 0) {
 // The shifted-in high bits must match the shift kind: sign bits for
 // arithmetic shifts, zeros for logical shifts.
1091  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1092  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1093 
1094  widenScalarSrc(MI, WideTy, 1, CvtOp);
1095  widenScalarDst(MI, WideTy);
1096  } else {
1097  assert(TypeIdx == 1);
1098  // The "number of bits to shift" operand must preserve its value as an
1099  // unsigned integer:
1100  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1101  }
1102 
1103  Observer.changedInstr(MI);
1104  return Legalized;
1105  case TargetOpcode::G_UDIV:
1106  case TargetOpcode::G_UREM:
1107  Observer.changingInstr(MI);
1108  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1109  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1110  widenScalarDst(MI, WideTy);
1111  Observer.changedInstr(MI);
1112  return Legalized;
1113 
1114  case TargetOpcode::G_SELECT:
1115  Observer.changingInstr(MI);
1116  if (TypeIdx == 0) {
1117  // Perform operation at larger width (any extension is fine here, high
1118  // bits don't affect the result) and then truncate the result back to the
1119  // original type.
1120  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1121  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1122  widenScalarDst(MI, WideTy);
1123  } else {
1124  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1125  // Explicit extension is required here since high bits affect the result.
1126  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1127  }
1128  Observer.changedInstr(MI);
1129  return Legalized;
1130 
1131  case TargetOpcode::G_FPTOSI:
1132  case TargetOpcode::G_FPTOUI:
1133  if (TypeIdx != 0)
1134  return UnableToLegalize;
1135  Observer.changingInstr(MI);
1136  widenScalarDst(MI, WideTy);
1137  Observer.changedInstr(MI);
1138  return Legalized;
1139 
1140  case TargetOpcode::G_SITOFP:
1141  if (TypeIdx != 1)
1142  return UnableToLegalize;
1143  Observer.changingInstr(MI);
1144  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1145  Observer.changedInstr(MI);
1146  return Legalized;
1147 
1148  case TargetOpcode::G_UITOFP:
1149  if (TypeIdx != 1)
1150  return UnableToLegalize;
1151  Observer.changingInstr(MI);
1152  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1153  Observer.changedInstr(MI);
1154  return Legalized;
1155 
1156  case TargetOpcode::G_LOAD:
1157  case TargetOpcode::G_SEXTLOAD:
1158  case TargetOpcode::G_ZEXTLOAD:
1159  Observer.changingInstr(MI);
1160  widenScalarDst(MI, WideTy);
1161  Observer.changedInstr(MI);
1162  return Legalized;
1163 
1164  case TargetOpcode::G_STORE: {
1165  if (TypeIdx != 0)
1166  return UnableToLegalize;
1167 
1168  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1169  if (!isPowerOf2_32(Ty.getSizeInBits()))
1170  return UnableToLegalize;
1171 
1172  Observer.changingInstr(MI);
1173 
 // Stored i1 values must be zero-extended so the memory pattern is
 // well-defined; anything wider can be any-extended.
1174  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1175  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1176  widenScalarSrc(MI, WideTy, 0, ExtType);
1177 
1178  Observer.changedInstr(MI);
1179  return Legalized;
1180  }
1181  case TargetOpcode::G_CONSTANT: {
1182  MachineOperand &SrcMO = MI.getOperand(1);
 // NOTE(review): the declaration of `Ctx` (an LLVMContext) is on a source
 // line not visible in this rendering — confirm against the full file.
1184  const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1185  Observer.changingInstr(MI);
1186  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1187 
1188  widenScalarDst(MI, WideTy);
1189  Observer.changedInstr(MI);
1190  return Legalized;
1191  }
1192  case TargetOpcode::G_FCONSTANT: {
1193  MachineOperand &SrcMO = MI.getOperand(1);
 // NOTE(review): the declaration of `Ctx` is on a source line not visible
 // in this rendering.
1195  APFloat Val = SrcMO.getFPImm()->getValueAPF();
1196  bool LosesInfo;
1197  switch (WideTy.getSizeInBits()) {
1198  case 32:
 // NOTE(review): the APFloat conversion call (presumably to IEEE single)
 // is on a line missing from this rendering; only its continuation
 // `&LosesInfo);` is visible below.
1200  &LosesInfo);
1201  break;
1202  case 64:
 // NOTE(review): likewise the conversion to IEEE double is missing here.
1204  &LosesInfo);
1205  break;
1206  default:
1207  return UnableToLegalize;
1208  }
1209 
1210  assert(!LosesInfo && "extend should always be lossless");
1211 
1212  Observer.changingInstr(MI);
1213  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1214 
 // The wide constant is truncated back with G_FPTRUNC so users still see
 // the original type.
1215  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1216  Observer.changedInstr(MI);
1217  return Legalized;
1218  }
1219  case TargetOpcode::G_IMPLICIT_DEF: {
1220  Observer.changingInstr(MI);
1221  widenScalarDst(MI, WideTy);
1222  Observer.changedInstr(MI);
1223  return Legalized;
1224  }
1225  case TargetOpcode::G_BRCOND:
1226  Observer.changingInstr(MI);
 // Extend the condition with the target's preferred boolean extension.
1227  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1228  Observer.changedInstr(MI);
1229  return Legalized;
1230 
1231  case TargetOpcode::G_FCMP:
1232  Observer.changingInstr(MI);
1233  if (TypeIdx == 0)
1234  widenScalarDst(MI, WideTy);
1235  else {
1236  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1237  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1238  }
1239  Observer.changedInstr(MI);
1240  return Legalized;
1241 
1242  case TargetOpcode::G_ICMP:
1243  Observer.changingInstr(MI);
1244  if (TypeIdx == 0)
1245  widenScalarDst(MI, WideTy);
1246  else {
 // Signed predicates need the sign preserved; unsigned need zeros.
1247  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1248  MI.getOperand(1).getPredicate()))
1249  ? TargetOpcode::G_SEXT
1250  : TargetOpcode::G_ZEXT;
1251  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1252  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1253  }
1254  Observer.changedInstr(MI);
1255  return Legalized;
1256 
1257  case TargetOpcode::G_GEP:
1258  assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1259  Observer.changingInstr(MI);
 // Pointer offsets are signed.
1260  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1261  Observer.changedInstr(MI);
1262  return Legalized;
1263 
1264  case TargetOpcode::G_PHI: {
1265  assert(TypeIdx == 0 && "Expecting only Idx 0");
1266 
1267  Observer.changingInstr(MI);
 // Extend each incoming value in its predecessor block, just before the
 // terminator.
1268  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1269  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1270  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1271  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1272  }
1273 
 // The truncation of the widened PHI result must go after the PHI group.
1274  MachineBasicBlock &MBB = *MI.getParent();
1275  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1276  widenScalarDst(MI, WideTy);
1277  Observer.changedInstr(MI);
1278  return Legalized;
1279  }
1280  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1281  if (TypeIdx == 0) {
1282  unsigned VecReg = MI.getOperand(1).getReg();
1283  LLT VecTy = MRI.getType(VecReg);
1284  Observer.changingInstr(MI);
1285 
 // Widen the element type of the whole source vector to match WideTy.
1286  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1287  WideTy.getSizeInBits()),
1288  1, TargetOpcode::G_SEXT);
1289 
1290  widenScalarDst(MI, WideTy, 0);
1291  Observer.changedInstr(MI);
1292  return Legalized;
1293  }
1294 
1295  if (TypeIdx != 2)
1296  return UnableToLegalize;
1297  Observer.changingInstr(MI);
1298  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1299  Observer.changedInstr(MI);
1300  return Legalized;
1301  }
1302  case TargetOpcode::G_FADD:
1303  case TargetOpcode::G_FMUL:
1304  case TargetOpcode::G_FSUB:
1305  case TargetOpcode::G_FMA:
1306  case TargetOpcode::G_FNEG:
1307  case TargetOpcode::G_FABS:
1308  case TargetOpcode::G_FCANONICALIZE:
1309  case TargetOpcode::G_FDIV:
1310  case TargetOpcode::G_FREM:
1311  case TargetOpcode::G_FCEIL:
1312  case TargetOpcode::G_FFLOOR:
1313  case TargetOpcode::G_FCOS:
1314  case TargetOpcode::G_FSIN:
1315  case TargetOpcode::G_FLOG10:
1316  case TargetOpcode::G_FLOG:
1317  case TargetOpcode::G_FLOG2:
1318  case TargetOpcode::G_FSQRT:
1319  case TargetOpcode::G_FEXP:
1320  case TargetOpcode::G_FEXP2:
1321  assert(TypeIdx == 0);
1322  Observer.changingInstr(MI);
1323 
 // FP ops: extend every source, do the op wide, then FP-truncate back.
1324  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1325  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1326 
1327  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1328  Observer.changedInstr(MI);
1329  return Legalized;
1330  case TargetOpcode::G_INTTOPTR:
1331  if (TypeIdx != 1)
1332  return UnableToLegalize;
1333 
1334  Observer.changingInstr(MI);
1335  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1336  Observer.changedInstr(MI);
1337  return Legalized;
1338  case TargetOpcode::G_PTRTOINT:
1339  if (TypeIdx != 0)
1340  return UnableToLegalize;
1341 
1342  Observer.changingInstr(MI);
1343  widenScalarDst(MI, WideTy, 0);
1344  Observer.changedInstr(MI);
1345  return Legalized;
1346  }
1347 }
1348 
// Lower MI into a sequence of simpler generic instructions the target can
// legalize individually (e.g. G_UREM -> div/mul/sub, G_SMULO/G_UMULO ->
// mul + mulh + compare, wide-than-memory loads/stores -> split accesses).
// NOTE(review): the LegalizeResult return type sits on a source line not
// captured in this rendering — confirm against the full file.
1350 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1351  using namespace TargetOpcode;
1352  MIRBuilder.setInstr(MI);
1353 
1354  switch(MI.getOpcode()) {
1355  default:
1356  return UnableToLegalize;
1357  case TargetOpcode::G_SREM:
1358  case TargetOpcode::G_UREM: {
 // rem = x - (x / y) * y, using the signedness-matching division.
1359  unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
1360  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1361  .addDef(QuotReg)
1362  .addUse(MI.getOperand(1).getReg())
1363  .addUse(MI.getOperand(2).getReg());
1364 
1365  unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
1366  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
 // NOTE(review): the subtraction defining the remainder result is on a
 // source line missing from this rendering; only its continuation
 // `ProdReg);` is visible below.
1368  ProdReg);
1369  MI.eraseFromParent();
1370  return Legalized;
1371  }
1372  case TargetOpcode::G_SMULO:
1373  case TargetOpcode::G_UMULO: {
1374  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1375  // result.
1376  unsigned Res = MI.getOperand(0).getReg();
1377  unsigned Overflow = MI.getOperand(1).getReg();
1378  unsigned LHS = MI.getOperand(2).getReg();
1379  unsigned RHS = MI.getOperand(3).getReg();
1380 
1381  MIRBuilder.buildMul(Res, LHS, RHS);
1382 
1383  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1384  ? TargetOpcode::G_SMULH
1385  : TargetOpcode::G_UMULH;
1386 
1387  unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1388  MIRBuilder.buildInstr(Opcode)
1389  .addDef(HiPart)
1390  .addUse(LHS)
1391  .addUse(RHS);
1392 
1393  unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1394  MIRBuilder.buildConstant(Zero, 0);
1395 
1396  // For *signed* multiply, overflow is detected by checking:
1397  // (hi != (lo >> bitwidth-1))
1398  if (Opcode == TargetOpcode::G_SMULH) {
1399  unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1400  unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1401  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1402  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1403  .addDef(Shifted)
1404  .addUse(Res)
1405  .addUse(ShiftAmt);
1406  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1407  } else {
 // Unsigned multiply overflowed iff any bit reached the high half.
1408  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1409  }
1410  MI.eraseFromParent();
1411  return Legalized;
1412  }
1413  case TargetOpcode::G_FNEG: {
1414  // TODO: Handle vector types once we are able to
1415  // represent them.
1416  if (Ty.isVector())
1417  return UnableToLegalize;
1418  unsigned Res = MI.getOperand(0).getReg();
1419  Type *ZeroTy;
 // NOTE(review): the declaration of `Ctx` (an LLVMContext) is on a source
 // line not visible in this rendering — confirm against the full file.
1421  switch (Ty.getSizeInBits()) {
1422  case 16:
1423  ZeroTy = Type::getHalfTy(Ctx);
1424  break;
1425  case 32:
1426  ZeroTy = Type::getFloatTy(Ctx);
1427  break;
1428  case 64:
1429  ZeroTy = Type::getDoubleTy(Ctx);
1430  break;
1431  case 128:
1432  ZeroTy = Type::getFP128Ty(Ctx);
1433  break;
1434  default:
1435  llvm_unreachable("unexpected floating-point type");
1436  }
 // fneg x == -0.0 - x; the -0.0 handles signed-zero correctly.
1437  ConstantFP &ZeroForNegation =
1438  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1439  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1440  MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1441  .addDef(Res)
1442  .addUse(Zero->getOperand(0).getReg())
1443  .addUse(MI.getOperand(1).getReg());
1444  MI.eraseFromParent();
1445  return Legalized;
1446  }
1447  case TargetOpcode::G_FSUB: {
1448  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1449  // First, check if G_FNEG is marked as Lower. If so, we may
1450  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1451  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1452  return UnableToLegalize;
1453  unsigned Res = MI.getOperand(0).getReg();
1454  unsigned LHS = MI.getOperand(1).getReg();
1455  unsigned RHS = MI.getOperand(2).getReg();
1456  unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1457  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1458  MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1459  .addDef(Res)
1460  .addUse(LHS)
1461  .addUse(Neg);
1462  MI.eraseFromParent();
1463  return Legalized;
1464  }
1465  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
 // Replace with a plain cmpxchg plus an explicit compare for the success
 // flag.
1466  unsigned OldValRes = MI.getOperand(0).getReg();
1467  unsigned SuccessRes = MI.getOperand(1).getReg();
1468  unsigned Addr = MI.getOperand(2).getReg();
1469  unsigned CmpVal = MI.getOperand(3).getReg();
1470  unsigned NewVal = MI.getOperand(4).getReg();
1471  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1472  **MI.memoperands_begin());
1473  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1474  MI.eraseFromParent();
1475  return Legalized;
1476  }
1477  case TargetOpcode::G_LOAD:
1478  case TargetOpcode::G_SEXTLOAD:
1479  case TargetOpcode::G_ZEXTLOAD: {
1480  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1481  unsigned DstReg = MI.getOperand(0).getReg();
1482  unsigned PtrReg = MI.getOperand(1).getReg();
1483  LLT DstTy = MRI.getType(DstReg);
1484  auto &MMO = **MI.memoperands_begin();
1485 
1486  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
1487  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
1488  // This load needs splitting into power of 2 sized loads.
1489  if (DstTy.isVector())
1490  return UnableToLegalize;
1491  if (isPowerOf2_32(DstTy.getSizeInBits()))
1492  return UnableToLegalize; // Don't know what we're being asked to do.
1493 
1494  // Our strategy here is to generate anyextending loads for the smaller
1495  // types up to next power-2 result type, and then combine the two larger
1496  // result values together, before truncating back down to the non-pow-2
1497  // type.
1498  // E.g. v1 = i24 load =>
1499  // v2 = i32 load (2 byte)
1500  // v3 = i32 load (1 byte)
1501  // v4 = i32 shl v2, 16
1502  // v5 = i32 or v4, v3
1503  // v1 = i24 trunc v5
1504  // By doing this we generate the correct truncate which should get
1505  // combined away as an artifact with a matching extend.
1506  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
1507  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
1508 
 // NOTE(review): the declaration of `MF` (the MachineFunction) is on a
 // source line not visible in this rendering.
1510  MachineMemOperand *LargeMMO =
1511  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
1512  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
1513  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
1514 
1515  LLT PtrTy = MRI.getType(PtrReg);
1516  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
1517  LLT AnyExtTy = LLT::scalar(AnyExtSize);
1518  unsigned LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
1519  unsigned SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
1520  auto LargeLoad =
1521  MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
1522 
 // Address of the high piece = base + LargeSplitSize bytes.
1523  auto OffsetCst =
1524  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
1525  unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
1526  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
1527  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
1528  *SmallMMO);
1529 
1530  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
1531  auto Shift = MIRBuilder.buildShl(AnyExtTy, LargeLoad, ShiftAmt);
1532  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, SmallLoad);
1533  MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
1534  MI.eraseFromParent();
1535  return Legalized;
1536  }
 // Extending load whose result already matches the memory width: a plain
 // load is equivalent.
1537  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1538  MI.eraseFromParent();
1539  return Legalized;
1540  }
1541 
1542  if (DstTy.isScalar()) {
 // Load at memory width, then apply the extension matching the opcode.
1543  unsigned TmpReg =
1544  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
1545  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1546  switch (MI.getOpcode()) {
1547  default:
1548  llvm_unreachable("Unexpected opcode");
1549  case TargetOpcode::G_LOAD:
1550  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1551  break;
1552  case TargetOpcode::G_SEXTLOAD:
1553  MIRBuilder.buildSExt(DstReg, TmpReg);
1554  break;
1555  case TargetOpcode::G_ZEXTLOAD:
1556  MIRBuilder.buildZExt(DstReg, TmpReg);
1557  break;
1558  }
1559  MI.eraseFromParent();
1560  return Legalized;
1561  }
1562 
1563  return UnableToLegalize;
1564  }
1565  case TargetOpcode::G_STORE: {
1566  // Lower a non-power of 2 store into multiple pow-2 stores.
1567  // E.g. split an i24 store into an i16 store + i8 store.
1568  // We do this by first extending the stored value to the next largest power
1569  // of 2 type, and then using truncating stores to store the components.
1570  // By doing this, likewise with G_LOAD, generate an extend that can be
1571  // artifact-combined away instead of leaving behind extracts.
1572  unsigned SrcReg = MI.getOperand(0).getReg();
1573  unsigned PtrReg = MI.getOperand(1).getReg();
1574  LLT SrcTy = MRI.getType(SrcReg);
1575  MachineMemOperand &MMO = **MI.memoperands_begin();
1576  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
1577  return UnableToLegalize;
1578  if (SrcTy.isVector())
1579  return UnableToLegalize;
1580  if (isPowerOf2_32(SrcTy.getSizeInBits()))
1581  return UnableToLegalize; // Don't know what we're being asked to do.
1582 
1583  // Extend to the next pow-2.
1584  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
1585  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
1586 
1587  // Obtain the smaller value by shifting away the larger value.
1588  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
1589  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
1590  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
1591  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
1592 
1593  // Generate the GEP and truncating stores.
1594  LLT PtrTy = MRI.getType(PtrReg);
1595  auto OffsetCst =
1596  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
1597  unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
1598  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
1599 
 // NOTE(review): the declaration of `MF` (the MachineFunction) is on a
 // source line not visible in this rendering.
1601  MachineMemOperand *LargeMMO =
1602  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
1603  MachineMemOperand *SmallMMO =
1604  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
1605  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
1606  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
1607  MI.eraseFromParent();
1608  return Legalized;
1609  }
1610  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1611  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1612  case TargetOpcode::G_CTLZ:
1613  case TargetOpcode::G_CTTZ:
1614  case TargetOpcode::G_CTPOP:
1615  return lowerBitCount(MI, TypeIdx, Ty);
1616  case G_UADDO: {
1617  unsigned Res = MI.getOperand(0).getReg();
1618  unsigned CarryOut = MI.getOperand(1).getReg();
1619  unsigned LHS = MI.getOperand(2).getReg();
1620  unsigned RHS = MI.getOperand(3).getReg();
1621 
 // Carry-out is set exactly when the sum wrapped below an operand.
1622  MIRBuilder.buildAdd(Res, LHS, RHS);
1623  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
1624 
1625  MI.eraseFromParent();
1626  return Legalized;
1627  }
1628  case G_UADDE: {
1629  unsigned Res = MI.getOperand(0).getReg();
1630  unsigned CarryOut = MI.getOperand(1).getReg();
1631  unsigned LHS = MI.getOperand(2).getReg();
1632  unsigned RHS = MI.getOperand(3).getReg();
1633  unsigned CarryIn = MI.getOperand(4).getReg();
1634 
1635  unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1636  unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1637 
 // Res = LHS + RHS + carry-in; carry-out if the total wrapped below LHS.
1638  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1639  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1640  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1641  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1642 
1643  MI.eraseFromParent();
1644  return Legalized;
1645  }
1646  case G_USUBO: {
1647  unsigned Res = MI.getOperand(0).getReg();
1648  unsigned BorrowOut = MI.getOperand(1).getReg();
1649  unsigned LHS = MI.getOperand(2).getReg();
1650  unsigned RHS = MI.getOperand(3).getReg();
1651 
 // Borrow-out is set exactly when LHS < RHS (unsigned).
1652  MIRBuilder.buildSub(Res, LHS, RHS);
1653  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1654 
1655  MI.eraseFromParent();
1656  return Legalized;
1657  }
1658  case G_USUBE: {
1659  unsigned Res = MI.getOperand(0).getReg();
1660  unsigned BorrowOut = MI.getOperand(1).getReg();
1661  unsigned LHS = MI.getOperand(2).getReg();
1662  unsigned RHS = MI.getOperand(3).getReg();
1663  unsigned BorrowIn = MI.getOperand(4).getReg();
1664 
1665  unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1666  unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1667  unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1668  unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1669 
 // Res = LHS - RHS - borrow-in. Borrow-out: if LHS == RHS the result
 // depends only on the incoming borrow; otherwise it is LHS <u RHS.
1670  MIRBuilder.buildSub(TmpRes, LHS, RHS);
1671  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1672  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1673  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1674  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1675  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1676 
1677  MI.eraseFromParent();
1678  return Legalized;
1679  }
1680  }
1681 }
1682 
1683 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1684  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1685  SmallVector<unsigned, 2> DstRegs;
1686 
1687  unsigned NarrowSize = NarrowTy.getSizeInBits();
1688  unsigned DstReg = MI.getOperand(0).getReg();
1689  unsigned Size = MRI.getType(DstReg).getSizeInBits();
1690  int NumParts = Size / NarrowSize;
1691  // FIXME: Don't know how to handle the situation where the small vectors
1692  // aren't all the same size yet.
1693  if (Size % NarrowSize != 0)
1694  return UnableToLegalize;
1695 
1696  for (int i = 0; i < NumParts; ++i) {
1697  unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1698  MIRBuilder.buildUndef(TmpReg);
1699  DstRegs.push_back(TmpReg);
1700  }
1701 
1702  if (NarrowTy.isVector())
1703  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1704  else
1705  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1706 
1707  MI.eraseFromParent();
1708  return Legalized;
1709 }
1710 
// Split a simple (1-3 operand, same-type) vector operation into NarrowTy
// pieces, performing the op on each piece and concatenating the results.
// When the split does not divide evenly, handles exactly one leftover
// element via an extract/op/insert sequence.
// NOTE(review): the LegalizeResult return type sits on a source line not
// captured in this rendering — confirm against the full file.
1712 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1713  LLT NarrowTy) {
1714  const unsigned Opc = MI.getOpcode();
 // Number of source operands (all operands minus the single def).
1715  const unsigned NumOps = MI.getNumOperands() - 1;
1716  const unsigned NarrowSize = NarrowTy.getSizeInBits();
1717  const unsigned DstReg = MI.getOperand(0).getReg();
 // Preserve fast-math/nsw-style flags on every piece.
1718  const unsigned Flags = MI.getFlags();
1719  const LLT DstTy = MRI.getType(DstReg);
1720  const unsigned Size = DstTy.getSizeInBits();
1721  const int NumParts = Size / NarrowSize;
1722  const LLT EltTy = DstTy.getElementType();
1723  const unsigned EltSize = EltTy.getSizeInBits();
1724  const unsigned BitsForNumParts = NarrowSize * NumParts;
1725 
1726  // Check if we have any leftovers. If we do, then only handle the case where
1727  // the leftover is one element.
1728  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1729  return UnableToLegalize;
1730 
1731  if (BitsForNumParts != Size) {
 // Accumulate the pieces into a chain of G_INSERTs starting from undef.
1732  unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1733  MIRBuilder.buildUndef(AccumDstReg);
1734 
1735  // Handle the pieces which evenly divide into the requested type with
1736  // extract/op/insert sequence.
1737  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1738  SmallVector<SrcOp, 4> SrcOps;
1739  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1740  unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1741  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1742  SrcOps.push_back(PartOpReg);
1743  }
1744 
1745  unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1746  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1747 
1748  unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1749  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1750  AccumDstReg = PartInsertReg;
1751  }
1752 
1753  // Handle the remaining element sized leftover piece.
1754  SmallVector<SrcOp, 4> SrcOps;
1755  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1756  unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1757  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1758  BitsForNumParts);
1759  SrcOps.push_back(PartOpReg);
1760  }
1761 
 // The leftover op is done at element type, and its insert defines the
 // final destination directly.
1762  unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1763  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1764  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1765  MI.eraseFromParent();
1766 
1767  return Legalized;
1768  }
1769 
 // Even split: unmerge each source into NumParts pieces.
1770  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1771 
1772  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1773 
1774  if (NumOps >= 2)
1775  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1776 
1777  if (NumOps >= 3)
1778  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1779 
1780  for (int i = 0; i < NumParts; ++i) {
1781  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1782 
1783  if (NumOps == 1)
1784  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1785  else if (NumOps == 2) {
1786  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1787  } else if (NumOps == 3) {
1788  MIRBuilder.buildInstr(Opc, {DstReg},
1789  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1790  }
1791 
1792  DstRegs.push_back(DstReg);
1793  }
1794 
 // Recombine the per-piece results into the original destination.
1795  if (NarrowTy.isVector())
1796  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1797  else
1798  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1799 
1800  MI.eraseFromParent();
1801  return Legalized;
1802 }
1803 
1804 // Handle splitting vector operations which need to have the same number of
1805 // elements in each type index, but each type index may have a different element
1806 // type.
1807 //
1808 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1809 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1810 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1811 //
1812 // Also handles some irregular breakdown cases, e.g.
1813 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1814 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1815 // s64 = G_SHL s64, s32
// NOTE(review): doxygen-extracted listing. The extraction dropped the
// LegalizeResult return-type line (orig. 1816), the declaration of NewInsts
// (orig. 1839), and the buildInstrNoInsert(...) calls that open the
// NewInsts.push_back expressions (orig. 1862 and 1870). Confirm against the
// checked-in source before editing.
1817 LegalizerHelper::fewerElementsVectorMultiEltType(
1818  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
// Only splitting via the result type (index 0) is implemented; the per-operand
// narrow types are derived from it below.
1819  if (TypeIdx != 0)
1820  return UnableToLegalize;
1821 
1822  const LLT NarrowTy0 = NarrowTyArg;
1823  const unsigned NewNumElts =
1824  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
1825 
1826  const unsigned DstReg = MI.getOperand(0).getReg();
1827  LLT DstTy = MRI.getType(DstReg);
1828  LLT LeftoverTy0;
1829 
1830  int NumParts, NumLeftover;
1831  // All of the operands need to have the same number of elements, so if we can
1832  // determine a type breakdown for the result type, we can for all of the
1833  // source types.
1834  std::tie(NumParts, NumLeftover)
1835  = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
// getNarrowTypeBreakDown signals an unsatisfiable breakdown with {-1, -1}.
1836  if (NumParts < 0)
1837  return UnableToLegalize;
1838 
1840 
1841  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
1842  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
1843 
// Walk the use operands; operand 0 is the def and is handled via insertParts.
1844  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1845  LLT LeftoverTy;
1846  unsigned SrcReg = MI.getOperand(I).getReg();
1847  LLT SrcTyI = MRI.getType(SrcReg);
// Same element count as the narrowed result, but this operand's element type.
1848  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
1849  LLT LeftoverTyI;
1850 
1851  // Split this operand into the requested typed registers, and any leftover
1852  // required to reproduce the original type.
1853  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
1854  LeftoverRegs))
1855  return UnableToLegalize;
1856 
1857  if (I == 1) {
1858  // For the first operand, create an instruction for each part and setup
1859  // the result.
1860  for (unsigned PartReg : PartRegs) {
1861  unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1863  .addDef(PartDstReg)
1864  .addUse(PartReg));
1865  DstRegs.push_back(PartDstReg);
1866  }
1867 
1868  for (unsigned LeftoverReg : LeftoverRegs) {
1869  unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
1871  .addDef(PartDstReg)
1872  .addUse(LeftoverReg));
1873  LeftoverDstRegs.push_back(PartDstReg);
1874  }
1875  } else {
1876  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
1877 
1878  // Add the newly created operand splits to the existing instructions. The
1879  // odd-sized pieces are ordered after the requested NarrowTyArg sized
1880  // pieces.
1881  unsigned InstCount = 0;
1882  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
1883  NewInsts[InstCount++].addUse(PartRegs[J]);
1884  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
1885  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
1886  }
1887 
// Reuse the scratch vectors for the next operand.
1888  PartRegs.clear();
1889  LeftoverRegs.clear();
1890  }
1891 
1892  // Insert the newly built operations and rebuild the result register.
1893  for (auto &MIB : NewInsts)
1894  MIRBuilder.insertInstr(MIB);
1895 
1896  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
1897 
1898  MI.eraseFromParent();
1899  return Legalized;
1900 }
1901 
// Narrow a single-source conversion (ext/trunc/fp<->int/ptr casts) by
// splitting both source and destination into matching pieces.
// NOTE(review): the LegalizeResult return-type line (orig. 1902) was dropped
// by the doxygen extraction.
1903 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
1904  LLT NarrowTy) {
// Only splitting via the result type is implemented.
1905  if (TypeIdx != 0)
1906  return UnableToLegalize;
1907 
1908  unsigned DstReg = MI.getOperand(0).getReg();
1909  unsigned SrcReg = MI.getOperand(1).getReg();
1910  LLT DstTy = MRI.getType(DstReg);
1911  LLT SrcTy = MRI.getType(SrcReg);
1912 
1913  LLT NarrowTy0 = NarrowTy;
1914  LLT NarrowTy1;
1915  unsigned NumParts;
1916 
1917  if (NarrowTy.isVector()) {
1918  // Uneven breakdown not handled.
1919  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
1920  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
1921  return UnableToLegalize;
1922 
// Source pieces keep the source element type but take the narrowed count.
1923  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
1924  } else {
// Full scalarization: one part per destination element.
1925  NumParts = DstTy.getNumElements();
1926  NarrowTy1 = SrcTy.getElementType();
1927  }
1928 
1929  SmallVector<unsigned, 4> SrcRegs, DstRegs;
1930  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
1931 
1932  for (unsigned I = 0; I < NumParts; ++I) {
1933  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1934  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
1935  .addDef(DstReg)
1936  .addUse(SrcRegs[I]);
1937 
// Preserve FP/arith flags (nnan, nsw, ...) on each split instruction.
1938  NewInst->setFlags(MI.getFlags());
1939  DstRegs.push_back(DstReg);
1940  }
1941 
1942  if (NarrowTy.isVector())
1943  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1944  else
1945  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1946 
1947  MI.eraseFromParent();
1948  return Legalized;
1949 }
1950 
// Narrow a vector G_ICMP/G_FCMP. Both the (boolean) result vector and the two
// compared operand vectors must be split with matching element counts, so the
// narrow types for both type indices are derived from whichever index is
// being legalized.
// NOTE(review): the LegalizeResult return-type line (orig. 1951) was dropped
// by the doxygen extraction.
1952 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
1953  LLT NarrowTy) {
1954  unsigned DstReg = MI.getOperand(0).getReg();
// Operand 1 is the predicate; operand 2 is the first compared value.
1955  unsigned Src0Reg = MI.getOperand(2).getReg();
1956  LLT DstTy = MRI.getType(DstReg);
1957  LLT SrcTy = MRI.getType(Src0Reg);
1958 
1959  unsigned NumParts;
1960  LLT NarrowTy0, NarrowTy1;
1961 
1962  if (TypeIdx == 0) {
// Result type drives the split; mirror the element count onto the sources.
1963  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1964  unsigned OldElts = DstTy.getNumElements();
1965 
1966  NarrowTy0 = NarrowTy;
1967  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
1968  NarrowTy1 = NarrowTy.isVector() ?
1969  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
1970  SrcTy.getElementType();
1971 
1972  } else {
// Source type drives the split; mirror the element count onto the result.
1973  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1974  unsigned OldElts = SrcTy.getNumElements();
1975 
1976  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
1977  NarrowTy.getNumElements();
1978  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
1979  DstTy.getScalarSizeInBits());
1980  NarrowTy1 = NarrowTy;
1981  }
1982 
1983  // FIXME: Don't know how to handle the situation where the small vectors
1984  // aren't all the same size yet.
1985  if (NarrowTy1.isVector() &&
1986  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
1987  return UnableToLegalize;
1988 
1989  CmpInst::Predicate Pred
1990  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1991 
1992  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1993  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
1994  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
1995 
1996  for (unsigned I = 0; I < NumParts; ++I) {
1997  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1998  DstRegs.push_back(DstReg);
1999 
2000  if (MI.getOpcode() == TargetOpcode::G_ICMP)
2001  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2002  else {
// FP compares carry fast-math flags that must survive the split.
2003  MachineInstr *NewCmp
2004  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2005  NewCmp->setFlags(MI.getFlags());
2006  }
2007  }
2008 
2009  if (NarrowTy1.isVector())
2010  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2011  else
2012  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2013 
2014  MI.eraseFromParent();
2015  return Legalized;
2016 }
2017 
// Narrow a G_SELECT with a vector result (TypeIdx 0) or a vector condition
// (TypeIdx 1). A scalar condition is reused unsplit for every piece.
// NOTE(review): the LegalizeResult return-type line (orig. 2018) was dropped
// by the doxygen extraction.
2019 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2020  LLT NarrowTy) {
2021  unsigned DstReg = MI.getOperand(0).getReg();
2022  unsigned CondReg = MI.getOperand(1).getReg();
2023 
2024  unsigned NumParts = 0;
2025  LLT NarrowTy0, NarrowTy1;
2026 
2027  LLT DstTy = MRI.getType(DstReg);
2028  LLT CondTy = MRI.getType(CondReg);
2029  unsigned Size = DstTy.getSizeInBits();
2030 
// TypeIdx 1 can only be requested when the condition is itself a vector.
2031  assert(TypeIdx == 0 || CondTy.isVector());
2032 
2033  if (TypeIdx == 0) {
2034  NarrowTy0 = NarrowTy;
2035  NarrowTy1 = CondTy;
2036 
2037  unsigned NarrowSize = NarrowTy0.getSizeInBits();
2038  // FIXME: Don't know how to handle the situation where the small vectors
2039  // aren't all the same size yet.
2040  if (Size % NarrowSize != 0)
2041  return UnableToLegalize;
2042 
2043  NumParts = Size / NarrowSize;
2044 
2045  // Need to break down the condition type
2046  if (CondTy.isVector()) {
2047  if (CondTy.getNumElements() == NumParts)
2048  NarrowTy1 = CondTy.getElementType();
2049  else
2050  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2051  CondTy.getScalarSizeInBits());
2052  }
2053  } else {
2054  NumParts = CondTy.getNumElements();
2055  if (NarrowTy.isVector()) {
2056  // TODO: Handle uneven breakdown.
2057  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2058  return UnableToLegalize;
2059 
// NOTE(review): vector-condition + vector-narrow splitting is not implemented
// yet, so this path bails out unconditionally after the sanity check above.
2060  return UnableToLegalize;
2061  } else {
2062  NarrowTy0 = DstTy.getElementType();
2063  NarrowTy1 = NarrowTy;
2064  }
2065  }
2066 
2067  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
// A vector condition is split per-part; a scalar condition is shared.
2068  if (CondTy.isVector())
2069  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2070 
2071  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2072  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2073 
2074  for (unsigned i = 0; i < NumParts; ++i) {
2075  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2076  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2077  Src1Regs[i], Src2Regs[i]);
2078  DstRegs.push_back(DstReg);
2079  }
2080 
2081  if (NarrowTy0.isVector())
2082  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2083  else
2084  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2085 
2086  MI.eraseFromParent();
2087  return Legalized;
2088 }
2089 
// Narrow a G_PHI by creating one narrow phi per part in the result block and
// splitting each incoming value at the end of its predecessor block.
// NOTE(review): doxygen extraction dropped the LegalizeResult return-type line
// (orig. 2090) and the declaration of NewInsts (orig. 2107,
// presumably SmallVector<MachineInstrBuilder, 4> -- confirm against source).
2091 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2092  LLT NarrowTy) {
2093  const unsigned DstReg = MI.getOperand(0).getReg();
2094  LLT PhiTy = MRI.getType(DstReg);
2095  LLT LeftoverTy;
2096 
2097  // All of the operands need to have the same number of elements, so if we can
2098  // determine a type breakdown for the result type, we can for all of the
2099  // source types.
2100  int NumParts, NumLeftover;
2101  std::tie(NumParts, NumLeftover)
2102  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2103  if (NumParts < 0)
2104  return UnableToLegalize;
2105 
2106  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
2108 
2109  const int TotalNumParts = NumParts + NumLeftover;
2110 
2111  // Insert the new phis in the result block first.
2112  for (int I = 0; I != TotalNumParts; ++I) {
// The first NumParts pieces use NarrowTy; the remainder use the leftover type.
2113  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2114  unsigned PartDstReg = MRI.createGenericVirtualRegister(Ty);
2115  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2116  .addDef(PartDstReg));
2117  if (I < NumParts)
2118  DstRegs.push_back(PartDstReg);
2119  else
2120  LeftoverDstRegs.push_back(PartDstReg);
2121  }
2122 
// The merge of the narrow phis must come after all phis in the block.
2123  MachineBasicBlock *MBB = MI.getParent();
2124  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2125  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2126 
2127  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
2128 
2129  // Insert code to extract the incoming values in each predecessor block.
2130  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2131  PartRegs.clear();
2132  LeftoverRegs.clear();
2133 
2134  unsigned SrcReg = MI.getOperand(I).getReg();
2135  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
// Split the incoming value just before the predecessor's terminator.
2136  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2137 
2138  LLT Unused;
2139  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2140  LeftoverRegs))
2141  return UnableToLegalize;
2142 
2143  // Add the newly created operand splits to the existing instructions. The
2144  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2145  // pieces.
2146  for (int J = 0; J != TotalNumParts; ++J) {
2147  MachineInstrBuilder MIB = NewInsts[J];
2148  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2149  MIB.addMBB(&OpMBB);
2150  }
2151  }
2152 
2153  MI.eraseFromParent();
2154  return Legalized;
2155 }
2156 
// Split a non-atomic G_LOAD/G_STORE into NarrowTy-sized accesses (plus one
// leftover-sized access for uneven breakdowns), materializing a GEP for each
// piece's byte offset.
// NOTE(review): doxygen extraction dropped the LegalizeResult return-type line
// (orig. 2157), the second half of the atomic-ordering condition (orig. 2169),
// and a line inside the lambda (orig. 2204). Confirm against the source.
2158 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2159  LLT NarrowTy) {
2160  // FIXME: Don't know how to handle secondary types yet.
2161  if (TypeIdx != 0)
2162  return UnableToLegalize;
2163 
2164  MachineMemOperand *MMO = *MI.memoperands_begin();
2165 
2166  // This implementation doesn't work for atomics. Give up instead of doing
2167  // something invalid.
2168  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2170  return UnableToLegalize;
2171 
2172  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2173  unsigned ValReg = MI.getOperand(0).getReg();
2174  unsigned AddrReg = MI.getOperand(1).getReg();
2175  LLT ValTy = MRI.getType(ValReg);
2176 
2177  int NumParts = -1;
2178  int NumLeftover = -1;
2179  LLT LeftoverTy;
2180  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
2181  if (IsLoad) {
// For loads only the part counts are needed; registers are created later.
2182  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2183  } else {
// For stores the value must actually be split into registers now.
2184  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2185  NarrowLeftoverRegs)) {
2186  NumParts = NarrowRegs.size();
2187  NumLeftover = NarrowLeftoverRegs.size();
2188  }
2189  }
2190 
// NumParts stayed -1 if the breakdown/extraction failed.
2191  if (NumParts == -1)
2192  return UnableToLegalize;
2193 
// Offsets are computed in an integer type as wide as the pointer.
2194  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2195 
2196  unsigned TotalSize = ValTy.getSizeInBits();
2197 
2198  // Split the load/store into PartTy sized pieces starting at Offset. If this
2199  // is a load, return the new registers in ValRegs. For a store, each elements
2200  // of ValRegs should be PartTy. Returns the next offset that needs to be
2201  // handled.
2202  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
2203  unsigned Offset) -> unsigned {
2205  unsigned PartSize = PartTy.getSizeInBits();
2206  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2207  Offset += PartSize, ++Idx) {
2208  unsigned ByteSize = PartSize / 8;
2209  unsigned ByteOffset = Offset / 8;
2210  unsigned NewAddrReg = 0;
2211 
2212  MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2213 
// Derive a memory operand describing just this piece of the access.
2214  MachineMemOperand *NewMMO =
2215  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2216 
2217  if (IsLoad) {
2218  unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
2219  ValRegs.push_back(Dst);
2220  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2221  } else {
2222  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2223  }
2224  }
2225 
2226  return Offset;
2227  };
2228 
2229  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2230 
2231  // Handle the rest of the register if this isn't an even type breakdown.
2232  if (LeftoverTy.isValid())
2233  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2234 
2235  if (IsLoad) {
2236  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2237  LeftoverTy, NarrowLeftoverRegs);
2238  }
2239 
2240  MI.eraseFromParent();
2241  return Legalized;
2242 }
2243 
// Top-level dispatcher for the FewerElements legalization action: routes each
// opcode class to the matching splitting strategy above.
// NOTE(review): doxygen extraction dropped the signature's first lines
// (orig. 2244-2245: the LegalizeResult return type and
// "LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,").
2246  LLT NarrowTy) {
2247  using namespace TargetOpcode;
2248 
2249  MIRBuilder.setInstr(MI);
2250  switch (MI.getOpcode()) {
2251  case G_IMPLICIT_DEF:
2252  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
// Unary/binary/ternary ops where every operand shares the result type.
2253  case G_AND:
2254  case G_OR:
2255  case G_XOR:
2256  case G_ADD:
2257  case G_SUB:
2258  case G_MUL:
2259  case G_SMULH:
2260  case G_UMULH:
2261  case G_FADD:
2262  case G_FMUL:
2263  case G_FSUB:
2264  case G_FNEG:
2265  case G_FABS:
2266  case G_FCANONICALIZE:
2267  case G_FDIV:
2268  case G_FREM:
2269  case G_FMA:
2270  case G_FPOW:
2271  case G_FEXP:
2272  case G_FEXP2:
2273  case G_FLOG:
2274  case G_FLOG2:
2275  case G_FLOG10:
2276  case G_FCEIL:
2277  case G_FFLOOR:
2278  case G_INTRINSIC_ROUND:
2279  case G_INTRINSIC_TRUNC:
2280  case G_FCOS:
2281  case G_FSIN:
2282  case G_FSQRT:
2283  case G_BSWAP:
2284  case G_SDIV:
2285  return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
// Ops whose operands share an element count but may differ in element type
// (e.g. a shift amount vector narrower than the shifted value).
2286  case G_SHL:
2287  case G_LSHR:
2288  case G_ASHR:
2289  case G_CTLZ:
2290  case G_CTLZ_ZERO_UNDEF:
2291  case G_CTTZ:
2292  case G_CTTZ_ZERO_UNDEF:
2293  case G_CTPOP:
2294  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
// Single-source conversions.
2295  case G_ZEXT:
2296  case G_SEXT:
2297  case G_ANYEXT:
2298  case G_FPEXT:
2299  case G_FPTRUNC:
2300  case G_SITOFP:
2301  case G_UITOFP:
2302  case G_FPTOSI:
2303  case G_FPTOUI:
2304  case G_INTTOPTR:
2305  case G_PTRTOINT:
2306  case G_ADDRSPACE_CAST:
2307  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2308  case G_ICMP:
2309  case G_FCMP:
2310  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2311  case G_SELECT:
2312  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2313  case G_PHI:
2314  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
2315  case G_LOAD:
2316  case G_STORE:
2317  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2318  default:
2319  return UnableToLegalize;
2320  }
2321 }
2322 
// Expand a wide G_SHL/G_LSHR/G_ASHR with a known-constant amount into
// operations on the two half-width registers {InL, InH}. Mirrors the
// SelectionDAG ExpandShiftByConstant algebra: for Amt < half-width,
//   shl:  Lo = InL << Amt;              Hi = (InH << Amt) | (InL >> (N-Amt))
//   lshr: Lo = (InL >> Amt) | (InH << (N-Amt)); Hi = InH >> Amt
//   ashr: same Lo as lshr;              Hi = InH a>> Amt
// with dedicated cases for Amt == 0, Amt == N and Amt > 2N.
// NOTE(review): the LegalizeResult return-type line (orig. 2323) was dropped
// by the doxygen extraction.
2324 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
2325  const LLT HalfTy, const LLT AmtTy) {
2326 
2327  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2328  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2329  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2330 
// Shift by zero: just re-merge the halves.
2331  if (Amt.isNullValue()) {
2332  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2333  MI.eraseFromParent();
2334  return Legalized;
2335  }
2336 
2337  LLT NVT = HalfTy;
2338  unsigned NVTBits = HalfTy.getSizeInBits();
2339  unsigned VTBits = 2 * NVTBits;
2340 
2341  SrcOp Lo(0), Hi(0);
2342  if (MI.getOpcode() == TargetOpcode::G_SHL) {
2343  if (Amt.ugt(VTBits)) {
2344  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2345  } else if (Amt.ugt(NVTBits)) {
// Everything shifts out of the low half; only InL contributes to Hi.
2346  Lo = MIRBuilder.buildConstant(NVT, 0);
2347  Hi = MIRBuilder.buildShl(NVT, InL,
2348  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2349  } else if (Amt == NVTBits) {
2350  Lo = MIRBuilder.buildConstant(NVT, 0);
2351  Hi = InL;
2352  } else {
2353  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2354  auto OrLHS =
2355  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2356  auto OrRHS = MIRBuilder.buildLShr(
2357  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2358  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2359  }
2360  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2361  if (Amt.ugt(VTBits)) {
2362  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2363  } else if (Amt.ugt(NVTBits)) {
2364  Lo = MIRBuilder.buildLShr(NVT, InH,
2365  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2366  Hi = MIRBuilder.buildConstant(NVT, 0);
2367  } else if (Amt == NVTBits) {
2368  Lo = InH;
2369  Hi = MIRBuilder.buildConstant(NVT, 0);
2370  } else {
2371  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2372 
2373  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2374  auto OrRHS = MIRBuilder.buildShl(
2375  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2376 
2377  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2378  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2379  }
2380  } else {
// G_ASHR: oversized shifts fill both halves with the sign of InH.
2381  if (Amt.ugt(VTBits)) {
2382  Hi = Lo = MIRBuilder.buildAShr(
2383  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2384  } else if (Amt.ugt(NVTBits)) {
2385  Lo = MIRBuilder.buildAShr(NVT, InH,
2386  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2387  Hi = MIRBuilder.buildAShr(NVT, InH,
2388  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2389  } else if (Amt == NVTBits) {
2390  Lo = InH;
2391  Hi = MIRBuilder.buildAShr(NVT, InH,
2392  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2393  } else {
2394  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2395 
2396  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2397  auto OrRHS = MIRBuilder.buildShl(
2398  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2399 
2400  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2401  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2402  }
2403  }
2404 
2405  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2406  MI.eraseFromParent();
2407 
2408  return Legalized;
2409 }
2410 
2411 // TODO: Optimize if constant shift amount.
2413 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2414  LLT RequestedTy) {
2415  if (TypeIdx == 1) {
2416  Observer.changingInstr(MI);
2417  narrowScalarSrc(MI, RequestedTy, 2);
2418  Observer.changedInstr(MI);
2419  return Legalized;
2420  }
2421 
2422  unsigned DstReg = MI.getOperand(0).getReg();
2423  LLT DstTy = MRI.getType(DstReg);
2424  if (DstTy.isVector())
2425  return UnableToLegalize;
2426 
2427  unsigned Amt = MI.getOperand(2).getReg();
2428  LLT ShiftAmtTy = MRI.getType(Amt);
2429  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2430  if (DstEltSize % 2 != 0)
2431  return UnableToLegalize;
2432 
2433  // Ignore the input type. We can only go to exactly half the size of the
2434  // input. If that isn't small enough, the resulting pieces will be further
2435  // legalized.
2436  const unsigned NewBitSize = DstEltSize / 2;
2437  const LLT HalfTy = LLT::scalar(NewBitSize);
2438  const LLT CondTy = LLT::scalar(1);
2439 
2440  if (const MachineInstr *KShiftAmt =
2441  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2442  return narrowScalarShiftByConstant(
2443  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2444  }
2445 
2446  // TODO: Expand with known bits.
2447 
2448  // Handle the fully general expansion by an unknown amount.
2449  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2450 
2451  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2452  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2453  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2454 
2455  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2456  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2457 
2458  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2459  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2460  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2461 
2462  unsigned ResultRegs[2];
2463  switch (MI.getOpcode()) {
2464  case TargetOpcode::G_SHL: {
2465  // Short: ShAmt < NewBitSize
2466  auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2467 
2468  auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2469  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2470  auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2471 
2472  // Long: ShAmt >= NewBitSize
2473  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
2474  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2475 
2476  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2477  auto Hi = MIRBuilder.buildSelect(
2478  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2479 
2480  ResultRegs[0] = Lo.getReg(0);
2481  ResultRegs[1] = Hi.getReg(0);
2482  break;
2483  }
2484  case TargetOpcode::G_LSHR: {
2485  // Short: ShAmt < NewBitSize
2486  auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2487 
2488  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2489  auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2490  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2491 
2492  // Long: ShAmt >= NewBitSize
2493  auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
2494  auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2495 
2496  auto Lo = MIRBuilder.buildSelect(
2497  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2498  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2499 
2500  ResultRegs[0] = Lo.getReg(0);
2501  ResultRegs[1] = Hi.getReg(0);
2502  break;
2503  }
2504  case TargetOpcode::G_ASHR: {
2505  // Short: ShAmt < NewBitSize
2506  auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2507 
2508  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2509  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2510  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2511 
2512  // Long: ShAmt >= NewBitSize
2513 
2514  // Sign of Hi part.
2515  auto HiL = MIRBuilder.buildAShr(
2516  HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2517 
2518  auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2519 
2520  auto Lo = MIRBuilder.buildSelect(
2521  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2522 
2523  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2524 
2525  ResultRegs[0] = Lo.getReg(0);
2526  ResultRegs[1] = Hi.getReg(0);
2527  break;
2528  }
2529  default:
2530  llvm_unreachable("not a shift");
2531  }
2532 
2533  MIRBuilder.buildMerge(DstReg, ResultRegs);
2534  MI.eraseFromParent();
2535  return Legalized;
2536 }
2537 
// Widen a G_PHI to MoreTy by padding each incoming value in its predecessor
// block (before the terminator) and truncating the widened result back at the
// top of the phi's own block.
// NOTE(review): the LegalizeResult return-type line (orig. 2538) was dropped
// by the doxygen extraction.
2539 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2540  LLT MoreTy) {
2541  assert(TypeIdx == 0 && "Expecting only Idx 0");
2542 
2543  Observer.changingInstr(MI);
// Phi operands come in (value, block) pairs starting at operand 1.
2544  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2545  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2546  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2547  moreElementsVectorSrc(MI, MoreTy, I);
2548  }
2549 
2550  MachineBasicBlock &MBB = *MI.getParent();
// NOTE(review): --getFirstNonPHI() places the insert point on the last PHI --
// presumably so the result fix-up lands after all phis once inserted; verify
// against MachineIRBuilder's insertion semantics.
2551  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2552  moreElementsVectorDst(MI, MoreTy, 0);
2553  Observer.changedInstr(MI);
2554  return Legalized;
2555 }
2556 
// Top-level dispatcher for the MoreElements legalization action: pad vector
// operands/results up to MoreTy for the opcodes that support it.
// NOTE(review): doxygen extraction dropped the signature's first lines
// (orig. 2557-2558: the LegalizeResult return type and
// "LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,").
2559  LLT MoreTy) {
2560  MIRBuilder.setInstr(MI);
2561  unsigned Opc = MI.getOpcode();
2562  switch (Opc) {
2563  case TargetOpcode::G_IMPLICIT_DEF: {
2564  Observer.changingInstr(MI);
2565  moreElementsVectorDst(MI, MoreTy, 0);
2566  Observer.changedInstr(MI);
2567  return Legalized;
2568  }
// Bitwise ops are safe to widen: the padding lanes are simply undefined.
2569  case TargetOpcode::G_AND:
2570  case TargetOpcode::G_OR:
2571  case TargetOpcode::G_XOR: {
2572  Observer.changingInstr(MI);
2573  moreElementsVectorSrc(MI, MoreTy, 1);
2574  moreElementsVectorSrc(MI, MoreTy, 2);
2575  moreElementsVectorDst(MI, MoreTy, 0);
2576  Observer.changedInstr(MI);
2577  return Legalized;
2578  }
2579  case TargetOpcode::G_EXTRACT:
2580  if (TypeIdx != 1)
2581  return UnableToLegalize;
2582  Observer.changingInstr(MI);
2583  moreElementsVectorSrc(MI, MoreTy, 1);
2584  Observer.changedInstr(MI);
2585  return Legalized;
2586  case TargetOpcode::G_INSERT:
2587  if (TypeIdx != 0)
2588  return UnableToLegalize;
2589  Observer.changingInstr(MI);
2590  moreElementsVectorSrc(MI, MoreTy, 1);
2591  moreElementsVectorDst(MI, MoreTy, 0);
2592  Observer.changedInstr(MI);
2593  return Legalized;
2594  case TargetOpcode::G_SELECT:
2595  if (TypeIdx != 0)
2596  return UnableToLegalize;
// Vector conditions would also need widening; not handled yet.
2597  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2598  return UnableToLegalize;
2599 
2600  Observer.changingInstr(MI);
2601  moreElementsVectorSrc(MI, MoreTy, 2);
2602  moreElementsVectorSrc(MI, MoreTy, 3);
2603  moreElementsVectorDst(MI, MoreTy, 0);
2604  Observer.changedInstr(MI);
2605  return Legalized;
2606  case TargetOpcode::G_PHI:
2607  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
2608  default:
2609  return UnableToLegalize;
2610  }
2611 }
2612 
// Schoolbook multiplication of two multi-part integers. Src1Regs/Src2Regs hold
// the operands split into NarrowTy limbs (low limb first); DstRegs is
// pre-sized to the number of result limbs and is filled in place. For each
// result limb, the low halves of the contributing products, the high halves
// (G_UMULH) of the previous limb's products, and the carries accumulated so
// far are summed with carry tracking (G_UADDO + zext of the carry-out).
// NOTE(review): the doxygen extraction dropped orig. line 2617, which is the
// declaration of the local builder alias B (presumably
// "MachineIRBuilder &B = MIRBuilder;") -- confirm against the source.
2613 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
2614  ArrayRef<unsigned> Src1Regs,
2615  ArrayRef<unsigned> Src2Regs,
2616  LLT NarrowTy) {
2618  unsigned SrcParts = Src1Regs.size();
2619  unsigned DstParts = DstRegs.size();
2620 
2621  unsigned DstIdx = 0; // Low bits of the result.
// Lowest limb is just the low half of the lowest cross product.
2622  unsigned FactorSum =
2623  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
2624  DstRegs[DstIdx] = FactorSum;
2625 
2626  unsigned CarrySumPrevDstIdx;
2627  SmallVector<unsigned, 4> Factors;
2628 
2629  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
2630  // Collect low parts of muls for DstIdx.
2631  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
2632  i <= std::min(DstIdx, SrcParts - 1); ++i) {
2633  MachineInstrBuilder Mul =
2634  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
2635  Factors.push_back(Mul.getReg(0));
2636  }
2637  // Collect high parts of muls from previous DstIdx.
2638  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
2639  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
2640  MachineInstrBuilder Umulh =
2641  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
2642  Factors.push_back(Umulh.getReg(0));
2643  }
2644  // Add CarrySum from additons calculated for previous DstIdx.
2645  if (DstIdx != 1) {
2646  Factors.push_back(CarrySumPrevDstIdx);
2647  }
2648 
2649  unsigned CarrySum = 0;
2650  // Add all factors and accumulate all carries into CarrySum.
2651  if (DstIdx != DstParts - 1) {
2652  MachineInstrBuilder Uaddo =
2653  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
2654  FactorSum = Uaddo.getReg(0);
// Carry-out is an s1; widen it so it can be summed with later carries.
2655  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
2656  for (unsigned i = 2; i < Factors.size(); ++i) {
2657  MachineInstrBuilder Uaddo =
2658  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
2659  FactorSum = Uaddo.getReg(0);
2660  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
2661  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
2662  }
2663  } else {
2664  // Since value for the next index is not calculated, neither is CarrySum.
// Top limb: overflow past the result width is discarded, so plain adds do.
2665  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
2666  for (unsigned i = 2; i < Factors.size(); ++i)
2667  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
2668  }
2669 
2670  CarrySumPrevDstIdx = CarrySum;
2671  DstRegs[DstIdx] = FactorSum;
2672  Factors.clear();
2673  }
2674 }
2675 
// Narrow a scalar G_MUL/G_UMULH by splitting the operands into NarrowTy limbs
// and multiplying them with multiplyRegisters. For G_UMULH the full
// double-width product is computed and only its upper limbs are kept.
// NOTE(review): the LegalizeResult return-type line (orig. 2676) was dropped
// by the doxygen extraction.
2677 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
2678  unsigned DstReg = MI.getOperand(0).getReg();
2679  unsigned Src1 = MI.getOperand(1).getReg();
2680  unsigned Src2 = MI.getOperand(2).getReg();
2681 
2682  LLT Ty = MRI.getType(DstReg);
2683  if (Ty.isVector())
2684  return UnableToLegalize;
2685 
2686  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
2687  unsigned DstSize = Ty.getSizeInBits();
2688  unsigned NarrowSize = NarrowTy.getSizeInBits();
// Uneven limb breakdowns are not supported.
2689  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
2690  return UnableToLegalize;
2691 
2692  unsigned NumDstParts = DstSize / NarrowSize;
2693  unsigned NumSrcParts = SrcSize / NarrowSize;
2694  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
// G_UMULH needs the full 2x-wide product before discarding the low half.
2695  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
2696 
2697  SmallVector<unsigned, 2> Src1Parts, Src2Parts, DstTmpRegs;
2698  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
2699  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
2700  DstTmpRegs.resize(DstTmpParts);
2701  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
2702 
2703  // Take only high half of registers if this is high mul.
2704  ArrayRef<unsigned> DstRegs(
2705  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
2706  MIRBuilder.buildMerge(DstReg, DstRegs);
2707  MI.eraseFromParent();
2708  return Legalized;
2709 }
2710 
// Narrow the source (TypeIdx 1) of a G_EXTRACT: split the source into
// NarrowTy pieces, keep only the pieces that overlap the extracted range,
// sub-extracting from a piece when the range only partially covers it, then
// merge the collected segments into the result.
// NOTE(review): the LegalizeResult return-type line (orig. 2711) was dropped
// by the doxygen extraction.
2712 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2713  LLT NarrowTy) {
2714  if (TypeIdx != 1)
2715  return UnableToLegalize;
2716 
2717  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2718 
2719  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2720  // FIXME: add support for when SizeOp1 isn't an exact multiple of
2721  // NarrowSize.
2722  if (SizeOp1 % NarrowSize != 0)
2723  return UnableToLegalize;
2724  int NumParts = SizeOp1 / NarrowSize;
2725 
2726  SmallVector<unsigned, 2> SrcRegs, DstRegs;
2727  SmallVector<uint64_t, 2> Indexes;
2728  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2729 
// OpReg/OpStart/OpSize describe the destination range within the source.
2730  unsigned OpReg = MI.getOperand(0).getReg();
2731  uint64_t OpStart = MI.getOperand(2).getImm();
2732  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2733  for (int i = 0; i < NumParts; ++i) {
2734  unsigned SrcStart = i * NarrowSize;
2735 
2736  if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2737  // No part of the extract uses this subregister, ignore it.
2738  continue;
2739  } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2740  // The entire subregister is extracted, forward the value.
2741  DstRegs.push_back(SrcRegs[i]);
2742  continue;
2743  }
2744 
2745  // OpSegStart is where this destination segment would start in OpReg if it
2746  // extended infinitely in both directions.
2747  int64_t ExtractOffset;
2748  uint64_t SegSize;
2749  if (OpStart < SrcStart) {
// Extract begins before this piece: take from the piece's low end.
2750  ExtractOffset = 0;
2751  SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2752  } else {
// Extract begins inside this piece: skip the leading bits.
2753  ExtractOffset = OpStart - SrcStart;
2754  SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2755  }
2756 
2757  unsigned SegReg = SrcRegs[i];
2758  if (ExtractOffset != 0 || SegSize != NarrowSize) {
2759  // A genuine extract is needed.
2760  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2761  MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2762  }
2763 
2764  DstRegs.push_back(SegReg);
2765  }
2766 
2767  unsigned DstReg = MI.getOperand(0).getReg();
2768  if(MRI.getType(DstReg).isVector())
2769  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2770  else
2771  MIRBuilder.buildMerge(DstReg, DstRegs);
2772  MI.eraseFromParent();
2773  return Legalized;
2774 }
2775 
// Narrow a G_INSERT whose result (TypeIdx == 0) is wider than NarrowTy:
// split the base value (operand 1) into NarrowTy-sized pieces, and for each
// piece either forward it unchanged, replace it entirely with the inserted
// value, or splice in the overlapping slice of the inserted value via
// G_EXTRACT + G_INSERT. The pieces are then merged back into the result.
//
// \p MI      the G_INSERT: dst = insert(base, val, imm-offset).
// \p TypeIdx only the result type (index 0) is handled.
// \p NarrowTy the piece type the result is split into.
// Returns Legalized on success, UnableToLegalize otherwise.
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<unsigned, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  // Split the base value (the register being inserted into) into pieces.
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the inserted value: OpSize bits written at
  // bit OpStart of the result.
  unsigned OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of piece i within the wide result.
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // Inserted value started before this piece: write from bit 0 of the
      // piece, skipping the part of the value already consumed.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // Inserted value starts inside this piece: write at InsertOffset,
      // taking the value from its beginning.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    unsigned SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Splice the segment into this piece of the base value.
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  // Reassemble the pieces into the destination; vectors use G_BUILD_VECTOR,
  // scalars use G_MERGE_VALUES.
  unsigned DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
2850 
2852 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
2853  LLT NarrowTy) {
2854  unsigned DstReg = MI.getOperand(0).getReg();
2855  LLT DstTy = MRI.getType(DstReg);
2856 
2857  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
2858 
2859  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2860  SmallVector<unsigned, 4> Src0Regs, Src0LeftoverRegs;
2861  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2862  LLT LeftoverTy;
2863  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
2864  Src0Regs, Src0LeftoverRegs))
2865  return UnableToLegalize;
2866 
2867  LLT Unused;
2868  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
2869  Src1Regs, Src1LeftoverRegs))
2870  llvm_unreachable("inconsistent extractParts result");
2871 
2872  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2873  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2874  {Src0Regs[I], Src1Regs[I]});
2875  DstRegs.push_back(Inst->getOperand(0).getReg());
2876  }
2877 
2878  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2879  auto Inst = MIRBuilder.buildInstr(
2880  MI.getOpcode(),
2881  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
2882  DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
2883  }
2884 
2885  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2886  LeftoverTy, DstLeftoverRegs);
2887 
2888  MI.eraseFromParent();
2889  return Legalized;
2890 }
2891 
2893 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
2894  LLT NarrowTy) {
2895  if (TypeIdx != 0)
2896  return UnableToLegalize;
2897 
2898  unsigned CondReg = MI.getOperand(1).getReg();
2899  LLT CondTy = MRI.getType(CondReg);
2900  if (CondTy.isVector()) // TODO: Handle vselect
2901  return UnableToLegalize;
2902 
2903  unsigned DstReg = MI.getOperand(0).getReg();
2904  LLT DstTy = MRI.getType(DstReg);
2905 
2906  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2907  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2908  SmallVector<unsigned, 4> Src2Regs, Src2LeftoverRegs;
2909  LLT LeftoverTy;
2910  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
2911  Src1Regs, Src1LeftoverRegs))
2912  return UnableToLegalize;
2913 
2914  LLT Unused;
2915  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
2916  Src2Regs, Src2LeftoverRegs))
2917  llvm_unreachable("inconsistent extractParts result");
2918 
2919  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2920  auto Select = MIRBuilder.buildSelect(NarrowTy,
2921  CondReg, Src1Regs[I], Src2Regs[I]);
2922  DstRegs.push_back(Select->getOperand(0).getReg());
2923  }
2924 
2925  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2926  auto Select = MIRBuilder.buildSelect(
2927  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
2928  DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
2929  }
2930 
2931  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2932  LeftoverTy, DstLeftoverRegs);
2933 
2934  MI.eraseFromParent();
2935  return Legalized;
2936 }
2937 
2939 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2940  unsigned Opc = MI.getOpcode();
2941  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2942  auto isSupported = [this](const LegalityQuery &Q) {
2943  auto QAction = LI.getAction(Q).Action;
2944  return QAction == Legal || QAction == Libcall || QAction == Custom;
2945  };
2946  switch (Opc) {
2947  default:
2948  return UnableToLegalize;
2949  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
2950  // This trivially expands to CTLZ.
2951  Observer.changingInstr(MI);
2952  MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
2953  Observer.changedInstr(MI);
2954  return Legalized;
2955  }
2956  case TargetOpcode::G_CTLZ: {
2957  unsigned SrcReg = MI.getOperand(1).getReg();
2958  unsigned Len = Ty.getSizeInBits();
2959  if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
2960  // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2961  auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
2962  {Ty}, {SrcReg});
2963  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2964  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2965  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2966  SrcReg, MIBZero);
2967  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2968  MIBCtlzZU);
2969  MI.eraseFromParent();
2970  return Legalized;
2971  }
2972  // for now, we do this:
2973  // NewLen = NextPowerOf2(Len);
2974  // x = x | (x >> 1);
2975  // x = x | (x >> 2);
2976  // ...
2977  // x = x | (x >>16);
2978  // x = x | (x >>32); // for 64-bit input
2979  // Upto NewLen/2
2980  // return Len - popcount(x);
2981  //
2982  // Ref: "Hacker's Delight" by Henry Warren
2983  unsigned Op = SrcReg;
2984  unsigned NewLen = PowerOf2Ceil(Len);
2985  for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
2986  auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
2987  auto MIBOp = MIRBuilder.buildInstr(
2988  TargetOpcode::G_OR, {Ty},
2989  {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
2990  {Op, MIBShiftAmt})});
2991  Op = MIBOp->getOperand(0).getReg();
2992  }
2993  auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
2994  MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2995  {MIRBuilder.buildConstant(Ty, Len), MIBPop});
2996  MI.eraseFromParent();
2997  return Legalized;
2998  }
2999  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
3000  // This trivially expands to CTTZ.
3001  Observer.changingInstr(MI);
3002  MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
3003  Observer.changedInstr(MI);
3004  return Legalized;
3005  }
3006  case TargetOpcode::G_CTTZ: {
3007  unsigned SrcReg = MI.getOperand(1).getReg();
3008  unsigned Len = Ty.getSizeInBits();
3009  if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
3010  // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3011  // zero.
3012  auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
3013  {Ty}, {SrcReg});
3014  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3015  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3016  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3017  SrcReg, MIBZero);
3018  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3019  MIBCttzZU);
3020  MI.eraseFromParent();
3021  return Legalized;
3022  }
3023  // for now, we use: { return popcount(~x & (x - 1)); }
3024  // unless the target has ctlz but not ctpop, in which case we use:
3025  // { return 32 - nlz(~x & (x-1)); }
3026  // Ref: "Hacker's Delight" by Henry Warren
3027  auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
3028  auto MIBNot =
3029  MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
3030  auto MIBTmp = MIRBuilder.buildInstr(
3031  TargetOpcode::G_AND, {Ty},
3032  {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
3033  {SrcReg, MIBCstNeg1})});
3034  if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
3035  isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
3036  auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
3038  TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3039  {MIBCstLen,
3040  MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
3041  MI.eraseFromParent();
3042  return Legalized;
3043  }
3044  MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
3045  MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
3046  return Legalized;
3047  }
3048  }
3049 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:833
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1)
Build and insert Res = G_GEP Op0, Op1.
MachineBasicBlock * getMBB() const
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
void setFPImm(const ConstantFP *CFP)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
void push_back(const T &Elt)
Definition: SmallVector.h:211
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
unsigned less than
Definition: InstrTypes.h:671
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:810
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:371
static uint32_t Concat[]
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:816
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4446
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
uint64_t getSizeInBits() const
Return the size in bits of the memory reference.
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:122
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
MachineInstr * getOpcodeDef(unsigned Opcode, unsigned Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:258
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
unsigned getReg() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
void setImm(int64_t immVal)
MachineInstrBuilder buildInsert(unsigned Res, unsigned Src, unsigned Op, unsigned Index)
virtual const CallLowering * getCallLowering() const
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:288
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
Some kind of error has occurred and we could not legalize this instruction.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:639
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
size_t size() const
Definition: SmallVector.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:168
const APFloat & getValueAPF() const
Definition: Constants.h:302
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:119
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emiting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
void setFlags(unsigned flags)
Definition: MachineInstr.h:303
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static MachineOperand CreateES(const char *SymName, unsigned char TargetFlags=0)
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType)
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
Representation of each machine instruction.
Definition: MachineInstr.h:63
bool ugt(const APInt &RHS) const
Unsigned greather than comparison.
Definition: APInt.h:1254
Instruction has been legalized and the MachineFunction changed.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_FCMP PredOp0, Op1.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
static Constant * getZeroValueForNegation(Type *Ty)
Floating point negation must be implemented with f(x) = -0.0 - x.
Definition: Constants.cpp:780
uint32_t Size
Definition: Profile.cpp:46
void setCImm(const ConstantInt *CI)
const DataLayout & getDataLayout() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Optional< MachineInstrBuilder > materializeGEP(unsigned &Res, unsigned Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:289
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:651
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
This file describes how to lower LLVM calls to machine code calls.
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned getReg(unsigned Idx)
Get the register for the operand index.
MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
IRTranslator LLVM IR MI
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
const ConstantInt * getCImm() const
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:658
This file describes how to lower LLVM code to machine code.
unsigned getPredicate() const