Line data Source code
1 : //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file This file implements the LegalizerHelper class to legalize
11 : /// individual instructions and the LegalizeMachineIR wrapper pass for the
12 : /// primary legalization.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 : #include "llvm/CodeGen/GlobalISel/CallLowering.h"
18 : #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 : #include "llvm/CodeGen/MachineRegisterInfo.h"
20 : #include "llvm/CodeGen/TargetInstrInfo.h"
21 : #include "llvm/CodeGen/TargetLowering.h"
22 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 : #include "llvm/Support/Debug.h"
24 : #include "llvm/Support/MathExtras.h"
25 : #include "llvm/Support/raw_ostream.h"
26 :
27 : #define DEBUG_TYPE "legalizer"
28 :
29 : using namespace llvm;
30 : using namespace LegalizeActions;
31 :
32 1512 : LegalizerHelper::LegalizerHelper(MachineFunction &MF)
33 1512 : : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
34 1512 : MIRBuilder.setMF(MF);
35 1512 : }
36 :
37 13 : LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI)
38 13 : : MRI(MF.getRegInfo()), LI(LI) {
39 13 : MIRBuilder.setMF(MF);
40 13 : }
41 : LegalizerHelper::LegalizeResult
42 5877 : LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
43 : LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
44 :
45 5877 : auto Step = LI.getAction(MI, MRI);
46 5877 : switch (Step.Action) {
47 : case Legal:
48 : LLVM_DEBUG(dbgs() << ".. Already legal\n");
49 : return AlreadyLegal;
50 118 : case Libcall:
51 : LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
52 118 : return libcall(MI);
53 65 : case NarrowScalar:
54 : LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
55 65 : return narrowScalar(MI, Step.TypeIdx, Step.NewType);
56 347 : case WidenScalar:
57 : LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
58 347 : return widenScalar(MI, Step.TypeIdx, Step.NewType);
59 40 : case Lower:
60 : LLVM_DEBUG(dbgs() << ".. Lower\n");
61 40 : return lower(MI, Step.TypeIdx, Step.NewType);
62 32 : case FewerElements:
63 : LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
64 32 : return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
65 95 : case Custom:
66 : LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
67 95 : return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
68 : : UnableToLegalize;
69 11 : default:
70 : LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
71 11 : return UnableToLegalize;
72 : }
73 : }
74 :
75 100 : void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
76 : SmallVectorImpl<unsigned> &VRegs) {
77 341 : for (int i = 0; i < NumParts; ++i)
78 482 : VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
79 200 : MIRBuilder.buildUnmerge(VRegs, Reg);
80 100 : }
81 :
82 98 : static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
83 98 : switch (Opcode) {
84 : case TargetOpcode::G_SDIV:
85 : assert(Size == 32 && "Unsupported size");
86 : return RTLIB::SDIV_I32;
87 12 : case TargetOpcode::G_UDIV:
88 : assert(Size == 32 && "Unsupported size");
89 12 : return RTLIB::UDIV_I32;
90 6 : case TargetOpcode::G_SREM:
91 : assert(Size == 32 && "Unsupported size");
92 6 : return RTLIB::SREM_I32;
93 6 : case TargetOpcode::G_UREM:
94 : assert(Size == 32 && "Unsupported size");
95 6 : return RTLIB::UREM_I32;
96 8 : case TargetOpcode::G_FADD:
97 : assert((Size == 32 || Size == 64) && "Unsupported size");
98 8 : return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
99 12 : case TargetOpcode::G_FSUB:
100 : assert((Size == 32 || Size == 64) && "Unsupported size");
101 12 : return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
102 4 : case TargetOpcode::G_FMUL:
103 : assert((Size == 32 || Size == 64) && "Unsupported size");
104 4 : return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
105 4 : case TargetOpcode::G_FDIV:
106 : assert((Size == 32 || Size == 64) && "Unsupported size");
107 4 : return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
108 14 : case TargetOpcode::G_FREM:
109 14 : return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
110 14 : case TargetOpcode::G_FPOW:
111 14 : return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
112 6 : case TargetOpcode::G_FMA:
113 : assert((Size == 32 || Size == 64) && "Unsupported size");
114 6 : return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
115 : }
116 0 : llvm_unreachable("Unknown libcall function");
117 : }
118 :
119 : LegalizerHelper::LegalizeResult
120 200 : llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
121 : const CallLowering::ArgInfo &Result,
122 : ArrayRef<CallLowering::ArgInfo> Args) {
123 200 : auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
124 200 : auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
125 : const char *Name = TLI.getLibcallName(Libcall);
126 :
127 200 : MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
128 200 : if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
129 200 : MachineOperand::CreateES(Name), Result, Args))
130 0 : return LegalizerHelper::UnableToLegalize;
131 :
132 : return LegalizerHelper::Legalized;
133 : }
134 :
135 : // Useful for libcalls where all operands have the same type.
136 : static LegalizerHelper::LegalizeResult
137 98 : simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
138 : Type *OpType) {
139 196 : auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
140 :
141 : SmallVector<CallLowering::ArgInfo, 3> Args;
142 300 : for (unsigned i = 1; i < MI.getNumOperands(); i++)
143 606 : Args.push_back({MI.getOperand(i).getReg(), OpType});
144 196 : return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
145 98 : Args);
146 : }
147 :
148 20 : static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
149 : Type *FromType) {
150 20 : auto ToMVT = MVT::getVT(ToType);
151 20 : auto FromMVT = MVT::getVT(FromType);
152 :
153 20 : switch (Opcode) {
154 2 : case TargetOpcode::G_FPEXT:
155 2 : return RTLIB::getFPEXT(FromMVT, ToMVT);
156 2 : case TargetOpcode::G_FPTRUNC:
157 2 : return RTLIB::getFPROUND(FromMVT, ToMVT);
158 4 : case TargetOpcode::G_FPTOSI:
159 4 : return RTLIB::getFPTOSINT(FromMVT, ToMVT);
160 4 : case TargetOpcode::G_FPTOUI:
161 4 : return RTLIB::getFPTOUINT(FromMVT, ToMVT);
162 4 : case TargetOpcode::G_SITOFP:
163 4 : return RTLIB::getSINTTOFP(FromMVT, ToMVT);
164 4 : case TargetOpcode::G_UITOFP:
165 4 : return RTLIB::getUINTTOFP(FromMVT, ToMVT);
166 : }
167 0 : llvm_unreachable("Unsupported libcall function");
168 : }
169 :
170 : static LegalizerHelper::LegalizeResult
171 20 : conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
172 : Type *FromType) {
173 40 : RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
174 60 : return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
175 20 : {{MI.getOperand(1).getReg(), FromType}});
176 : }
177 :
178 : LegalizerHelper::LegalizeResult
179 118 : LegalizerHelper::libcall(MachineInstr &MI) {
180 118 : LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
181 118 : unsigned Size = LLTy.getSizeInBits();
182 118 : auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
183 :
184 118 : MIRBuilder.setInstr(MI);
185 :
186 236 : switch (MI.getOpcode()) {
187 : default:
188 : return UnableToLegalize;
189 36 : case TargetOpcode::G_SDIV:
190 : case TargetOpcode::G_UDIV:
191 : case TargetOpcode::G_SREM:
192 : case TargetOpcode::G_UREM: {
193 36 : Type *HLTy = Type::getInt32Ty(Ctx);
194 36 : auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
195 36 : if (Status != Legalized)
196 : return Status;
197 : break;
198 : }
199 62 : case TargetOpcode::G_FADD:
200 : case TargetOpcode::G_FSUB:
201 : case TargetOpcode::G_FMUL:
202 : case TargetOpcode::G_FDIV:
203 : case TargetOpcode::G_FMA:
204 : case TargetOpcode::G_FPOW:
205 : case TargetOpcode::G_FREM: {
206 62 : Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
207 62 : auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
208 62 : if (Status != Legalized)
209 : return Status;
210 : break;
211 : }
212 2 : case TargetOpcode::G_FPEXT: {
213 : // FIXME: Support other floating point types (half, fp128 etc)
214 2 : unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
215 2 : unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
216 2 : if (ToSize != 64 || FromSize != 32)
217 : return UnableToLegalize;
218 2 : LegalizeResult Status = conversionLibcall(
219 2 : MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
220 2 : if (Status != Legalized)
221 : return Status;
222 : break;
223 : }
224 2 : case TargetOpcode::G_FPTRUNC: {
225 : // FIXME: Support other floating point types (half, fp128 etc)
226 2 : unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
227 2 : unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
228 2 : if (ToSize != 32 || FromSize != 64)
229 : return UnableToLegalize;
230 2 : LegalizeResult Status = conversionLibcall(
231 2 : MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
232 2 : if (Status != Legalized)
233 : return Status;
234 : break;
235 : }
236 8 : case TargetOpcode::G_FPTOSI:
237 : case TargetOpcode::G_FPTOUI: {
238 : // FIXME: Support other types
239 8 : unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
240 8 : unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
241 8 : if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
242 : return UnableToLegalize;
243 8 : LegalizeResult Status = conversionLibcall(
244 8 : MI, MIRBuilder, Type::getInt32Ty(Ctx),
245 8 : FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
246 8 : if (Status != Legalized)
247 : return Status;
248 : break;
249 : }
250 8 : case TargetOpcode::G_SITOFP:
251 : case TargetOpcode::G_UITOFP: {
252 : // FIXME: Support other types
253 8 : unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
254 8 : unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
255 8 : if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
256 : return UnableToLegalize;
257 8 : LegalizeResult Status = conversionLibcall(
258 8 : MI, MIRBuilder,
259 8 : ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
260 8 : Type::getInt32Ty(Ctx));
261 8 : if (Status != Legalized)
262 : return Status;
263 : break;
264 : }
265 : }
266 :
267 118 : MI.eraseFromParent();
268 118 : return Legalized;
269 : }
270 :
271 65 : LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
272 : unsigned TypeIdx,
273 : LLT NarrowTy) {
274 : // FIXME: Don't know how to handle secondary types yet.
275 65 : if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
276 : return UnableToLegalize;
277 :
278 65 : MIRBuilder.setInstr(MI);
279 :
280 65 : uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
281 65 : uint64_t NarrowSize = NarrowTy.getSizeInBits();
282 :
283 130 : switch (MI.getOpcode()) {
284 : default:
285 : return UnableToLegalize;
286 4 : case TargetOpcode::G_IMPLICIT_DEF: {
287 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
288 : // NarrowSize.
289 4 : if (SizeOp0 % NarrowSize != 0)
290 : return UnableToLegalize;
291 4 : int NumParts = SizeOp0 / NarrowSize;
292 :
293 : SmallVector<unsigned, 2> DstRegs;
294 12 : for (int i = 0; i < NumParts; ++i)
295 8 : DstRegs.push_back(
296 8 : MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
297 4 : MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
298 4 : MI.eraseFromParent();
299 : return Legalized;
300 : }
301 4 : case TargetOpcode::G_ADD: {
302 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
303 : // NarrowSize.
304 4 : if (SizeOp0 % NarrowSize != 0)
305 : return UnableToLegalize;
306 : // Expand in terms of carry-setting/consuming G_ADDE instructions.
307 4 : int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
308 :
309 : SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
310 4 : extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
311 4 : extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
312 :
313 8 : unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
314 4 : MIRBuilder.buildConstant(CarryIn, 0);
315 :
316 13 : for (int i = 0; i < NumParts; ++i) {
317 18 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
318 18 : unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
319 :
320 : MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
321 27 : Src2Regs[i], CarryIn);
322 :
323 9 : DstRegs.push_back(DstReg);
324 : CarryIn = CarryOut;
325 : }
326 4 : unsigned DstReg = MI.getOperand(0).getReg();
327 4 : MIRBuilder.buildMerge(DstReg, DstRegs);
328 4 : MI.eraseFromParent();
329 : return Legalized;
330 : }
331 8 : case TargetOpcode::G_EXTRACT: {
332 8 : if (TypeIdx != 1)
333 : return UnableToLegalize;
334 :
335 8 : int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
336 : // FIXME: add support for when SizeOp1 isn't an exact multiple of
337 : // NarrowSize.
338 8 : if (SizeOp1 % NarrowSize != 0)
339 : return UnableToLegalize;
340 8 : int NumParts = SizeOp1 / NarrowSize;
341 :
342 : SmallVector<unsigned, 2> SrcRegs, DstRegs;
343 : SmallVector<uint64_t, 2> Indexes;
344 8 : extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
345 :
346 8 : unsigned OpReg = MI.getOperand(0).getReg();
347 8 : uint64_t OpStart = MI.getOperand(2).getImm();
348 16 : uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
349 24 : for (int i = 0; i < NumParts; ++i) {
350 16 : unsigned SrcStart = i * NarrowSize;
351 :
352 16 : if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
353 : // No part of the extract uses this subregister, ignore it.
354 12 : continue;
355 15 : } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
356 : // The entire subregister is extracted, forward the value.
357 5 : DstRegs.push_back(SrcRegs[i]);
358 5 : continue;
359 : }
360 :
361 : // OpSegStart is where this destination segment would start in OpReg if it
362 : // extended infinitely in both directions.
363 : int64_t ExtractOffset;
364 : uint64_t SegSize;
365 4 : if (OpStart < SrcStart) {
366 : ExtractOffset = 0;
367 2 : SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
368 : } else {
369 3 : ExtractOffset = OpStart - SrcStart;
370 4 : SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
371 : }
372 :
373 4 : unsigned SegReg = SrcRegs[i];
374 4 : if (ExtractOffset != 0 || SegSize != NarrowSize) {
375 : // A genuine extract is needed.
376 12 : SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
377 8 : MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
378 : }
379 :
380 4 : DstRegs.push_back(SegReg);
381 : }
382 :
383 8 : MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
384 8 : MI.eraseFromParent();
385 : return Legalized;
386 : }
387 11 : case TargetOpcode::G_INSERT: {
388 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
389 : // NarrowSize.
390 11 : if (SizeOp0 % NarrowSize != 0)
391 : return UnableToLegalize;
392 :
393 11 : int NumParts = SizeOp0 / NarrowSize;
394 :
395 : SmallVector<unsigned, 2> SrcRegs, DstRegs;
396 : SmallVector<uint64_t, 2> Indexes;
397 11 : extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
398 :
399 11 : unsigned OpReg = MI.getOperand(2).getReg();
400 11 : uint64_t OpStart = MI.getOperand(3).getImm();
401 22 : uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
402 34 : for (int i = 0; i < NumParts; ++i) {
403 23 : unsigned DstStart = i * NarrowSize;
404 :
405 23 : if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
406 : // No part of the insert affects this subregister, forward the original.
407 11 : DstRegs.push_back(SrcRegs[i]);
408 18 : continue;
409 14 : } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
410 : // The entire subregister is defined by this insert, forward the new
411 : // value.
412 7 : DstRegs.push_back(OpReg);
413 7 : continue;
414 : }
415 :
416 : // OpSegStart is where this destination segment would start in OpReg if it
417 : // extended infinitely in both directions.
418 : int64_t ExtractOffset, InsertOffset;
419 : uint64_t SegSize;
420 5 : if (OpStart < DstStart) {
421 : InsertOffset = 0;
422 1 : ExtractOffset = DstStart - OpStart;
423 2 : SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
424 : } else {
425 4 : InsertOffset = OpStart - DstStart;
426 : ExtractOffset = 0;
427 4 : SegSize =
428 5 : std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
429 : }
430 :
431 5 : unsigned SegReg = OpReg;
432 5 : if (ExtractOffset != 0 || SegSize != OpSize) {
433 : // A genuine extract is needed.
434 6 : SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
435 2 : MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
436 : }
437 :
438 10 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
439 10 : MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
440 5 : DstRegs.push_back(DstReg);
441 : }
442 :
443 : assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
444 11 : MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
445 11 : MI.eraseFromParent();
446 : return Legalized;
447 : }
448 8 : case TargetOpcode::G_LOAD: {
449 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
450 : // NarrowSize.
451 8 : if (SizeOp0 % NarrowSize != 0)
452 : return UnableToLegalize;
453 :
454 8 : const auto &MMO = **MI.memoperands_begin();
455 : // This implementation doesn't work for atomics. Give up instead of doing
456 : // something invalid.
457 8 : if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
458 : MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
459 : return UnableToLegalize;
460 :
461 8 : int NumParts = SizeOp0 / NarrowSize;
462 : LLT OffsetTy = LLT::scalar(
463 16 : MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
464 :
465 : SmallVector<unsigned, 2> DstRegs;
466 24 : for (int i = 0; i < NumParts; ++i) {
467 32 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
468 16 : unsigned SrcReg = 0;
469 16 : unsigned Adjustment = i * NarrowSize / 8;
470 :
471 64 : MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
472 16 : MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
473 16 : NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
474 16 : MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
475 : MMO.getOrdering(), MMO.getFailureOrdering());
476 :
477 16 : MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
478 : Adjustment);
479 :
480 16 : MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
481 :
482 16 : DstRegs.push_back(DstReg);
483 : }
484 8 : unsigned DstReg = MI.getOperand(0).getReg();
485 8 : MIRBuilder.buildMerge(DstReg, DstRegs);
486 8 : MI.eraseFromParent();
487 : return Legalized;
488 : }
489 20 : case TargetOpcode::G_STORE: {
490 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
491 : // NarrowSize.
492 20 : if (SizeOp0 % NarrowSize != 0)
493 : return UnableToLegalize;
494 :
495 9 : const auto &MMO = **MI.memoperands_begin();
496 : // This implementation doesn't work for atomics. Give up instead of doing
497 : // something invalid.
498 9 : if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
499 : MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
500 : return UnableToLegalize;
501 :
502 9 : int NumParts = SizeOp0 / NarrowSize;
503 : LLT OffsetTy = LLT::scalar(
504 18 : MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
505 :
506 : SmallVector<unsigned, 2> SrcRegs;
507 9 : extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
508 :
509 27 : for (int i = 0; i < NumParts; ++i) {
510 18 : unsigned DstReg = 0;
511 18 : unsigned Adjustment = i * NarrowSize / 8;
512 :
513 72 : MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
514 18 : MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
515 18 : NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
516 18 : MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
517 : MMO.getOrdering(), MMO.getFailureOrdering());
518 :
519 18 : MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
520 : Adjustment);
521 :
522 36 : MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
523 : }
524 9 : MI.eraseFromParent();
525 : return Legalized;
526 : }
527 7 : case TargetOpcode::G_CONSTANT: {
528 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
529 : // NarrowSize.
530 7 : if (SizeOp0 % NarrowSize != 0)
531 : return UnableToLegalize;
532 7 : int NumParts = SizeOp0 / NarrowSize;
533 7 : const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
534 7 : LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
535 :
536 : SmallVector<unsigned, 2> DstRegs;
537 21 : for (int i = 0; i < NumParts; ++i) {
538 28 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
539 : ConstantInt *CI =
540 28 : ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
541 14 : MIRBuilder.buildConstant(DstReg, *CI);
542 14 : DstRegs.push_back(DstReg);
543 : }
544 7 : unsigned DstReg = MI.getOperand(0).getReg();
545 7 : MIRBuilder.buildMerge(DstReg, DstRegs);
546 7 : MI.eraseFromParent();
547 : return Legalized;
548 : }
549 1 : case TargetOpcode::G_OR: {
550 : // Legalize bitwise operation:
551 : // A = BinOp<Ty> B, C
552 : // into:
553 : // B1, ..., BN = G_UNMERGE_VALUES B
554 : // C1, ..., CN = G_UNMERGE_VALUES C
555 : // A1 = BinOp<Ty/N> B1, C2
556 : // ...
557 : // AN = BinOp<Ty/N> BN, CN
558 : // A = G_MERGE_VALUES A1, ..., AN
559 :
560 : // FIXME: add support for when SizeOp0 isn't an exact multiple of
561 : // NarrowSize.
562 1 : if (SizeOp0 % NarrowSize != 0)
563 : return UnableToLegalize;
564 1 : int NumParts = SizeOp0 / NarrowSize;
565 :
566 : // List the registers where the destination will be scattered.
567 : SmallVector<unsigned, 2> DstRegs;
568 : // List the registers where the first argument will be split.
569 : SmallVector<unsigned, 2> SrcsReg1;
570 : // List the registers where the second argument will be split.
571 : SmallVector<unsigned, 2> SrcsReg2;
572 : // Create all the temporary registers.
573 3 : for (int i = 0; i < NumParts; ++i) {
574 4 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
575 4 : unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
576 4 : unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
577 :
578 2 : DstRegs.push_back(DstReg);
579 2 : SrcsReg1.push_back(SrcReg1);
580 2 : SrcsReg2.push_back(SrcReg2);
581 : }
582 : // Explode the big arguments into smaller chunks.
583 2 : MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
584 2 : MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
585 :
586 : // Do the operation on each small part.
587 3 : for (int i = 0; i < NumParts; ++i)
588 8 : MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
589 :
590 : // Gather the destination registers into the final destination.
591 1 : unsigned DstReg = MI.getOperand(0).getReg();
592 1 : MIRBuilder.buildMerge(DstReg, DstRegs);
593 1 : MI.eraseFromParent();
594 : return Legalized;
595 : }
596 : }
597 : }
598 :
599 369 : void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
600 : unsigned OpIdx, unsigned ExtOpcode) {
601 369 : MachineOperand &MO = MI.getOperand(OpIdx);
602 369 : auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
603 369 : MO.setReg(ExtB->getOperand(0).getReg());
604 369 : }
605 :
606 310 : void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
607 : unsigned OpIdx, unsigned TruncOpcode) {
608 310 : MachineOperand &MO = MI.getOperand(OpIdx);
609 620 : unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
610 620 : MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
611 310 : MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
612 310 : MO.setReg(DstExt);
613 310 : }
614 :
615 : LegalizerHelper::LegalizeResult
616 354 : LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
617 354 : MIRBuilder.setInstr(MI);
618 :
619 708 : switch (MI.getOpcode()) {
620 : default:
621 : return UnableToLegalize;
622 2 : case TargetOpcode::G_UADDO:
623 : case TargetOpcode::G_USUBO: {
624 2 : if (TypeIdx == 1)
625 : return UnableToLegalize; // TODO
626 : auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
627 2 : MI.getOperand(2).getReg());
628 : auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
629 2 : MI.getOperand(3).getReg());
630 2 : unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
631 2 : ? TargetOpcode::G_ADD
632 : : TargetOpcode::G_SUB;
633 : // Do the arithmetic in the larger type.
634 2 : auto NewOp = MIRBuilder.buildInstr(Opcode, WideTy, LHSZext, RHSZext);
635 2 : LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
636 2 : APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
637 : auto AndOp = MIRBuilder.buildInstr(
638 : TargetOpcode::G_AND, WideTy, NewOp,
639 2 : MIRBuilder.buildConstant(WideTy, Mask.getZExtValue()));
640 : // There is no overflow if the AndOp is the same as NewOp.
641 2 : MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
642 : AndOp);
643 : // Now trunc the NewOp to the original result.
644 2 : MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
645 2 : MI.eraseFromParent();
646 : return Legalized;
647 : }
648 5 : case TargetOpcode::G_CTTZ:
649 : case TargetOpcode::G_CTTZ_ZERO_UNDEF:
650 : case TargetOpcode::G_CTLZ:
651 : case TargetOpcode::G_CTLZ_ZERO_UNDEF:
652 : case TargetOpcode::G_CTPOP: {
653 : // First ZEXT the input.
654 5 : auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
655 5 : LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
656 10 : if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
657 : // The count is the same in the larger type except if the original
658 : // value was zero. This can be handled by setting the bit just off
659 : // the top of the original type.
660 : auto TopBit =
661 1 : APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
662 : MIBSrc = MIRBuilder.buildInstr(
663 : TargetOpcode::G_OR, WideTy, MIBSrc,
664 2 : MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue()));
665 : }
666 : // Perform the operation at the larger size.
667 10 : auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), WideTy, MIBSrc);
668 : // This is already the correct result for CTPOP and CTTZs
669 10 : if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
670 : MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
671 : // The correct result is NewOp - (Difference in widety and current ty).
672 2 : unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
673 : MIBNewOp =
674 : MIRBuilder.buildInstr(TargetOpcode::G_SUB, WideTy, MIBNewOp,
675 2 : MIRBuilder.buildConstant(WideTy, SizeDiff));
676 : }
677 5 : auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
678 : // Make the original instruction a trunc now, and update it's source.
679 5 : MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
680 10 : MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
681 5 : MIRBuilder.recordInsertion(&MI);
682 : return Legalized;
683 : }
684 :
685 76 : case TargetOpcode::G_ADD:
686 : case TargetOpcode::G_AND:
687 : case TargetOpcode::G_MUL:
688 : case TargetOpcode::G_OR:
689 : case TargetOpcode::G_XOR:
690 : case TargetOpcode::G_SUB:
691 : // Perform operation at larger width (any extension is fine here, high bits
692 : // don't affect the result) and then truncate the result back to the
693 : // original type.
694 76 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
695 76 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
696 76 : widenScalarDst(MI, WideTy);
697 76 : MIRBuilder.recordInsertion(&MI);
698 76 : return Legalized;
699 :
700 4 : case TargetOpcode::G_SHL:
701 4 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
702 : // The "number of bits to shift" operand must preserve its value as an
703 : // unsigned integer:
704 4 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
705 4 : widenScalarDst(MI, WideTy);
706 4 : MIRBuilder.recordInsertion(&MI);
707 4 : return Legalized;
708 :
709 34 : case TargetOpcode::G_SDIV:
710 : case TargetOpcode::G_SREM:
711 34 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
712 34 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
713 34 : widenScalarDst(MI, WideTy);
714 34 : MIRBuilder.recordInsertion(&MI);
715 34 : return Legalized;
716 :
717 3 : case TargetOpcode::G_ASHR:
718 3 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
719 : // The "number of bits to shift" operand must preserve its value as an
720 : // unsigned integer:
721 3 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
722 3 : widenScalarDst(MI, WideTy);
723 3 : MIRBuilder.recordInsertion(&MI);
724 3 : return Legalized;
725 :
726 36 : case TargetOpcode::G_UDIV:
727 : case TargetOpcode::G_UREM:
728 : case TargetOpcode::G_LSHR:
729 36 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
730 36 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
731 36 : widenScalarDst(MI, WideTy);
732 36 : MIRBuilder.recordInsertion(&MI);
733 36 : return Legalized;
734 :
735 3 : case TargetOpcode::G_SELECT:
736 3 : if (TypeIdx != 0)
737 : return UnableToLegalize;
738 : // Perform operation at larger width (any extension is fine here, high bits
739 : // don't affect the result) and then truncate the result back to the
740 : // original type.
741 3 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
742 3 : widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
743 3 : widenScalarDst(MI, WideTy);
744 3 : MIRBuilder.recordInsertion(&MI);
745 3 : return Legalized;
746 :
747 12 : case TargetOpcode::G_FPTOSI:
748 : case TargetOpcode::G_FPTOUI:
749 12 : if (TypeIdx != 0)
750 : return UnableToLegalize;
751 11 : widenScalarDst(MI, WideTy);
752 11 : MIRBuilder.recordInsertion(&MI);
753 11 : return Legalized;
754 :
755 7 : case TargetOpcode::G_SITOFP:
756 7 : if (TypeIdx != 1)
757 : return UnableToLegalize;
758 7 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
759 7 : MIRBuilder.recordInsertion(&MI);
760 7 : return Legalized;
761 :
762 3 : case TargetOpcode::G_UITOFP:
763 3 : if (TypeIdx != 1)
764 : return UnableToLegalize;
765 3 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
766 3 : MIRBuilder.recordInsertion(&MI);
767 3 : return Legalized;
768 :
769 1 : case TargetOpcode::G_INSERT:
770 1 : if (TypeIdx != 0)
771 : return UnableToLegalize;
772 1 : widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
773 1 : widenScalarDst(MI, WideTy);
774 1 : MIRBuilder.recordInsertion(&MI);
775 1 : return Legalized;
776 :
777 9 : case TargetOpcode::G_LOAD:
778 : // For some types like i24, we might try to widen to i32. To properly handle
779 : // this we should be using a dedicated extending load, until then avoid
780 : // trying to legalize.
781 18 : if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
782 9 : WideTy.getSizeInBits())
783 : return UnableToLegalize;
784 : LLVM_FALLTHROUGH;
785 : case TargetOpcode::G_SEXTLOAD:
786 : case TargetOpcode::G_ZEXTLOAD:
787 9 : widenScalarDst(MI, WideTy);
788 9 : MIRBuilder.recordInsertion(&MI);
789 9 : return Legalized;
790 :
791 : case TargetOpcode::G_STORE: {
792 17 : if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
793 0 : WideTy != LLT::scalar(8))
794 2 : return UnableToLegalize;
795 :
796 15 : widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
797 15 : MIRBuilder.recordInsertion(&MI);
798 15 : return Legalized;
799 : }
800 56 : case TargetOpcode::G_CONSTANT: {
801 56 : MachineOperand &SrcMO = MI.getOperand(1);
802 56 : LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
803 112 : const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
804 56 : SrcMO.setCImm(ConstantInt::get(Ctx, Val));
805 :
806 56 : widenScalarDst(MI, WideTy);
807 56 : MIRBuilder.recordInsertion(&MI);
808 : return Legalized;
809 : }
810 1 : case TargetOpcode::G_FCONSTANT: {
811 1 : MachineOperand &SrcMO = MI.getOperand(1);
812 1 : LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
813 1 : APFloat Val = SrcMO.getFPImm()->getValueAPF();
814 : bool LosesInfo;
815 1 : switch (WideTy.getSizeInBits()) {
816 1 : case 32:
817 1 : Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
818 1 : break;
819 0 : case 64:
820 0 : Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
821 0 : break;
822 0 : default:
823 0 : llvm_unreachable("Unhandled fp widen type");
824 : }
825 1 : SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
826 :
827 1 : widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
828 1 : MIRBuilder.recordInsertion(&MI);
829 : return Legalized;
830 : }
831 0 : case TargetOpcode::G_BRCOND:
832 0 : widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
833 0 : MIRBuilder.recordInsertion(&MI);
834 0 : return Legalized;
835 :
836 28 : case TargetOpcode::G_FCMP:
837 28 : if (TypeIdx == 0)
838 28 : widenScalarDst(MI, WideTy);
839 : else {
840 0 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
841 0 : widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
842 : }
843 28 : MIRBuilder.recordInsertion(&MI);
844 28 : return Legalized;
845 :
846 42 : case TargetOpcode::G_ICMP:
847 42 : if (TypeIdx == 0)
848 38 : widenScalarDst(MI, WideTy);
849 : else {
850 4 : unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
851 4 : MI.getOperand(1).getPredicate()))
852 4 : ? TargetOpcode::G_SEXT
853 : : TargetOpcode::G_ZEXT;
854 4 : widenScalarSrc(MI, WideTy, 2, ExtOpcode);
855 4 : widenScalarSrc(MI, WideTy, 3, ExtOpcode);
856 : }
857 42 : MIRBuilder.recordInsertion(&MI);
858 42 : return Legalized;
859 :
860 3 : case TargetOpcode::G_GEP:
861 : assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
862 3 : widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
863 3 : MIRBuilder.recordInsertion(&MI);
864 3 : return Legalized;
865 :
866 : case TargetOpcode::G_PHI: {
867 : assert(TypeIdx == 0 && "Expecting only Idx 0");
868 :
869 30 : for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
870 20 : MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
871 20 : MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
872 20 : widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
873 : }
874 :
875 10 : MachineBasicBlock &MBB = *MI.getParent();
876 20 : MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
877 10 : widenScalarDst(MI, WideTy);
878 10 : MIRBuilder.recordInsertion(&MI);
879 10 : return Legalized;
880 : }
881 : }
882 : }
883 :
884 : LegalizerHelper::LegalizeResult
885 46 : LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
886 : using namespace TargetOpcode;
887 46 : MIRBuilder.setInstr(MI);
888 :
889 92 : switch(MI.getOpcode()) {
890 : default:
891 : return UnableToLegalize;
892 27 : case TargetOpcode::G_SREM:
893 : case TargetOpcode::G_UREM: {
894 54 : unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
895 67 : MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
896 : .addDef(QuotReg)
897 27 : .addUse(MI.getOperand(1).getReg())
898 27 : .addUse(MI.getOperand(2).getReg());
899 :
900 54 : unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
901 27 : MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
902 27 : MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
903 27 : ProdReg);
904 27 : MI.eraseFromParent();
905 27 : return Legalized;
906 : }
907 2 : case TargetOpcode::G_SMULO:
908 : case TargetOpcode::G_UMULO: {
909 : // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
910 : // result.
911 2 : unsigned Res = MI.getOperand(0).getReg();
912 2 : unsigned Overflow = MI.getOperand(1).getReg();
913 2 : unsigned LHS = MI.getOperand(2).getReg();
914 2 : unsigned RHS = MI.getOperand(3).getReg();
915 :
916 2 : MIRBuilder.buildMul(Res, LHS, RHS);
917 :
918 2 : unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
919 2 : ? TargetOpcode::G_SMULH
920 : : TargetOpcode::G_UMULH;
921 :
922 4 : unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
923 2 : MIRBuilder.buildInstr(Opcode)
924 : .addDef(HiPart)
925 : .addUse(LHS)
926 : .addUse(RHS);
927 :
928 4 : unsigned Zero = MRI.createGenericVirtualRegister(Ty);
929 2 : MIRBuilder.buildConstant(Zero, 0);
930 :
931 : // For *signed* multiply, overflow is detected by checking:
932 : // (hi != (lo >> bitwidth-1))
933 2 : if (Opcode == TargetOpcode::G_SMULH) {
934 2 : unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
935 2 : unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
936 1 : MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
937 1 : MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
938 : .addDef(Shifted)
939 : .addUse(Res)
940 : .addUse(ShiftAmt);
941 1 : MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
942 : } else {
943 1 : MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
944 : }
945 2 : MI.eraseFromParent();
946 2 : return Legalized;
947 : }
948 7 : case TargetOpcode::G_FNEG: {
949 : // TODO: Handle vector types once we are able to
950 : // represent them.
951 7 : if (Ty.isVector())
952 0 : return UnableToLegalize;
953 7 : unsigned Res = MI.getOperand(0).getReg();
954 : Type *ZeroTy;
955 7 : LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
956 7 : switch (Ty.getSizeInBits()) {
957 0 : case 16:
958 0 : ZeroTy = Type::getHalfTy(Ctx);
959 0 : break;
960 3 : case 32:
961 3 : ZeroTy = Type::getFloatTy(Ctx);
962 3 : break;
963 3 : case 64:
964 3 : ZeroTy = Type::getDoubleTy(Ctx);
965 3 : break;
966 1 : case 128:
967 1 : ZeroTy = Type::getFP128Ty(Ctx);
968 1 : break;
969 0 : default:
970 0 : llvm_unreachable("unexpected floating-point type");
971 : }
972 : ConstantFP &ZeroForNegation =
973 7 : *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
974 7 : auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
975 7 : MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
976 : .addDef(Res)
977 7 : .addUse(Zero->getOperand(0).getReg())
978 7 : .addUse(MI.getOperand(1).getReg());
979 7 : MI.eraseFromParent();
980 7 : return Legalized;
981 : }
982 0 : case TargetOpcode::G_FSUB: {
983 : // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
984 : // First, check if G_FNEG is marked as Lower. If so, we may
985 : // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
986 0 : if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
987 : return UnableToLegalize;
988 0 : unsigned Res = MI.getOperand(0).getReg();
989 0 : unsigned LHS = MI.getOperand(1).getReg();
990 0 : unsigned RHS = MI.getOperand(2).getReg();
991 0 : unsigned Neg = MRI.createGenericVirtualRegister(Ty);
992 0 : MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
993 0 : MIRBuilder.buildInstr(TargetOpcode::G_FADD)
994 : .addDef(Res)
995 : .addUse(LHS)
996 : .addUse(Neg);
997 0 : MI.eraseFromParent();
998 0 : return Legalized;
999 : }
1000 2 : case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1001 2 : unsigned OldValRes = MI.getOperand(0).getReg();
1002 2 : unsigned SuccessRes = MI.getOperand(1).getReg();
1003 2 : unsigned Addr = MI.getOperand(2).getReg();
1004 2 : unsigned CmpVal = MI.getOperand(3).getReg();
1005 2 : unsigned NewVal = MI.getOperand(4).getReg();
1006 : MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1007 2 : **MI.memoperands_begin());
1008 2 : MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1009 2 : MI.eraseFromParent();
1010 2 : return Legalized;
1011 : }
1012 2 : case TargetOpcode::G_LOAD:
1013 : case TargetOpcode::G_SEXTLOAD:
1014 : case TargetOpcode::G_ZEXTLOAD: {
1015 : // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1016 2 : unsigned DstReg = MI.getOperand(0).getReg();
1017 2 : unsigned PtrReg = MI.getOperand(1).getReg();
1018 4 : LLT DstTy = MRI.getType(DstReg);
1019 2 : auto &MMO = **MI.memoperands_begin();
1020 :
1021 2 : if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1022 : // In the case of G_LOAD, this was a non-extending load already and we're
1023 : // about to lower to the same instruction.
1024 2 : if (MI.getOpcode() == TargetOpcode::G_LOAD)
1025 : return UnableToLegalize;
1026 0 : MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1027 0 : MI.eraseFromParent();
1028 0 : return Legalized;
1029 : }
1030 :
1031 0 : if (DstTy.isScalar()) {
1032 3 : unsigned TmpReg = MRI.createGenericVirtualRegister(
1033 : LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1034 1 : MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1035 1 : switch (MI.getOpcode()) {
1036 0 : default:
1037 0 : llvm_unreachable("Unexpected opcode");
1038 0 : case TargetOpcode::G_LOAD:
1039 0 : MIRBuilder.buildAnyExt(DstReg, TmpReg);
1040 0 : break;
1041 0 : case TargetOpcode::G_SEXTLOAD:
1042 0 : MIRBuilder.buildSExt(DstReg, TmpReg);
1043 0 : break;
1044 1 : case TargetOpcode::G_ZEXTLOAD:
1045 1 : MIRBuilder.buildZExt(DstReg, TmpReg);
1046 1 : break;
1047 : }
1048 1 : MI.eraseFromParent();
1049 1 : return Legalized;
1050 : }
1051 :
1052 : return UnableToLegalize;
1053 : }
1054 6 : case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1055 : case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1056 : case TargetOpcode::G_CTLZ:
1057 : case TargetOpcode::G_CTTZ:
1058 : case TargetOpcode::G_CTPOP:
1059 6 : return lowerBitCount(MI, TypeIdx, Ty);
1060 : }
1061 : }
1062 :
1063 : LegalizerHelper::LegalizeResult
1064 32 : LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1065 : LLT NarrowTy) {
1066 : // FIXME: Don't know how to handle secondary types yet.
1067 32 : if (TypeIdx != 0)
1068 : return UnableToLegalize;
1069 64 : switch (MI.getOpcode()) {
1070 : default:
1071 : return UnableToLegalize;
1072 32 : case TargetOpcode::G_ADD: {
1073 32 : unsigned NarrowSize = NarrowTy.getSizeInBits();
1074 32 : unsigned DstReg = MI.getOperand(0).getReg();
1075 32 : unsigned Size = MRI.getType(DstReg).getSizeInBits();
1076 32 : int NumParts = Size / NarrowSize;
1077 : // FIXME: Don't know how to handle the situation where the small vectors
1078 : // aren't all the same size yet.
1079 32 : if (Size % NarrowSize != 0)
1080 : return UnableToLegalize;
1081 :
1082 32 : MIRBuilder.setInstr(MI);
1083 :
1084 : SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1085 32 : extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
1086 32 : extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
1087 :
1088 115 : for (int i = 0; i < NumParts; ++i) {
1089 166 : unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1090 249 : MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
1091 83 : DstRegs.push_back(DstReg);
1092 : }
1093 :
1094 32 : MIRBuilder.buildMerge(DstReg, DstRegs);
1095 32 : MI.eraseFromParent();
1096 : return Legalized;
1097 : }
1098 : }
1099 : }
1100 :
1101 : LegalizerHelper::LegalizeResult
1102 6 : LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1103 6 : unsigned Opc = MI.getOpcode();
1104 6 : auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1105 : auto isLegalOrCustom = [this](const LegalityQuery &Q) {
1106 8 : auto QAction = LI.getAction(Q).Action;
1107 8 : return QAction == Legal || QAction == Custom;
1108 : };
1109 6 : switch (Opc) {
1110 : default:
1111 : return UnableToLegalize;
1112 0 : case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1113 : // This trivially expands to CTLZ.
1114 0 : MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1115 0 : MIRBuilder.recordInsertion(&MI);
1116 0 : return Legalized;
1117 : }
1118 2 : case TargetOpcode::G_CTLZ: {
1119 2 : unsigned SrcReg = MI.getOperand(1).getReg();
1120 2 : unsigned Len = Ty.getSizeInBits();
1121 2 : if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1122 : // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with
1123 : // zero.
1124 : auto MIBCtlzZU =
1125 1 : MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg);
1126 1 : auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1127 1 : auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1128 2 : auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1129 1 : SrcReg, MIBZero);
1130 1 : MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1131 1 : MIBCtlzZU);
1132 1 : MI.eraseFromParent();
1133 : return Legalized;
1134 : }
1135 : // for now, we do this:
1136 : // NewLen = NextPowerOf2(Len);
1137 : // x = x | (x >> 1);
1138 : // x = x | (x >> 2);
1139 : // ...
1140 : // x = x | (x >>16);
1141 : // x = x | (x >>32); // for 64-bit input
1142 : // Upto NewLen/2
1143 : // return Len - popcount(x);
1144 : //
1145 : // Ref: "Hacker's Delight" by Henry Warren
1146 1 : unsigned Op = SrcReg;
1147 2 : unsigned NewLen = PowerOf2Ceil(Len);
1148 4 : for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1149 3 : auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1150 : auto MIBOp = MIRBuilder.buildInstr(
1151 : TargetOpcode::G_OR, Ty, Op,
1152 3 : MIRBuilder.buildInstr(TargetOpcode::G_LSHR, Ty, Op, MIBShiftAmt));
1153 3 : Op = MIBOp->getOperand(0).getReg();
1154 : }
1155 1 : auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, Ty, Op);
1156 2 : MIRBuilder.buildInstr(TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
1157 1 : MIRBuilder.buildConstant(Ty, Len), MIBPop);
1158 1 : MI.eraseFromParent();
1159 1 : return Legalized;
1160 : }
1161 1 : case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1162 : // This trivially expands to CTTZ.
1163 1 : MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1164 1 : MIRBuilder.recordInsertion(&MI);
1165 1 : return Legalized;
1166 : }
1167 3 : case TargetOpcode::G_CTTZ: {
1168 3 : unsigned SrcReg = MI.getOperand(1).getReg();
1169 3 : unsigned Len = Ty.getSizeInBits();
1170 3 : if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1171 : // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1172 : // zero.
1173 : auto MIBCttzZU =
1174 1 : MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, Ty, SrcReg);
1175 1 : auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1176 1 : auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1177 2 : auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1178 1 : SrcReg, MIBZero);
1179 1 : MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1180 1 : MIBCttzZU);
1181 1 : MI.eraseFromParent();
1182 : return Legalized;
1183 : }
1184 : // for now, we use: { return popcount(~x & (x - 1)); }
1185 : // unless the target has ctlz but not ctpop, in which case we use:
1186 : // { return 32 - nlz(~x & (x-1)); }
1187 : // Ref: "Hacker's Delight" by Henry Warren
1188 2 : auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1189 : auto MIBNot =
1190 2 : MIRBuilder.buildInstr(TargetOpcode::G_XOR, Ty, SrcReg, MIBCstNeg1);
1191 : auto MIBTmp = MIRBuilder.buildInstr(
1192 : TargetOpcode::G_AND, Ty, MIBNot,
1193 2 : MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1));
1194 3 : if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) &&
1195 1 : isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) {
1196 1 : auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1197 : MIRBuilder.buildInstr(
1198 2 : TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
1199 : MIBCstLen,
1200 1 : MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, Ty, MIBTmp));
1201 1 : MI.eraseFromParent();
1202 : return Legalized;
1203 : }
1204 1 : MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1205 2 : MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1206 1 : return Legalized;
1207 : }
1208 : }
1209 : }
|