Bug Summary

File: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Warning: line 3038, column 7
Value stored to 'NumLeftover' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LegalizerHelper.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen/GlobalISel -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward 
-internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen/GlobalISel -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetFrameLowering.h"
21#include "llvm/CodeGen/TargetInstrInfo.h"
22#include "llvm/CodeGen/TargetLowering.h"
23#include "llvm/CodeGen/TargetSubtargetInfo.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28#define DEBUG_TYPE"legalizer" "legalizer"
29
30using namespace llvm;
31using namespace LegalizeActions;
32
33/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34///
35/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36/// with any leftover piece as type \p LeftoverTy
37///
38/// Returns -1 in the first element of the pair if the breakdown is not
39/// satisfiable.
40static std::pair<int, int>
41getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42 assert(!LeftoverTy.isValid() && "this is an out argument")((!LeftoverTy.isValid() && "this is an out argument")
? static_cast<void> (0) : __assert_fail ("!LeftoverTy.isValid() && \"this is an out argument\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 42, __PRETTY_FUNCTION__))
;
43
44 unsigned Size = OrigTy.getSizeInBits();
45 unsigned NarrowSize = NarrowTy.getSizeInBits();
46 unsigned NumParts = Size / NarrowSize;
47 unsigned LeftoverSize = Size - NumParts * NarrowSize;
48 assert(Size > NarrowSize)((Size > NarrowSize) ? static_cast<void> (0) : __assert_fail
("Size > NarrowSize", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 48, __PRETTY_FUNCTION__))
;
49
50 if (LeftoverSize == 0)
51 return {NumParts, 0};
52
53 if (NarrowTy.isVector()) {
54 unsigned EltSize = OrigTy.getScalarSizeInBits();
55 if (LeftoverSize % EltSize != 0)
56 return {-1, -1};
57 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58 } else {
59 LeftoverTy = LLT::scalar(LeftoverSize);
60 }
61
62 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63 return std::make_pair(NumParts, NumLeftover);
64}
65
66static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
67
68 if (!Ty.isScalar())
69 return nullptr;
70
71 switch (Ty.getSizeInBits()) {
72 case 16:
73 return Type::getHalfTy(Ctx);
74 case 32:
75 return Type::getFloatTy(Ctx);
76 case 64:
77 return Type::getDoubleTy(Ctx);
78 case 128:
79 return Type::getFP128Ty(Ctx);
80 default:
81 return nullptr;
82 }
83}
84
85LegalizerHelper::LegalizerHelper(MachineFunction &MF,
86 GISelChangeObserver &Observer,
87 MachineIRBuilder &Builder)
88 : MIRBuilder(Builder), MRI(MF.getRegInfo()),
89 LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
90 MIRBuilder.setMF(MF);
91 MIRBuilder.setChangeObserver(Observer);
92}
93
94LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
95 GISelChangeObserver &Observer,
96 MachineIRBuilder &B)
97 : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
98 MIRBuilder.setMF(MF);
99 MIRBuilder.setChangeObserver(Observer);
100}
101LegalizerHelper::LegalizeResult
102LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
103 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Legalizing: "; MI.print(dbgs
()); } } while (false)
;
104
105 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
106 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
107 return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? Legalized
108 : UnableToLegalize;
109 auto Step = LI.getAction(MI, MRI);
110 switch (Step.Action) {
111 case Legal:
112 LLVM_DEBUG(dbgs() << ".. Already legal\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Already legal\n"; } } while
(false)
;
113 return AlreadyLegal;
114 case Libcall:
115 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Convert to libcall\n"; }
} while (false)
;
116 return libcall(MI);
117 case NarrowScalar:
118 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Narrow scalar\n"; } } while
(false)
;
119 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
120 case WidenScalar:
121 LLVM_DEBUG(dbgs() << ".. Widen scalar\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Widen scalar\n"; } } while
(false)
;
122 return widenScalar(MI, Step.TypeIdx, Step.NewType);
123 case Lower:
124 LLVM_DEBUG(dbgs() << ".. Lower\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Lower\n"; } } while (false
)
;
125 return lower(MI, Step.TypeIdx, Step.NewType);
126 case FewerElements:
127 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Reduce number of elements\n"
; } } while (false)
;
128 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
129 case MoreElements:
130 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Increase number of elements\n"
; } } while (false)
;
131 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
132 case Custom:
133 LLVM_DEBUG(dbgs() << ".. Custom legalization\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Custom legalization\n"; }
} while (false)
;
134 return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
135 : UnableToLegalize;
136 default:
137 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Unable to legalize\n"; }
} while (false)
;
138 return UnableToLegalize;
139 }
140}
141
142void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
143 SmallVectorImpl<Register> &VRegs) {
144 for (int i = 0; i < NumParts; ++i)
145 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
146 MIRBuilder.buildUnmerge(VRegs, Reg);
147}
148
149bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
150 LLT MainTy, LLT &LeftoverTy,
151 SmallVectorImpl<Register> &VRegs,
152 SmallVectorImpl<Register> &LeftoverRegs) {
153 assert(!LeftoverTy.isValid() && "this is an out argument")((!LeftoverTy.isValid() && "this is an out argument")
? static_cast<void> (0) : __assert_fail ("!LeftoverTy.isValid() && \"this is an out argument\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 153, __PRETTY_FUNCTION__))
;
154
155 unsigned RegSize = RegTy.getSizeInBits();
156 unsigned MainSize = MainTy.getSizeInBits();
157 unsigned NumParts = RegSize / MainSize;
158 unsigned LeftoverSize = RegSize - NumParts * MainSize;
159
160 // Use an unmerge when possible.
161 if (LeftoverSize == 0) {
162 for (unsigned I = 0; I < NumParts; ++I)
163 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
164 MIRBuilder.buildUnmerge(VRegs, Reg);
165 return true;
166 }
167
168 if (MainTy.isVector()) {
169 unsigned EltSize = MainTy.getScalarSizeInBits();
170 if (LeftoverSize % EltSize != 0)
171 return false;
172 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
173 } else {
174 LeftoverTy = LLT::scalar(LeftoverSize);
175 }
176
177 // For irregular sizes, extract the individual parts.
178 for (unsigned I = 0; I != NumParts; ++I) {
179 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
180 VRegs.push_back(NewReg);
181 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
182 }
183
184 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
185 Offset += LeftoverSize) {
186 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
187 LeftoverRegs.push_back(NewReg);
188 MIRBuilder.buildExtract(NewReg, Reg, Offset);
189 }
190
191 return true;
192}
193
194void LegalizerHelper::insertParts(Register DstReg,
195 LLT ResultTy, LLT PartTy,
196 ArrayRef<Register> PartRegs,
197 LLT LeftoverTy,
198 ArrayRef<Register> LeftoverRegs) {
199 if (!LeftoverTy.isValid()) {
200 assert(LeftoverRegs.empty())((LeftoverRegs.empty()) ? static_cast<void> (0) : __assert_fail
("LeftoverRegs.empty()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 200, __PRETTY_FUNCTION__))
;
201
202 if (!ResultTy.isVector()) {
203 MIRBuilder.buildMerge(DstReg, PartRegs);
204 return;
205 }
206
207 if (PartTy.isVector())
208 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
209 else
210 MIRBuilder.buildBuildVector(DstReg, PartRegs);
211 return;
212 }
213
214 unsigned PartSize = PartTy.getSizeInBits();
215 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
216
217 Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
218 MIRBuilder.buildUndef(CurResultReg);
219
220 unsigned Offset = 0;
221 for (Register PartReg : PartRegs) {
222 Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
223 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
224 CurResultReg = NewResultReg;
225 Offset += PartSize;
226 }
227
228 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
229 // Use the original output register for the final insert to avoid a copy.
230 Register NewResultReg = (I + 1 == E) ?
231 DstReg : MRI.createGenericVirtualRegister(ResultTy);
232
233 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
234 CurResultReg = NewResultReg;
235 Offset += LeftoverPartSize;
236 }
237}
238
239/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
240static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
241 const MachineInstr &MI) {
242 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES)((MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) ? static_cast
<void> (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 242, __PRETTY_FUNCTION__))
;
243
244 const int NumResults = MI.getNumOperands() - 1;
245 Regs.resize(NumResults);
246 for (int I = 0; I != NumResults; ++I)
247 Regs[I] = MI.getOperand(I).getReg();
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253
254 LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
255 if (SrcTy == GCDTy) {
256 // If the source already evenly divides the result type, we don't need to do
257 // anything.
258 Parts.push_back(SrcReg);
259 } else {
260 // Need to split into common type sized pieces.
261 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
262 getUnmergeResults(Parts, *Unmerge);
263 }
264
265 return GCDTy;
266}
267
268LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
269 SmallVectorImpl<Register> &VRegs,
270 unsigned PadStrategy) {
271 LLT LCMTy = getLCMType(DstTy, NarrowTy);
272
273 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
274 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
275 int NumOrigSrc = VRegs.size();
276
277 Register PadReg;
278
279 // Get a value we can use to pad the source value if the sources won't evenly
280 // cover the result type.
281 if (NumOrigSrc < NumParts * NumSubParts) {
282 if (PadStrategy == TargetOpcode::G_ZEXT)
283 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
284 else if (PadStrategy == TargetOpcode::G_ANYEXT)
285 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
286 else {
287 assert(PadStrategy == TargetOpcode::G_SEXT)((PadStrategy == TargetOpcode::G_SEXT) ? static_cast<void>
(0) : __assert_fail ("PadStrategy == TargetOpcode::G_SEXT", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 287, __PRETTY_FUNCTION__))
;
288
289 // Shift the sign bit of the low register through the high register.
290 auto ShiftAmt =
291 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
292 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
293 }
294 }
295
296 // Registers for the final merge to be produced.
297 SmallVector<Register, 4> Remerge(NumParts);
298
299 // Registers needed for intermediate merges, which will be merged into a
300 // source for Remerge.
301 SmallVector<Register, 4> SubMerge(NumSubParts);
302
303 // Once we've fully read off the end of the original source bits, we can reuse
304 // the same high bits for remaining padding elements.
305 Register AllPadReg;
306
307 // Build merges to the LCM type to cover the original result type.
308 for (int I = 0; I != NumParts; ++I) {
309 bool AllMergePartsArePadding = true;
310
311 // Build the requested merges to the requested type.
312 for (int J = 0; J != NumSubParts; ++J) {
313 int Idx = I * NumSubParts + J;
314 if (Idx >= NumOrigSrc) {
315 SubMerge[J] = PadReg;
316 continue;
317 }
318
319 SubMerge[J] = VRegs[Idx];
320
321 // There are meaningful bits here we can't reuse later.
322 AllMergePartsArePadding = false;
323 }
324
325 // If we've filled up a complete piece with padding bits, we can directly
326 // emit the natural sized constant if applicable, rather than a merge of
327 // smaller constants.
328 if (AllMergePartsArePadding && !AllPadReg) {
329 if (PadStrategy == TargetOpcode::G_ANYEXT)
330 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
331 else if (PadStrategy == TargetOpcode::G_ZEXT)
332 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
333
334 // If this is a sign extension, we can't materialize a trivial constant
335 // with the right type and have to produce a merge.
336 }
337
338 if (AllPadReg) {
339 // Avoid creating additional instructions if we're just adding additional
340 // copies of padding bits.
341 Remerge[I] = AllPadReg;
342 continue;
343 }
344
345 if (NumSubParts == 1)
346 Remerge[I] = SubMerge[0];
347 else
348 Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
349
350 // In the sign extend padding case, re-use the first all-signbit merge.
351 if (AllMergePartsArePadding && !AllPadReg)
352 AllPadReg = Remerge[I];
353 }
354
355 VRegs = std::move(Remerge);
356 return LCMTy;
357}
358
359void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
360 ArrayRef<Register> RemergeRegs) {
361 LLT DstTy = MRI.getType(DstReg);
362
363 // Create the merge to the widened source, and extract the relevant bits into
364 // the result.
365
366 if (DstTy == LCMTy) {
367 MIRBuilder.buildMerge(DstReg, RemergeRegs);
368 return;
369 }
370
371 auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
372 if (DstTy.isScalar() && LCMTy.isScalar()) {
373 MIRBuilder.buildTrunc(DstReg, Remerge);
374 return;
375 }
376
377 if (LCMTy.isVector()) {
378 MIRBuilder.buildExtract(DstReg, Remerge, 0);
379 return;
380 }
381
382 llvm_unreachable("unhandled case")::llvm::llvm_unreachable_internal("unhandled case", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 382)
;
383}
384
385static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
386#define RTLIBCASE(LibcallPrefix)do { switch (Size) { case 32: return RTLIB::LibcallPrefix32; case
64: return RTLIB::LibcallPrefix64; case 128: return RTLIB::LibcallPrefix128
; default: ::llvm::llvm_unreachable_internal("unexpected size"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 386); } } while (0)
\
387 do { \
388 switch (Size) { \
389 case 32: \
390 return RTLIB::LibcallPrefix##32; \
391 case 64: \
392 return RTLIB::LibcallPrefix##64; \
393 case 128: \
394 return RTLIB::LibcallPrefix##128; \
395 default: \
396 llvm_unreachable("unexpected size")::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 396)
; \
397 } \
398 } while (0)
399
400 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size")(((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"
) ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64 || Size == 128) && \"Unsupported size\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 400, __PRETTY_FUNCTION__))
;
401
402 switch (Opcode) {
403 case TargetOpcode::G_SDIV:
404 RTLIBCASE(SDIV_I)do { switch (Size) { case 32: return RTLIB::SDIV_I32; case 64
: return RTLIB::SDIV_I64; case 128: return RTLIB::SDIV_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 404); } } while (0)
;
405 case TargetOpcode::G_UDIV:
406 RTLIBCASE(UDIV_I)do { switch (Size) { case 32: return RTLIB::UDIV_I32; case 64
: return RTLIB::UDIV_I64; case 128: return RTLIB::UDIV_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 406); } } while (0)
;
407 case TargetOpcode::G_SREM:
408 RTLIBCASE(SREM_I)do { switch (Size) { case 32: return RTLIB::SREM_I32; case 64
: return RTLIB::SREM_I64; case 128: return RTLIB::SREM_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 408); } } while (0)
;
409 case TargetOpcode::G_UREM:
410 RTLIBCASE(UREM_I)do { switch (Size) { case 32: return RTLIB::UREM_I32; case 64
: return RTLIB::UREM_I64; case 128: return RTLIB::UREM_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 410); } } while (0)
;
411 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
412 RTLIBCASE(CTLZ_I)do { switch (Size) { case 32: return RTLIB::CTLZ_I32; case 64
: return RTLIB::CTLZ_I64; case 128: return RTLIB::CTLZ_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 412); } } while (0)
;
413 case TargetOpcode::G_FADD:
414 RTLIBCASE(ADD_F)do { switch (Size) { case 32: return RTLIB::ADD_F32; case 64:
return RTLIB::ADD_F64; case 128: return RTLIB::ADD_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 414); } } while (0)
;
415 case TargetOpcode::G_FSUB:
416 RTLIBCASE(SUB_F)do { switch (Size) { case 32: return RTLIB::SUB_F32; case 64:
return RTLIB::SUB_F64; case 128: return RTLIB::SUB_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 416); } } while (0)
;
417 case TargetOpcode::G_FMUL:
418 RTLIBCASE(MUL_F)do { switch (Size) { case 32: return RTLIB::MUL_F32; case 64:
return RTLIB::MUL_F64; case 128: return RTLIB::MUL_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 418); } } while (0)
;
419 case TargetOpcode::G_FDIV:
420 RTLIBCASE(DIV_F)do { switch (Size) { case 32: return RTLIB::DIV_F32; case 64:
return RTLIB::DIV_F64; case 128: return RTLIB::DIV_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 420); } } while (0)
;
421 case TargetOpcode::G_FEXP:
422 RTLIBCASE(EXP_F)do { switch (Size) { case 32: return RTLIB::EXP_F32; case 64:
return RTLIB::EXP_F64; case 128: return RTLIB::EXP_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 422); } } while (0)
;
423 case TargetOpcode::G_FEXP2:
424 RTLIBCASE(EXP2_F)do { switch (Size) { case 32: return RTLIB::EXP2_F32; case 64
: return RTLIB::EXP2_F64; case 128: return RTLIB::EXP2_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 424); } } while (0)
;
425 case TargetOpcode::G_FREM:
426 RTLIBCASE(REM_F)do { switch (Size) { case 32: return RTLIB::REM_F32; case 64:
return RTLIB::REM_F64; case 128: return RTLIB::REM_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 426); } } while (0)
;
427 case TargetOpcode::G_FPOW:
428 RTLIBCASE(POW_F)do { switch (Size) { case 32: return RTLIB::POW_F32; case 64:
return RTLIB::POW_F64; case 128: return RTLIB::POW_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 428); } } while (0)
;
429 case TargetOpcode::G_FMA:
430 RTLIBCASE(FMA_F)do { switch (Size) { case 32: return RTLIB::FMA_F32; case 64:
return RTLIB::FMA_F64; case 128: return RTLIB::FMA_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 430); } } while (0)
;
431 case TargetOpcode::G_FSIN:
432 RTLIBCASE(SIN_F)do { switch (Size) { case 32: return RTLIB::SIN_F32; case 64:
return RTLIB::SIN_F64; case 128: return RTLIB::SIN_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 432); } } while (0)
;
433 case TargetOpcode::G_FCOS:
434 RTLIBCASE(COS_F)do { switch (Size) { case 32: return RTLIB::COS_F32; case 64:
return RTLIB::COS_F64; case 128: return RTLIB::COS_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 434); } } while (0)
;
435 case TargetOpcode::G_FLOG10:
436 RTLIBCASE(LOG10_F)do { switch (Size) { case 32: return RTLIB::LOG10_F32; case 64
: return RTLIB::LOG10_F64; case 128: return RTLIB::LOG10_F128
; default: ::llvm::llvm_unreachable_internal("unexpected size"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 436); } } while (0)
;
437 case TargetOpcode::G_FLOG:
438 RTLIBCASE(LOG_F)do { switch (Size) { case 32: return RTLIB::LOG_F32; case 64:
return RTLIB::LOG_F64; case 128: return RTLIB::LOG_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 438); } } while (0)
;
439 case TargetOpcode::G_FLOG2:
440 RTLIBCASE(LOG2_F)do { switch (Size) { case 32: return RTLIB::LOG2_F32; case 64
: return RTLIB::LOG2_F64; case 128: return RTLIB::LOG2_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 440); } } while (0)
;
441 case TargetOpcode::G_FCEIL:
442 RTLIBCASE(CEIL_F)do { switch (Size) { case 32: return RTLIB::CEIL_F32; case 64
: return RTLIB::CEIL_F64; case 128: return RTLIB::CEIL_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 442); } } while (0)
;
443 case TargetOpcode::G_FFLOOR:
444 RTLIBCASE(FLOOR_F)do { switch (Size) { case 32: return RTLIB::FLOOR_F32; case 64
: return RTLIB::FLOOR_F64; case 128: return RTLIB::FLOOR_F128
; default: ::llvm::llvm_unreachable_internal("unexpected size"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 444); } } while (0)
;
445 case TargetOpcode::G_FMINNUM:
446 RTLIBCASE(FMIN_F)do { switch (Size) { case 32: return RTLIB::FMIN_F32; case 64
: return RTLIB::FMIN_F64; case 128: return RTLIB::FMIN_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 446); } } while (0)
;
447 case TargetOpcode::G_FMAXNUM:
448 RTLIBCASE(FMAX_F)do { switch (Size) { case 32: return RTLIB::FMAX_F32; case 64
: return RTLIB::FMAX_F64; case 128: return RTLIB::FMAX_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 448); } } while (0)
;
449 case TargetOpcode::G_FSQRT:
450 RTLIBCASE(SQRT_F)do { switch (Size) { case 32: return RTLIB::SQRT_F32; case 64
: return RTLIB::SQRT_F64; case 128: return RTLIB::SQRT_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 450); } } while (0)
;
451 case TargetOpcode::G_FRINT:
452 RTLIBCASE(RINT_F)do { switch (Size) { case 32: return RTLIB::RINT_F32; case 64
: return RTLIB::RINT_F64; case 128: return RTLIB::RINT_F128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 452); } } while (0)
;
453 case TargetOpcode::G_FNEARBYINT:
454 RTLIBCASE(NEARBYINT_F)do { switch (Size) { case 32: return RTLIB::NEARBYINT_F32; case
64: return RTLIB::NEARBYINT_F64; case 128: return RTLIB::NEARBYINT_F128
; default: ::llvm::llvm_unreachable_internal("unexpected size"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 454); } } while (0)
;
455 }
456 llvm_unreachable("Unknown libcall function")::llvm::llvm_unreachable_internal("Unknown libcall function",
"/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 456)
;
457}
458
459/// True if an instruction is in tail position in its caller. Intended for
460/// legalizing libcalls as tail calls when possible.
461static bool isLibCallInTailPosition(MachineInstr &MI) {
462 const Function &F = MI.getParent()->getParent()->getFunction();
463
464 // Conservatively require the attributes of the call to match those of
465 // the return. Ignore NoAlias and NonNull because they don't affect the
466 // call sequence.
467 AttributeList CallerAttrs = F.getAttributes();
468 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
469 .removeAttribute(Attribute::NoAlias)
470 .removeAttribute(Attribute::NonNull)
471 .hasAttributes())
472 return false;
473
474 // It's not safe to eliminate the sign / zero extension of the return value.
475 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
476 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
477 return false;
478
479 // Only tail call if the following instruction is a standard return.
480 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
481 MachineInstr *Next = MI.getNextNode();
482 if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
483 return false;
484
485 return true;
486}
487
488LegalizerHelper::LegalizeResult
489llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
490 const CallLowering::ArgInfo &Result,
491 ArrayRef<CallLowering::ArgInfo> Args) {
492 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
493 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
494 const char *Name = TLI.getLibcallName(Libcall);
495
496 CallLowering::CallLoweringInfo Info;
497 Info.CallConv = TLI.getLibcallCallingConv(Libcall);
498 Info.Callee = MachineOperand::CreateES(Name);
499 Info.OrigRet = Result;
500 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
501 if (!CLI.lowerCall(MIRBuilder, Info))
502 return LegalizerHelper::UnableToLegalize;
503
504 return LegalizerHelper::Legalized;
505}
506
507// Useful for libcalls where all operands have the same type.
508static LegalizerHelper::LegalizeResult
509simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
510 Type *OpType) {
511 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
512
513 SmallVector<CallLowering::ArgInfo, 3> Args;
514 for (unsigned i = 1; i < MI.getNumOperands(); i++)
515 Args.push_back({MI.getOperand(i).getReg(), OpType});
516 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
517 Args);
518}
519
520LegalizerHelper::LegalizeResult
521llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
522 MachineInstr &MI) {
523 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)((MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
? static_cast<void> (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 523, __PRETTY_FUNCTION__))
;
524 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
525
526 SmallVector<CallLowering::ArgInfo, 3> Args;
527 // Add all the args, except for the last which is an imm denoting 'tail'.
528 for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
529 Register Reg = MI.getOperand(i).getReg();
530
531 // Need derive an IR type for call lowering.
532 LLT OpLLT = MRI.getType(Reg);
533 Type *OpTy = nullptr;
534 if (OpLLT.isPointer())
535 OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
536 else
537 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
538 Args.push_back({Reg, OpTy});
539 }
540
541 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
542 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
543 Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
544 RTLIB::Libcall RTLibcall;
545 switch (ID) {
546 case Intrinsic::memcpy:
547 RTLibcall = RTLIB::MEMCPY;
548 break;
549 case Intrinsic::memset:
550 RTLibcall = RTLIB::MEMSET;
551 break;
552 case Intrinsic::memmove:
553 RTLibcall = RTLIB::MEMMOVE;
554 break;
555 default:
556 return LegalizerHelper::UnableToLegalize;
557 }
558 const char *Name = TLI.getLibcallName(RTLibcall);
559
560 MIRBuilder.setInstr(MI);
561
562 CallLowering::CallLoweringInfo Info;
563 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
564 Info.Callee = MachineOperand::CreateES(Name);
565 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
566 Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
567 isLibCallInTailPosition(MI);
568
569 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
570 if (!CLI.lowerCall(MIRBuilder, Info))
571 return LegalizerHelper::UnableToLegalize;
572
573 if (Info.LoweredTailCall) {
574 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?")((Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"
) ? static_cast<void> (0) : __assert_fail ("Info.IsTailCall && \"Lowered tail call when it wasn't a tail call?\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 574, __PRETTY_FUNCTION__))
;
575 // We must have a return following the call to get past
576 // isLibCallInTailPosition.
577 assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&((MI.getNextNode() && MI.getNextNode()->isReturn()
&& "Expected instr following MI to be a return?") ? static_cast
<void> (0) : __assert_fail ("MI.getNextNode() && MI.getNextNode()->isReturn() && \"Expected instr following MI to be a return?\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 578, __PRETTY_FUNCTION__))
578 "Expected instr following MI to be a return?")((MI.getNextNode() && MI.getNextNode()->isReturn()
&& "Expected instr following MI to be a return?") ? static_cast
<void> (0) : __assert_fail ("MI.getNextNode() && MI.getNextNode()->isReturn() && \"Expected instr following MI to be a return?\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 578, __PRETTY_FUNCTION__))
;
579
580 // We lowered a tail call, so the call is now the return from the block.
581 // Delete the old return.
582 MI.getNextNode()->eraseFromParent();
583 }
584
585 return LegalizerHelper::Legalized;
586}
587
588static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
589 Type *FromType) {
590 auto ToMVT = MVT::getVT(ToType);
591 auto FromMVT = MVT::getVT(FromType);
592
593 switch (Opcode) {
594 case TargetOpcode::G_FPEXT:
595 return RTLIB::getFPEXT(FromMVT, ToMVT);
596 case TargetOpcode::G_FPTRUNC:
597 return RTLIB::getFPROUND(FromMVT, ToMVT);
598 case TargetOpcode::G_FPTOSI:
599 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
600 case TargetOpcode::G_FPTOUI:
601 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
602 case TargetOpcode::G_SITOFP:
603 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
604 case TargetOpcode::G_UITOFP:
605 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
606 }
607 llvm_unreachable("Unsupported libcall function")::llvm::llvm_unreachable_internal("Unsupported libcall function"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 607)
;
608}
609
610static LegalizerHelper::LegalizeResult
611conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
612 Type *FromType) {
613 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
614 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
615 {{MI.getOperand(1).getReg(), FromType}});
616}
617
// Legalize MI by replacing it with a runtime-library call. On success the
// original instruction is erased; on failure it is left untouched and
// UnableToLegalize is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  // Integer ops: all operands share one integer type of the result's width.
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // Floating-point ops: only f32/f64/f128 have libcall variants here.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP <-> FP conversions: both ends must map to an IR float type.
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP -> int conversions, restricted to 32/64-bit widths on both sides.
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  // Int -> FP conversions, restricted to 32/64-bit widths on both sides.
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // Every successful path above emitted a replacement call; the original
  // instruction is now dead.
  MI.eraseFromParent();
  return Legalized;
}
717
// Legalize MI by splitting the scalar type at operand index TypeIdx into
// pieces of type NarrowTy, emitting the equivalent multi-part sequence.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // One undef per part, then recombine into the original type.
    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy).getReg(0));

    Register DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    // Materialize each NarrowSize-wide slice of the constant, LSB first.
    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    // Any remaining high bits go into a single smaller leftover constant.
    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
        LeftoverTy,
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Only handled when the source is exactly two narrow parts; the truncated
    // result is then just the low part of the unmerge.
    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // First part uses UADDO; subsequent parts chain the carry through UADDE.
    Register CarryIn;
    for (int i = 0; i < NumParts; ++i) {
      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      if (i == 0)
        MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
      else {
        MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                              Src2Regs[i], CarryIn);
      }

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    Register DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // First part uses USUBO; subsequent parts chain the borrow through USUBE.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    Register BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Extending load (memory narrower than the result): load into a narrow
    // temp and any-extend into the destination.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    // Load the narrow value (keeping the original extending-load opcode if
    // the memory is narrower still), then extend to the full destination.
    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSizeInBits() == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): SrcTy.isVector() already returned above, so this check
    // looks unreachable as written — confirm intent.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store (memory narrower than the value): truncate to a
    // narrow temp and store that.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    // TypeIdx 1 narrows the source; dispatch to the per-opcode expansion.
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    // TypeIdx 0: the count fits in the narrow type; zero-extend the result.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    // Split each incoming value in its predecessor block (before the
    // terminator) so the parts dominate the phi.
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    // Build one narrow phi per part, reusing the original predecessor list.
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    // Only the index operand is narrowed (operand 2 for extract, 3 for
    // insert).
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      // (in)equality: compare (LHSL^RHSL)|(LHSH^RHSH) against zero.
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
    } else {
      // Ordering: decided by the high halves unless they are equal, in which
      // case the low halves decide with the unsigned form of the predicate.
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Apply the op to each part in reversed order, so that merging the parts
    // also swaps them.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}
1182
1183void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1184 unsigned OpIdx, unsigned ExtOpcode) {
1185 MachineOperand &MO = MI.getOperand(OpIdx);
1186 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1187 MO.setReg(ExtB.getReg(0));
1188}
1189
1190void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1191 unsigned OpIdx) {
1192 MachineOperand &MO = MI.getOperand(OpIdx);
1193 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1194 MO.setReg(ExtB.getReg(0));
1195}
1196
1197void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1198 unsigned OpIdx, unsigned TruncOpcode) {
1199 MachineOperand &MO = MI.getOperand(OpIdx);
1200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1201 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1202 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1203 MO.setReg(DstExt);
1204}
1205
1206void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1207 unsigned OpIdx, unsigned ExtOpcode) {
1208 MachineOperand &MO = MI.getOperand(OpIdx);
1209 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1210 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1211 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1212 MO.setReg(DstTrunc);
1213}
1214
1215void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1216 unsigned OpIdx) {
1217 MachineOperand &MO = MI.getOperand(OpIdx);
1218 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1219 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1220 MIRBuilder.buildExtract(MO, DstExt, 0);
1221 MO.setReg(DstExt);
1222}
1223
1224void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1225 unsigned OpIdx) {
1226 MachineOperand &MO = MI.getOperand(OpIdx);
1227
1228 LLT OldTy = MRI.getType(MO.getReg());
1229 unsigned OldElts = OldTy.getNumElements();
1230 unsigned NewElts = MoreTy.getNumElements();
1231
1232 unsigned NumParts = NewElts / OldElts;
1233
1234 // Use concat_vectors if the result is a multiple of the number of elements.
1235 if (NumParts * OldElts == NewElts) {
1236 SmallVector<Register, 8> Parts;
1237 Parts.push_back(MO.getReg());
1238
1239 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1240 for (unsigned I = 1; I != NumParts; ++I)
1241 Parts.push_back(ImpDef);
1242
1243 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1244 MO.setReg(Concat.getReg(0));
1245 return;
1246 }
1247
1248 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1249 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1250 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1251 MO.setReg(MoreReg);
1252}
1253
1254LegalizerHelper::LegalizeResult
1255LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1256 LLT WideTy) {
1257 if (TypeIdx != 1)
1258 return UnableToLegalize;
1259
1260 Register DstReg = MI.getOperand(0).getReg();
1261 LLT DstTy = MRI.getType(DstReg);
1262 if (DstTy.isVector())
1263 return UnableToLegalize;
1264
1265 Register Src1 = MI.getOperand(1).getReg();
1266 LLT SrcTy = MRI.getType(Src1);
1267 const int DstSize = DstTy.getSizeInBits();
1268 const int SrcSize = SrcTy.getSizeInBits();
1269 const int WideSize = WideTy.getSizeInBits();
1270 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1271
1272 unsigned NumOps = MI.getNumOperands();
1273 unsigned NumSrc = MI.getNumOperands() - 1;
1274 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1275
1276 if (WideSize >= DstSize) {
1277 // Directly pack the bits in the target type.
1278 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1279
1280 for (unsigned I = 2; I != NumOps; ++I) {
1281 const unsigned Offset = (I - 1) * PartSize;
1282
1283 Register SrcReg = MI.getOperand(I).getReg();
1284 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize))((MRI.getType(SrcReg) == LLT::scalar(PartSize)) ? static_cast
<void> (0) : __assert_fail ("MRI.getType(SrcReg) == LLT::scalar(PartSize)"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1284, __PRETTY_FUNCTION__))
;
1285
1286 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1287
1288 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1289 MRI.createGenericVirtualRegister(WideTy);
1290
1291 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1292 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1293 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1294 ResultReg = NextResult;
1295 }
1296
1297 if (WideSize > DstSize)
1298 MIRBuilder.buildTrunc(DstReg, ResultReg);
1299 else if (DstTy.isPointer())
1300 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1301
1302 MI.eraseFromParent();
1303 return Legalized;
1304 }
1305
1306 // Unmerge the original values to the GCD type, and recombine to the next
1307 // multiple greater than the original type.
1308 //
1309 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1310 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1311 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1312 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1313 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1314 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1315 // %12:_(s12) = G_MERGE_VALUES %10, %11
1316 //
1317 // Padding with undef if necessary:
1318 //
1319 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1320 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1321 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1322 // %7:_(s2) = G_IMPLICIT_DEF
1323 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1324 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1325 // %10:_(s12) = G_MERGE_VALUES %8, %9
1326
1327 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1328 LLT GCDTy = LLT::scalar(GCD);
1329
1330 SmallVector<Register, 8> Parts;
1331 SmallVector<Register, 8> NewMergeRegs;
1332 SmallVector<Register, 8> Unmerges;
1333 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1334
1335 // Decompose the original operands if they don't evenly divide.
1336 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1337 Register SrcReg = MI.getOperand(I).getReg();
1338 if (GCD == SrcSize) {
1339 Unmerges.push_back(SrcReg);
1340 } else {
1341 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1342 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1343 Unmerges.push_back(Unmerge.getReg(J));
1344 }
1345 }
1346
1347 // Pad with undef to the next size that is a multiple of the requested size.
1348 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1349 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1350 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1351 Unmerges.push_back(UndefReg);
1352 }
1353
1354 const int PartsPerGCD = WideSize / GCD;
1355
1356 // Build merges of each piece.
1357 ArrayRef<Register> Slicer(Unmerges);
1358 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1359 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1360 NewMergeRegs.push_back(Merge.getReg(0));
1361 }
1362
1363 // A truncate may be necessary if the requested type doesn't evenly divide the
1364 // original result type.
1365 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1366 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1367 } else {
1368 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1369 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1370 }
1371
1372 MI.eraseFromParent();
1373 return Legalized;
1374}
1375
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  // Only the destination (unmerged piece) type index can be widened here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // Last operand of G_UNMERGE_VALUES is the single source register.
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  // All destinations have the same type; check the first one.
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() == SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        // Pointers in non-integral address spaces may not be cast to plain
        // integers, so this source cannot be decomposed by shifting.
        LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      // Treat the pointer source as a same-sized integer from here on.
      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type: piece I is (Src >> (I * DstSize)) truncated to DstSize.
    unsigned DstSize = DstTy.getSizeInBits();

    // Piece 0 needs no shift.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // TODO
  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
    return UnableToLegalize;

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges to the original results. since we may have
  // widened the source, we will need to pad the results with dead defs to cover
  // the source register.
  // e.g. widen s16 to s32:
  // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
  //
  // =>
  //  %4:_(s64) = G_ANYEXT %0:_(s48)
  //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
  //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def

  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J) {
      int Idx = I * PartsPerUnmerge + J;
      if (Idx < NumDst)
        MIB.addDef(MI.getOperand(Idx).getReg());
      else {
        // Create dead def for excess components.
        MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
      }
    }

    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
1474
1475LegalizerHelper::LegalizeResult
1476LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1477 LLT WideTy) {
1478 Register DstReg = MI.getOperand(0).getReg();
1479 Register SrcReg = MI.getOperand(1).getReg();
1480 LLT SrcTy = MRI.getType(SrcReg);
1481
1482 LLT DstTy = MRI.getType(DstReg);
1483 unsigned Offset = MI.getOperand(2).getImm();
1484
1485 if (TypeIdx == 0) {
1486 if (SrcTy.isVector() || DstTy.isVector())
1487 return UnableToLegalize;
1488
1489 SrcOp Src(SrcReg);
1490 if (SrcTy.isPointer()) {
1491 // Extracts from pointers can be handled only if they are really just
1492 // simple integers.
1493 const DataLayout &DL = MIRBuilder.getDataLayout();
1494 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1495 return UnableToLegalize;
1496
1497 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1498 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1499 SrcTy = SrcAsIntTy;
1500 }
1501
1502 if (DstTy.isPointer())
1503 return UnableToLegalize;
1504
1505 if (Offset == 0) {
1506 // Avoid a shift in the degenerate case.
1507 MIRBuilder.buildTrunc(DstReg,
1508 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1509 MI.eraseFromParent();
1510 return Legalized;
1511 }
1512
1513 // Do a shift in the source type.
1514 LLT ShiftTy = SrcTy;
1515 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1516 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1517 ShiftTy = WideTy;
1518 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1519 return UnableToLegalize;
1520
1521 auto LShr = MIRBuilder.buildLShr(
1522 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1523 MIRBuilder.buildTrunc(DstReg, LShr);
1524 MI.eraseFromParent();
1525 return Legalized;
1526 }
1527
1528 if (SrcTy.isScalar()) {
1529 Observer.changingInstr(MI);
1530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1531 Observer.changedInstr(MI);
1532 return Legalized;
1533 }
1534
1535 if (!SrcTy.isVector())
1536 return UnableToLegalize;
1537
1538 if (DstTy != SrcTy.getElementType())
1539 return UnableToLegalize;
1540
1541 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1542 return UnableToLegalize;
1543
1544 Observer.changingInstr(MI);
1545 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1546
1547 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1548 Offset);
1549 widenScalarDst(MI, WideTy.getScalarType(), 0);
1550 Observer.changedInstr(MI);
1551 return Legalized;
1552}
1553
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  // Only the big-register type index (0) can be widened; anyext the container
  // source and widen the destination to match. The inserted value and offset
  // operands are untouched.
  if (TypeIdx != 0)
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
1565
1566LegalizerHelper::LegalizeResult
1567LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1568 MIRBuilder.setInstr(MI);
1569
1570 switch (MI.getOpcode()) {
1571 default:
1572 return UnableToLegalize;
1573 case TargetOpcode::G_EXTRACT:
1574 return widenScalarExtract(MI, TypeIdx, WideTy);
1575 case TargetOpcode::G_INSERT:
1576 return widenScalarInsert(MI, TypeIdx, WideTy);
1577 case TargetOpcode::G_MERGE_VALUES:
1578 return widenScalarMergeValues(MI, TypeIdx, WideTy);
1579 case TargetOpcode::G_UNMERGE_VALUES:
1580 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1581 case TargetOpcode::G_UADDO:
1582 case TargetOpcode::G_USUBO: {
1583 if (TypeIdx == 1)
1584 return UnableToLegalize; // TODO
1585 auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
1586 auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
1587 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1588 ? TargetOpcode::G_ADD
1589 : TargetOpcode::G_SUB;
1590 // Do the arithmetic in the larger type.
1591 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1592 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1593 APInt Mask =
1594 APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
1595 auto AndOp = MIRBuilder.buildAnd(
1596 WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
1597 // There is no overflow if the AndOp is the same as NewOp.
1598 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
1599 // Now trunc the NewOp to the original result.
1600 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1601 MI.eraseFromParent();
1602 return Legalized;
1603 }
1604 case TargetOpcode::G_CTTZ:
1605 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1606 case TargetOpcode::G_CTLZ:
1607 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1608 case TargetOpcode::G_CTPOP: {
1609 if (TypeIdx == 0) {
1610 Observer.changingInstr(MI);
1611 widenScalarDst(MI, WideTy, 0);
1612 Observer.changedInstr(MI);
1613 return Legalized;
1614 }
1615
1616 Register SrcReg = MI.getOperand(1).getReg();
1617
1618 // First ZEXT the input.
1619 auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1620 LLT CurTy = MRI.getType(SrcReg);
1621 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1622 // The count is the same in the larger type except if the original
1623 // value was zero. This can be handled by setting the bit just off
1624 // the top of the original type.
1625 auto TopBit =
1626 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1627 MIBSrc = MIRBuilder.buildOr(
1628 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1629 }
1630
1631 // Perform the operation at the larger size.
1632 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1633 // This is already the correct result for CTPOP and CTTZs
1634 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1635 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1636 // The correct result is NewOp - (Difference in widety and current ty).
1637 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1638 MIBNewOp = MIRBuilder.buildSub(
1639 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1640 }
1641
1642 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1643 MI.eraseFromParent();
1644 return Legalized;
1645 }
1646 case TargetOpcode::G_BSWAP: {
1647 Observer.changingInstr(MI);
1648 Register DstReg = MI.getOperand(0).getReg();
1649
1650 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1651 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1652 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1653 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1654
1655 MI.getOperand(0).setReg(DstExt);
1656
1657 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1658
1659 LLT Ty = MRI.getType(DstReg);
1660 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1661 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1662 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1663
1664 MIRBuilder.buildTrunc(DstReg, ShrReg);
1665 Observer.changedInstr(MI);
1666 return Legalized;
1667 }
1668 case TargetOpcode::G_BITREVERSE: {
1669 Observer.changingInstr(MI);
1670
1671 Register DstReg = MI.getOperand(0).getReg();
1672 LLT Ty = MRI.getType(DstReg);
1673 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1674
1675 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1676 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1677 MI.getOperand(0).setReg(DstExt);
1678 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1679
1680 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1681 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1682 MIRBuilder.buildTrunc(DstReg, Shift);
1683 Observer.changedInstr(MI);
1684 return Legalized;
1685 }
1686 case TargetOpcode::G_ADD:
1687 case TargetOpcode::G_AND:
1688 case TargetOpcode::G_MUL:
1689 case TargetOpcode::G_OR:
1690 case TargetOpcode::G_XOR:
1691 case TargetOpcode::G_SUB:
1692 // Perform operation at larger width (any extension is fines here, high bits
1693 // don't affect the result) and then truncate the result back to the
1694 // original type.
1695 Observer.changingInstr(MI);
1696 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1697 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1698 widenScalarDst(MI, WideTy);
1699 Observer.changedInstr(MI);
1700 return Legalized;
1701
1702 case TargetOpcode::G_SHL:
1703 Observer.changingInstr(MI);
1704
1705 if (TypeIdx == 0) {
1706 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1707 widenScalarDst(MI, WideTy);
1708 } else {
1709 assert(TypeIdx == 1)((TypeIdx == 1) ? static_cast<void> (0) : __assert_fail
("TypeIdx == 1", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1709, __PRETTY_FUNCTION__))
;
1710 // The "number of bits to shift" operand must preserve its value as an
1711 // unsigned integer:
1712 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1713 }
1714
1715 Observer.changedInstr(MI);
1716 return Legalized;
1717
1718 case TargetOpcode::G_SDIV:
1719 case TargetOpcode::G_SREM:
1720 case TargetOpcode::G_SMIN:
1721 case TargetOpcode::G_SMAX:
1722 Observer.changingInstr(MI);
1723 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1724 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1725 widenScalarDst(MI, WideTy);
1726 Observer.changedInstr(MI);
1727 return Legalized;
1728
1729 case TargetOpcode::G_ASHR:
1730 case TargetOpcode::G_LSHR:
1731 Observer.changingInstr(MI);
1732
1733 if (TypeIdx == 0) {
1734 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1735 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1736
1737 widenScalarSrc(MI, WideTy, 1, CvtOp);
1738 widenScalarDst(MI, WideTy);
1739 } else {
1740 assert(TypeIdx == 1)((TypeIdx == 1) ? static_cast<void> (0) : __assert_fail
("TypeIdx == 1", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1740, __PRETTY_FUNCTION__))
;
1741 // The "number of bits to shift" operand must preserve its value as an
1742 // unsigned integer:
1743 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1744 }
1745
1746 Observer.changedInstr(MI);
1747 return Legalized;
1748 case TargetOpcode::G_UDIV:
1749 case TargetOpcode::G_UREM:
1750 case TargetOpcode::G_UMIN:
1751 case TargetOpcode::G_UMAX:
1752 Observer.changingInstr(MI);
1753 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1754 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1755 widenScalarDst(MI, WideTy);
1756 Observer.changedInstr(MI);
1757 return Legalized;
1758
1759 case TargetOpcode::G_SELECT:
1760 Observer.changingInstr(MI);
1761 if (TypeIdx == 0) {
1762 // Perform operation at larger width (any extension is fine here, high
1763 // bits don't affect the result) and then truncate the result back to the
1764 // original type.
1765 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1766 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1767 widenScalarDst(MI, WideTy);
1768 } else {
1769 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1770 // Explicit extension is required here since high bits affect the result.
1771 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1772 }
1773 Observer.changedInstr(MI);
1774 return Legalized;
1775
1776 case TargetOpcode::G_FPTOSI:
1777 case TargetOpcode::G_FPTOUI:
1778 Observer.changingInstr(MI);
1779
1780 if (TypeIdx == 0)
1781 widenScalarDst(MI, WideTy);
1782 else
1783 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1784
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_SITOFP:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790 Observer.changingInstr(MI);
1791 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1792 Observer.changedInstr(MI);
1793 return Legalized;
1794
1795 case TargetOpcode::G_UITOFP:
1796 if (TypeIdx != 1)
1797 return UnableToLegalize;
1798 Observer.changingInstr(MI);
1799 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1800 Observer.changedInstr(MI);
1801 return Legalized;
1802
1803 case TargetOpcode::G_LOAD:
1804 case TargetOpcode::G_SEXTLOAD:
1805 case TargetOpcode::G_ZEXTLOAD:
1806 Observer.changingInstr(MI);
1807 widenScalarDst(MI, WideTy);
1808 Observer.changedInstr(MI);
1809 return Legalized;
1810
1811 case TargetOpcode::G_STORE: {
1812 if (TypeIdx != 0)
1813 return UnableToLegalize;
1814
1815 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1816 if (!isPowerOf2_32(Ty.getSizeInBits()))
1817 return UnableToLegalize;
1818
1819 Observer.changingInstr(MI);
1820
1821 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1822 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1823 widenScalarSrc(MI, WideTy, 0, ExtType);
1824
1825 Observer.changedInstr(MI);
1826 return Legalized;
1827 }
1828 case TargetOpcode::G_CONSTANT: {
1829 MachineOperand &SrcMO = MI.getOperand(1);
1830 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1831 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1832 MRI.getType(MI.getOperand(0).getReg()));
1833 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||(((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::
G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && "Illegal Extend"
) ? static_cast<void> (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1835, __PRETTY_FUNCTION__))
1834 ExtOpc == TargetOpcode::G_ANYEXT) &&(((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::
G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && "Illegal Extend"
) ? static_cast<void> (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1835, __PRETTY_FUNCTION__))
1835 "Illegal Extend")(((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::
G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && "Illegal Extend"
) ? static_cast<void> (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1835, __PRETTY_FUNCTION__))
;
1836 const APInt &SrcVal = SrcMO.getCImm()->getValue();
1837 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1838 ? SrcVal.sext(WideTy.getSizeInBits())
1839 : SrcVal.zext(WideTy.getSizeInBits());
1840 Observer.changingInstr(MI);
1841 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1842
1843 widenScalarDst(MI, WideTy);
1844 Observer.changedInstr(MI);
1845 return Legalized;
1846 }
1847 case TargetOpcode::G_FCONSTANT: {
1848 MachineOperand &SrcMO = MI.getOperand(1);
1849 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1850 APFloat Val = SrcMO.getFPImm()->getValueAPF();
1851 bool LosesInfo;
1852 switch (WideTy.getSizeInBits()) {
1853 case 32:
1854 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1855 &LosesInfo);
1856 break;
1857 case 64:
1858 Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1859 &LosesInfo);
1860 break;
1861 default:
1862 return UnableToLegalize;
1863 }
1864
1865 assert(!LosesInfo && "extend should always be lossless")((!LosesInfo && "extend should always be lossless") ?
static_cast<void> (0) : __assert_fail ("!LosesInfo && \"extend should always be lossless\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1865, __PRETTY_FUNCTION__))
;
1866
1867 Observer.changingInstr(MI);
1868 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1869
1870 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1871 Observer.changedInstr(MI);
1872 return Legalized;
1873 }
1874 case TargetOpcode::G_IMPLICIT_DEF: {
1875 Observer.changingInstr(MI);
1876 widenScalarDst(MI, WideTy);
1877 Observer.changedInstr(MI);
1878 return Legalized;
1879 }
1880 case TargetOpcode::G_BRCOND:
1881 Observer.changingInstr(MI);
1882 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1883 Observer.changedInstr(MI);
1884 return Legalized;
1885
1886 case TargetOpcode::G_FCMP:
1887 Observer.changingInstr(MI);
1888 if (TypeIdx == 0)
1889 widenScalarDst(MI, WideTy);
1890 else {
1891 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1892 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1893 }
1894 Observer.changedInstr(MI);
1895 return Legalized;
1896
1897 case TargetOpcode::G_ICMP:
1898 Observer.changingInstr(MI);
1899 if (TypeIdx == 0)
1900 widenScalarDst(MI, WideTy);
1901 else {
1902 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1903 MI.getOperand(1).getPredicate()))
1904 ? TargetOpcode::G_SEXT
1905 : TargetOpcode::G_ZEXT;
1906 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1907 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1908 }
1909 Observer.changedInstr(MI);
1910 return Legalized;
1911
1912 case TargetOpcode::G_PTR_ADD:
1913 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD")((TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"
) ? static_cast<void> (0) : __assert_fail ("TypeIdx == 1 && \"unable to legalize pointer of G_PTR_ADD\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1913, __PRETTY_FUNCTION__))
;
1914 Observer.changingInstr(MI);
1915 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1916 Observer.changedInstr(MI);
1917 return Legalized;
1918
1919 case TargetOpcode::G_PHI: {
1920 assert(TypeIdx == 0 && "Expecting only Idx 0")((TypeIdx == 0 && "Expecting only Idx 0") ? static_cast
<void> (0) : __assert_fail ("TypeIdx == 0 && \"Expecting only Idx 0\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1920, __PRETTY_FUNCTION__))
;
1921
1922 Observer.changingInstr(MI);
1923 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1924 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1925 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1926 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1927 }
1928
1929 MachineBasicBlock &MBB = *MI.getParent();
1930 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1931 widenScalarDst(MI, WideTy);
1932 Observer.changedInstr(MI);
1933 return Legalized;
1934 }
1935 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1936 if (TypeIdx == 0) {
1937 Register VecReg = MI.getOperand(1).getReg();
1938 LLT VecTy = MRI.getType(VecReg);
1939 Observer.changingInstr(MI);
1940
1941 widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1942 WideTy.getSizeInBits()),
1943 1, TargetOpcode::G_SEXT);
1944
1945 widenScalarDst(MI, WideTy, 0);
1946 Observer.changedInstr(MI);
1947 return Legalized;
1948 }
1949
1950 if (TypeIdx != 2)
1951 return UnableToLegalize;
1952 Observer.changingInstr(MI);
1953 // TODO: Probably should be zext
1954 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1955 Observer.changedInstr(MI);
1956 return Legalized;
1957 }
1958 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1959 if (TypeIdx == 1) {
1960 Observer.changingInstr(MI);
1961
1962 Register VecReg = MI.getOperand(1).getReg();
1963 LLT VecTy = MRI.getType(VecReg);
1964 LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
1965
1966 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
1967 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1968 widenScalarDst(MI, WideVecTy, 0);
1969 Observer.changedInstr(MI);
1970 return Legalized;
1971 }
1972
1973 if (TypeIdx == 2) {
1974 Observer.changingInstr(MI);
1975 // TODO: Probably should be zext
1976 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
1977 Observer.changedInstr(MI);
1978 }
1979
1980 return Legalized;
1981 }
1982 case TargetOpcode::G_FADD:
1983 case TargetOpcode::G_FMUL:
1984 case TargetOpcode::G_FSUB:
1985 case TargetOpcode::G_FMA:
1986 case TargetOpcode::G_FMAD:
1987 case TargetOpcode::G_FNEG:
1988 case TargetOpcode::G_FABS:
1989 case TargetOpcode::G_FCANONICALIZE:
1990 case TargetOpcode::G_FMINNUM:
1991 case TargetOpcode::G_FMAXNUM:
1992 case TargetOpcode::G_FMINNUM_IEEE:
1993 case TargetOpcode::G_FMAXNUM_IEEE:
1994 case TargetOpcode::G_FMINIMUM:
1995 case TargetOpcode::G_FMAXIMUM:
1996 case TargetOpcode::G_FDIV:
1997 case TargetOpcode::G_FREM:
1998 case TargetOpcode::G_FCEIL:
1999 case TargetOpcode::G_FFLOOR:
2000 case TargetOpcode::G_FCOS:
2001 case TargetOpcode::G_FSIN:
2002 case TargetOpcode::G_FLOG10:
2003 case TargetOpcode::G_FLOG:
2004 case TargetOpcode::G_FLOG2:
2005 case TargetOpcode::G_FRINT:
2006 case TargetOpcode::G_FNEARBYINT:
2007 case TargetOpcode::G_FSQRT:
2008 case TargetOpcode::G_FEXP:
2009 case TargetOpcode::G_FEXP2:
2010 case TargetOpcode::G_FPOW:
2011 case TargetOpcode::G_INTRINSIC_TRUNC:
2012 case TargetOpcode::G_INTRINSIC_ROUND:
2013 assert(TypeIdx == 0)((TypeIdx == 0) ? static_cast<void> (0) : __assert_fail
("TypeIdx == 0", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2013, __PRETTY_FUNCTION__))
;
2014 Observer.changingInstr(MI);
2015
2016 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2017 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2018
2019 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2020 Observer.changedInstr(MI);
2021 return Legalized;
2022 case TargetOpcode::G_INTTOPTR:
2023 if (TypeIdx != 1)
2024 return UnableToLegalize;
2025
2026 Observer.changingInstr(MI);
2027 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2028 Observer.changedInstr(MI);
2029 return Legalized;
2030 case TargetOpcode::G_PTRTOINT:
2031 if (TypeIdx != 0)
2032 return UnableToLegalize;
2033
2034 Observer.changingInstr(MI);
2035 widenScalarDst(MI, WideTy, 0);
2036 Observer.changedInstr(MI);
2037 return Legalized;
2038 case TargetOpcode::G_BUILD_VECTOR: {
2039 Observer.changingInstr(MI);
2040
2041 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2042 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2043 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2044
2045 // Avoid changing the result vector type if the source element type was
2046 // requested.
2047 if (TypeIdx == 1) {
2048 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2049 MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2050 } else {
2051 widenScalarDst(MI, WideTy, 0);
2052 }
2053
2054 Observer.changedInstr(MI);
2055 return Legalized;
2056 }
2057 case TargetOpcode::G_SEXT_INREG:
2058 if (TypeIdx != 0)
2059 return UnableToLegalize;
2060
2061 Observer.changingInstr(MI);
2062 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2063 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2064 Observer.changedInstr(MI);
2065 return Legalized;
2066 }
2067}
2068
2069static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2070 MachineIRBuilder &B, Register Src, LLT Ty) {
2071 auto Unmerge = B.buildUnmerge(Ty, Src);
2072 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2073 Pieces.push_back(Unmerge.getReg(I));
2074}
2075
2076LegalizerHelper::LegalizeResult
2077LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2078 Register Dst = MI.getOperand(0).getReg();
2079 Register Src = MI.getOperand(1).getReg();
2080 LLT DstTy = MRI.getType(Dst);
2081 LLT SrcTy = MRI.getType(Src);
2082
2083 if (SrcTy.isVector() && !DstTy.isVector()) {
2084 SmallVector<Register, 8> SrcRegs;
2085 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcTy.getElementType());
2086 MIRBuilder.buildMerge(Dst, SrcRegs);
2087 MI.eraseFromParent();
2088 return Legalized;
2089 }
2090
2091 if (DstTy.isVector() && !SrcTy.isVector()) {
2092 SmallVector<Register, 8> SrcRegs;
2093 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2094 MIRBuilder.buildMerge(Dst, SrcRegs);
2095 MI.eraseFromParent();
2096 return Legalized;
2097 }
2098
2099 return UnableToLegalize;
2100}
2101
2102LegalizerHelper::LegalizeResult
2103LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2104 using namespace TargetOpcode;
2105 MIRBuilder.setInstr(MI);
2106
2107 switch(MI.getOpcode()) {
2108 default:
2109 return UnableToLegalize;
2110 case TargetOpcode::G_BITCAST:
2111 return lowerBitcast(MI);
2112 case TargetOpcode::G_SREM:
2113 case TargetOpcode::G_UREM: {
2114 auto Quot =
2115 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2116 {MI.getOperand(1), MI.getOperand(2)});
2117
2118 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2119 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2120 MI.eraseFromParent();
2121 return Legalized;
2122 }
2123 case TargetOpcode::G_SADDO:
2124 case TargetOpcode::G_SSUBO:
2125 return lowerSADDO_SSUBO(MI);
2126 case TargetOpcode::G_SMULO:
2127 case TargetOpcode::G_UMULO: {
2128 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2129 // result.
2130 Register Res = MI.getOperand(0).getReg();
2131 Register Overflow = MI.getOperand(1).getReg();
2132 Register LHS = MI.getOperand(2).getReg();
2133 Register RHS = MI.getOperand(3).getReg();
2134
2135 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2136 ? TargetOpcode::G_SMULH
2137 : TargetOpcode::G_UMULH;
2138
2139 Observer.changingInstr(MI);
2140 const auto &TII = MIRBuilder.getTII();
2141 MI.setDesc(TII.get(TargetOpcode::G_MUL));
2142 MI.RemoveOperand(1);
2143 Observer.changedInstr(MI);
2144
2145 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2146
2147 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2148 auto Zero = MIRBuilder.buildConstant(Ty, 0);
2149
2150 // For *signed* multiply, overflow is detected by checking:
2151 // (hi != (lo >> bitwidth-1))
2152 if (Opcode == TargetOpcode::G_SMULH) {
2153 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2154 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2155 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2156 } else {
2157 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2158 }
2159 return Legalized;
2160 }
2161 case TargetOpcode::G_FNEG: {
2162 // TODO: Handle vector types once we are able to
2163 // represent them.
2164 if (Ty.isVector())
2165 return UnableToLegalize;
2166 Register Res = MI.getOperand(0).getReg();
2167 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2168 Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
2169 if (!ZeroTy)
2170 return UnableToLegalize;
2171 ConstantFP &ZeroForNegation =
2172 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2173 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2174 Register SubByReg = MI.getOperand(1).getReg();
2175 Register ZeroReg = Zero.getReg(0);
2176 MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
2177 MI.eraseFromParent();
2178 return Legalized;
2179 }
2180 case TargetOpcode::G_FSUB: {
2181 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2182 // First, check if G_FNEG is marked as Lower. If so, we may
2183 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2184 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2185 return UnableToLegalize;
2186 Register Res = MI.getOperand(0).getReg();
2187 Register LHS = MI.getOperand(1).getReg();
2188 Register RHS = MI.getOperand(2).getReg();
2189 Register Neg = MRI.createGenericVirtualRegister(Ty);
2190 MIRBuilder.buildFNeg(Neg, RHS);
2191 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2192 MI.eraseFromParent();
2193 return Legalized;
2194 }
2195 case TargetOpcode::G_FMAD:
2196 return lowerFMad(MI);
2197 case TargetOpcode::G_INTRINSIC_ROUND:
2198 return lowerIntrinsicRound(MI);
2199 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2200 Register OldValRes = MI.getOperand(0).getReg();
2201 Register SuccessRes = MI.getOperand(1).getReg();
2202 Register Addr = MI.getOperand(2).getReg();
2203 Register CmpVal = MI.getOperand(3).getReg();
2204 Register NewVal = MI.getOperand(4).getReg();
2205 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2206 **MI.memoperands_begin());
2207 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2208 MI.eraseFromParent();
2209 return Legalized;
2210 }
2211 case TargetOpcode::G_LOAD:
2212 case TargetOpcode::G_SEXTLOAD:
2213 case TargetOpcode::G_ZEXTLOAD: {
2214 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2215 Register DstReg = MI.getOperand(0).getReg();
2216 Register PtrReg = MI.getOperand(1).getReg();
2217 LLT DstTy = MRI.getType(DstReg);
2218 auto &MMO = **MI.memoperands_begin();
2219
2220 if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2221 if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2222 // This load needs splitting into power of 2 sized loads.
2223 if (DstTy.isVector())
2224 return UnableToLegalize;
2225 if (isPowerOf2_32(DstTy.getSizeInBits()))
2226 return UnableToLegalize; // Don't know what we're being asked to do.
2227
2228 // Our strategy here is to generate anyextending loads for the smaller
2229 // types up to next power-2 result type, and then combine the two larger
2230 // result values together, before truncating back down to the non-pow-2
2231 // type.
2232 // E.g. v1 = i24 load =>
2233 // v2 = i32 zextload (2 byte)
2234 // v3 = i32 load (1 byte)
2235 // v4 = i32 shl v3, 16
2236 // v5 = i32 or v4, v2
2237 // v1 = i24 trunc v5
2238 // By doing this we generate the correct truncate which should get
2239 // combined away as an artifact with a matching extend.
2240 uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2241 uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2242
2243 MachineFunction &MF = MIRBuilder.getMF();
2244 MachineMemOperand *LargeMMO =
2245 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2246 MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2247 &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2248
2249 LLT PtrTy = MRI.getType(PtrReg);
2250 unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2251 LLT AnyExtTy = LLT::scalar(AnyExtSize);
2252 Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2253 Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2254 auto LargeLoad = MIRBuilder.buildLoadInstr(
2255 TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2256
2257 auto OffsetCst = MIRBuilder.buildConstant(
2258 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2259 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2260 auto SmallPtr =
2261 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2262 auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2263 *SmallMMO);
2264
2265 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2266 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2267 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2268 MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2269 MI.eraseFromParent();
2270 return Legalized;
2271 }
2272 MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2273 MI.eraseFromParent();
2274 return Legalized;
2275 }
2276
2277 if (DstTy.isScalar()) {
2278 Register TmpReg =
2279 MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2280 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2281 switch (MI.getOpcode()) {
2282 default:
2283 llvm_unreachable("Unexpected opcode")::llvm::llvm_unreachable_internal("Unexpected opcode", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2283)
;
2284 case TargetOpcode::G_LOAD:
2285 MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2286 break;
2287 case TargetOpcode::G_SEXTLOAD:
2288 MIRBuilder.buildSExt(DstReg, TmpReg);
2289 break;
2290 case TargetOpcode::G_ZEXTLOAD:
2291 MIRBuilder.buildZExt(DstReg, TmpReg);
2292 break;
2293 }
2294 MI.eraseFromParent();
2295 return Legalized;
2296 }
2297
2298 return UnableToLegalize;
2299 }
2300 case TargetOpcode::G_STORE: {
2301 // Lower a non-power of 2 store into multiple pow-2 stores.
2302 // E.g. split an i24 store into an i16 store + i8 store.
2303 // We do this by first extending the stored value to the next largest power
2304 // of 2 type, and then using truncating stores to store the components.
2305 // By doing this, likewise with G_LOAD, generate an extend that can be
2306 // artifact-combined away instead of leaving behind extracts.
2307 Register SrcReg = MI.getOperand(0).getReg();
2308 Register PtrReg = MI.getOperand(1).getReg();
2309 LLT SrcTy = MRI.getType(SrcReg);
2310 MachineMemOperand &MMO = **MI.memoperands_begin();
2311 if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2312 return UnableToLegalize;
2313 if (SrcTy.isVector())
2314 return UnableToLegalize;
2315 if (isPowerOf2_32(SrcTy.getSizeInBits()))
2316 return UnableToLegalize; // Don't know what we're being asked to do.
2317
2318 // Extend to the next pow-2.
2319 const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2320 auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2321
2322 // Obtain the smaller value by shifting away the larger value.
2323 uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2324 uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2325 auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2326 auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2327
2328 // Generate the PtrAdd and truncating stores.
2329 LLT PtrTy = MRI.getType(PtrReg);
2330 auto OffsetCst = MIRBuilder.buildConstant(
2331 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2332 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2333 auto SmallPtr =
2334 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2335
2336 MachineFunction &MF = MIRBuilder.getMF();
2337 MachineMemOperand *LargeMMO =
2338 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2339 MachineMemOperand *SmallMMO =
2340 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2341 MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2342 MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2343 MI.eraseFromParent();
2344 return Legalized;
2345 }
2346 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2347 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2348 case TargetOpcode::G_CTLZ:
2349 case TargetOpcode::G_CTTZ:
2350 case TargetOpcode::G_CTPOP:
2351 return lowerBitCount(MI, TypeIdx, Ty);
2352 case G_UADDO: {
2353 Register Res = MI.getOperand(0).getReg();
2354 Register CarryOut = MI.getOperand(1).getReg();
2355 Register LHS = MI.getOperand(2).getReg();
2356 Register RHS = MI.getOperand(3).getReg();
2357
2358 MIRBuilder.buildAdd(Res, LHS, RHS);
2359 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2360
2361 MI.eraseFromParent();
2362 return Legalized;
2363 }
2364 case G_UADDE: {
2365 Register Res = MI.getOperand(0).getReg();
2366 Register CarryOut = MI.getOperand(1).getReg();
2367 Register LHS = MI.getOperand(2).getReg();
2368 Register RHS = MI.getOperand(3).getReg();
2369 Register CarryIn = MI.getOperand(4).getReg();
2370 LLT Ty = MRI.getType(Res);
2371
2372 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
2373 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
2374 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2375 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2376
2377 MI.eraseFromParent();
2378 return Legalized;
2379 }
2380 case G_USUBO: {
2381 Register Res = MI.getOperand(0).getReg();
2382 Register BorrowOut = MI.getOperand(1).getReg();
2383 Register LHS = MI.getOperand(2).getReg();
2384 Register RHS = MI.getOperand(3).getReg();
2385
2386 MIRBuilder.buildSub(Res, LHS, RHS);
2387 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2388
2389 MI.eraseFromParent();
2390 return Legalized;
2391 }
2392 case G_USUBE: {
2393 Register Res = MI.getOperand(0).getReg();
2394 Register BorrowOut = MI.getOperand(1).getReg();
2395 Register LHS = MI.getOperand(2).getReg();
2396 Register RHS = MI.getOperand(3).getReg();
2397 Register BorrowIn = MI.getOperand(4).getReg();
2398 const LLT CondTy = MRI.getType(BorrowOut);
2399 const LLT Ty = MRI.getType(Res);
2400
2401 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
2402 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
2403 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2404
2405 auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
2406 auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
2407 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2408
2409 MI.eraseFromParent();
2410 return Legalized;
2411 }
2412 case G_UITOFP:
2413 return lowerUITOFP(MI, TypeIdx, Ty);
2414 case G_SITOFP:
2415 return lowerSITOFP(MI, TypeIdx, Ty);
2416 case G_FPTOUI:
2417 return lowerFPTOUI(MI, TypeIdx, Ty);
2418 case G_FPTOSI:
2419 return lowerFPTOSI(MI);
2420 case G_FPTRUNC:
2421 return lowerFPTRUNC(MI, TypeIdx, Ty);
2422 case G_SMIN:
2423 case G_SMAX:
2424 case G_UMIN:
2425 case G_UMAX:
2426 return lowerMinMax(MI, TypeIdx, Ty);
2427 case G_FCOPYSIGN:
2428 return lowerFCopySign(MI, TypeIdx, Ty);
2429 case G_FMINNUM:
2430 case G_FMAXNUM:
2431 return lowerFMinNumMaxNum(MI);
2432 case G_UNMERGE_VALUES:
2433 return lowerUnmergeValues(MI);
2434 case TargetOpcode::G_SEXT_INREG: {
2435 assert(MI.getOperand(2).isImm() && "Expected immediate")((MI.getOperand(2).isImm() && "Expected immediate") ?
static_cast<void> (0) : __assert_fail ("MI.getOperand(2).isImm() && \"Expected immediate\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2435, __PRETTY_FUNCTION__))
;
2436 int64_t SizeInBits = MI.getOperand(2).getImm();
2437
2438 Register DstReg = MI.getOperand(0).getReg();
2439 Register SrcReg = MI.getOperand(1).getReg();
2440 LLT DstTy = MRI.getType(DstReg);
2441 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2442
2443 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2444 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
2445 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
2446 MI.eraseFromParent();
2447 return Legalized;
2448 }
2449 case G_SHUFFLE_VECTOR:
2450 return lowerShuffleVector(MI);
2451 case G_DYN_STACKALLOC:
2452 return lowerDynStackAlloc(MI);
2453 case G_EXTRACT:
2454 return lowerExtract(MI);
2455 case G_INSERT:
2456 return lowerInsert(MI);
2457 case G_BSWAP:
2458 return lowerBswap(MI);
2459 case G_BITREVERSE:
2460 return lowerBitreverse(MI);
2461 case G_READ_REGISTER:
2462 case G_WRITE_REGISTER:
2463 return lowerReadWriteRegister(MI);
2464 }
2465}
2466
2467LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2468 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2469 SmallVector<Register, 2> DstRegs;
2470
2471 unsigned NarrowSize = NarrowTy.getSizeInBits();
2472 Register DstReg = MI.getOperand(0).getReg();
2473 unsigned Size = MRI.getType(DstReg).getSizeInBits();
2474 int NumParts = Size / NarrowSize;
2475 // FIXME: Don't know how to handle the situation where the small vectors
2476 // aren't all the same size yet.
2477 if (Size % NarrowSize != 0)
2478 return UnableToLegalize;
2479
2480 for (int i = 0; i < NumParts; ++i) {
2481 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2482 MIRBuilder.buildUndef(TmpReg);
2483 DstRegs.push_back(TmpReg);
2484 }
2485
2486 if (NarrowTy.isVector())
2487 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2488 else
2489 MIRBuilder.buildBuildVector(DstReg, DstRegs);
2490
2491 MI.eraseFromParent();
2492 return Legalized;
2493}
2494
2495LegalizerHelper::LegalizeResult
2496LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
2497 LLT NarrowTy) {
2498 assert(TypeIdx == 0 && "only one type index expected")((TypeIdx == 0 && "only one type index expected") ? static_cast
<void> (0) : __assert_fail ("TypeIdx == 0 && \"only one type index expected\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2498, __PRETTY_FUNCTION__))
;
2499
2500 const unsigned Opc = MI.getOpcode();
2501 const int NumOps = MI.getNumOperands() - 1;
2502 const Register DstReg = MI.getOperand(0).getReg();
2503 const unsigned Flags = MI.getFlags();
2504
2505 assert(NumOps <= 3 && "expected instrution with 1 result and 1-3 sources")((NumOps <= 3 && "expected instrution with 1 result and 1-3 sources"
) ? static_cast<void> (0) : __assert_fail ("NumOps <= 3 && \"expected instrution with 1 result and 1-3 sources\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2505, __PRETTY_FUNCTION__))
;
2506
2507 SmallVector<Register, 8> ExtractedRegs[3];
2508 SmallVector<Register, 8> Parts;
2509
2510 // Break down all the sources into NarrowTy pieces we can operate on. This may
2511 // involve creating merges to a wider type, padded with undef.
2512 for (int I = 0; I != NumOps; ++I) {
2513 Register SrcReg = MI.getOperand(I + 1).getReg();
2514 LLT SrcTy = MRI.getType(SrcReg);
2515 LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, NarrowTy, SrcReg);
2516
2517 // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
2518 buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ExtractedRegs[I],
2519 TargetOpcode::G_ANYEXT);
2520 }
2521
2522 SmallVector<Register, 8> ResultRegs;
2523
2524 // Input operands for each sub-instruction.
2525 SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
2526
2527 int NumParts = ExtractedRegs[0].size();
2528 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2529 const unsigned NarrowSize = NarrowTy.getSizeInBits();
2530
2531 // We widened the source registers to satisfy merge/unmerge size
2532 // constraints. We'll have some extra fully undef parts.
2533 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
2534
2535 for (int I = 0; I != NumRealParts; ++I) {
2536 // Emit this instruction on each of the split pieces.
2537 for (int J = 0; J != NumOps; ++J)
2538 InputRegs[J] = ExtractedRegs[J][I];
2539
2540 auto Inst = MIRBuilder.buildInstr(Opc, {NarrowTy}, InputRegs, Flags);
2541 ResultRegs.push_back(Inst.getReg(0));
2542 }
2543
2544 // Fill out the widened result with undef instead of creating instructions
2545 // with undef inputs.
2546 int NumUndefParts = NumParts - NumRealParts;
2547 if (NumUndefParts != 0)
2548 ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0));
2549
2550 // Extract the possibly padded result to the original result register.
2551 LLT DstTy = MRI.getType(DstReg);
2552 LLT LCMTy = getLCMType(DstTy, NarrowTy);
2553 buildWidenedRemergeToDst(DstReg, LCMTy, ResultRegs);
2554
2555 MI.eraseFromParent();
2556 return Legalized;
2557}
2558
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  // Every operand is split into pieces of NewNumElts elements (or scalars).
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // Sub-instructions being assembled; operands are appended per source below.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    LLT LeftoverTy;
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same piece element count as the result, but this operand's element type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2654
// Split a single-source conversion-style instruction into NumParts narrower
// copies of the same opcode and reassemble the destination vector.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;    // Result piece type.
  LLT NarrowTy1;               // Source piece type.
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // NOTE(review): this builds the source piece with NumParts elements
    // rather than NarrowTy.getNumElements(); the two only agree when the
    // breakdown is square — verify against callers.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Scalarize: one instruction per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    // Preserve flags (e.g. fast-math) on each piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2702
// Split a G_ICMP/G_FCMP into NumParts narrower compares and reassemble the
// boolean result vector.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;   // Result piece type / source piece type.

  if (TypeIdx == 0) {
    // Narrowing the result; derive a matching source piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing the source; derive a matching result piece type.
    // NOTE(review): the non-vector NarrowTy paths below call
    // NarrowTy.getNumElements() on a scalar type — verify callers never
    // reach here with a scalar NarrowTy when TypeIdx == 1.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FCmp pieces need the original (fast-math) flags copied over.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2769
2770LegalizerHelper::LegalizeResult
2771LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2772 LLT NarrowTy) {
2773 Register DstReg = MI.getOperand(0).getReg();
2774 Register CondReg = MI.getOperand(1).getReg();
2775
2776 unsigned NumParts = 0;
2777 LLT NarrowTy0, NarrowTy1;
2778
2779 LLT DstTy = MRI.getType(DstReg);
2780 LLT CondTy = MRI.getType(CondReg);
2781 unsigned Size = DstTy.getSizeInBits();
2782
2783 assert(TypeIdx == 0 || CondTy.isVector())((TypeIdx == 0 || CondTy.isVector()) ? static_cast<void>
(0) : __assert_fail ("TypeIdx == 0 || CondTy.isVector()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2783, __PRETTY_FUNCTION__))
;
2784
2785 if (TypeIdx == 0) {
2786 NarrowTy0 = NarrowTy;
2787 NarrowTy1 = CondTy;
2788
2789 unsigned NarrowSize = NarrowTy0.getSizeInBits();
2790 // FIXME: Don't know how to handle the situation where the small vectors
2791 // aren't all the same size yet.
2792 if (Size % NarrowSize != 0)
2793 return UnableToLegalize;
2794
2795 NumParts = Size / NarrowSize;
2796
2797 // Need to break down the condition type
2798 if (CondTy.isVector()) {
2799 if (CondTy.getNumElements() == NumParts)
2800 NarrowTy1 = CondTy.getElementType();
2801 else
2802 NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2803 CondTy.getScalarSizeInBits());
2804 }
2805 } else {
2806 NumParts = CondTy.getNumElements();
2807 if (NarrowTy.isVector()) {
2808 // TODO: Handle uneven breakdown.
2809 if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2810 return UnableToLegalize;
2811
2812 return UnableToLegalize;
2813 } else {
2814 NarrowTy0 = DstTy.getElementType();
2815 NarrowTy1 = NarrowTy;
2816 }
2817 }
2818
2819 SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2820 if (CondTy.isVector())
2821 extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2822
2823 extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2824 extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2825
2826 for (unsigned i = 0; i < NumParts; ++i) {
2827 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2828 MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2829 Src1Regs[i], Src2Regs[i]);
2830 DstRegs.push_back(DstReg);
2831 }
2832
2833 if (NarrowTy0.isVector())
2834 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2835 else
2836 MIRBuilder.buildBuildVector(DstReg, DstRegs);
2837
2838 MI.eraseFromParent();
2839 return Legalized;
2840}
2841
// Split a G_PHI into multiple narrower phis: NumParts NarrowTy phis plus
// NumLeftover leftover-typed phis for an uneven breakdown. The incoming
// values are split in each predecessor block, and the wide result is
// reassembled at the top of the phi's block.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // The first NumParts phis use NarrowTy; the trailing ones LeftoverTy.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Reassemble the wide value after the phi group but before any non-phi
  // instruction in this block.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The split pieces must be computed in the predecessor, ahead of its
    // terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2908
// Split a G_UNMERGE_VALUES (narrowing type index 1, the source) by first
// unmerging the source into GCD-typed pieces and then unmerging each piece
// into a contiguous run of the original destination registers.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  // First level: unmerge the source into GCD-typed pieces.
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // Second level: each GCD piece feeds PartsPerUnmerge of the original
  // destination registers, in order.
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
2948
// Split a G_BUILD_VECTOR into NarrowTy-sized sub-build_vectors, padding the
// last one with undef elements if the element count doesn't divide evenly,
// then concatenate (and G_EXTRACT the original width if padding was added).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
                                                unsigned TypeIdx,
                                                LLT NarrowTy) {
  assert(TypeIdx == 0 && "not a vector type index");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = DstTy.getElementType();

  // Round the element count up to a whole number of NarrowTy pieces.
  int DstNumElts = DstTy.getNumElements();
  int NarrowNumElts = NarrowTy.getNumElements();
  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);

  SmallVector<Register, 8> ConcatOps;
  SmallVector<Register, 8> SubBuildVector;

  // Padding element, only needed when DstNumElts doesn't divide evenly.
  Register UndefReg;
  if (WidenedDstTy != DstTy)
    UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);

  // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
  // necessary.
  //
  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
  //   -> <2 x s16>
  //
  // %4:_(s16) = G_IMPLICIT_DEF
  // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
  // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
  // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
  // %3:_(<3 x s16>) = G_EXTRACT %7, 0
  for (int I = 0; I != NumConcat; ++I) {
    for (int J = 0; J != NarrowNumElts; ++J) {
      int SrcIdx = NarrowNumElts * I + J;

      if (SrcIdx < DstNumElts) {
        Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
        SubBuildVector.push_back(SrcReg);
      } else
        SubBuildVector.push_back(UndefReg);
    }

    auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
    ConcatOps.push_back(BuildVec.getReg(0));
    SubBuildVector.clear();
  }

  if (DstTy == WidenedDstTy)
    MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
  else {
    // Concatenate to the padded type, then extract the original width.
    auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
    MIRBuilder.buildExtract(DstReg, Concat, 0);
  }

  MI.eraseFromParent();
  return Legalized;
}
3007
3008LegalizerHelper::LegalizeResult
3009LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3010 LLT NarrowTy) {
3011 // FIXME: Don't know how to handle secondary types yet.
3012 if (TypeIdx != 0)
3013 return UnableToLegalize;
3014
3015 MachineMemOperand *MMO = *MI.memoperands_begin();
3016
3017 // This implementation doesn't work for atomics. Give up instead of doing
3018 // something invalid.
3019 if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3020 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3021 return UnableToLegalize;
3022
3023 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3024 Register ValReg = MI.getOperand(0).getReg();
3025 Register AddrReg = MI.getOperand(1).getReg();
3026 LLT ValTy = MRI.getType(ValReg);
3027
3028 int NumParts = -1;
3029 int NumLeftover = -1;
3030 LLT LeftoverTy;
3031 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3032 if (IsLoad) {
3033 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3034 } else {
3035 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3036 NarrowLeftoverRegs)) {
3037 NumParts = NarrowRegs.size();
3038 NumLeftover = NarrowLeftoverRegs.size();
Value stored to 'NumLeftover' is never read
3039 }
3040 }
3041
3042 if (NumParts == -1)
3043 return UnableToLegalize;
3044
3045 const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
3046
3047 unsigned TotalSize = ValTy.getSizeInBits();
3048
3049 // Split the load/store into PartTy sized pieces starting at Offset. If this
3050 // is a load, return the new registers in ValRegs. For a store, each elements
3051 // of ValRegs should be PartTy. Returns the next offset that needs to be
3052 // handled.
3053 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3054 unsigned Offset) -> unsigned {
3055 MachineFunction &MF = MIRBuilder.getMF();
3056 unsigned PartSize = PartTy.getSizeInBits();
3057 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3058 Offset += PartSize, ++Idx) {
3059 unsigned ByteSize = PartSize / 8;
3060 unsigned ByteOffset = Offset / 8;
3061 Register NewAddrReg;
3062
3063 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3064
3065 MachineMemOperand *NewMMO =
3066 MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3067
3068 if (IsLoad) {
3069 Register Dst = MRI.createGenericVirtualRegister(PartTy);
3070 ValRegs.push_back(Dst);
3071 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3072 } else {
3073 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3074 }
3075 }
3076
3077 return Offset;
3078 };
3079
3080 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3081
3082 // Handle the rest of the register if this isn't an even type breakdown.
3083 if (LeftoverTy.isValid())
3084 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3085
3086 if (IsLoad) {
3087 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3088 LeftoverTy, NarrowLeftoverRegs);
3089 }
3090
3091 MI.eraseFromParent();
3092 return Legalized;
3093}
3094
3095LegalizerHelper::LegalizeResult
3096LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3097 LLT NarrowTy) {
3098 Register DstReg = MI.getOperand(0).getReg();
3099 Register SrcReg = MI.getOperand(1).getReg();
3100 int64_t Imm = MI.getOperand(2).getImm();
3101
3102 LLT DstTy = MRI.getType(DstReg);
3103
3104 SmallVector<Register, 8> Parts;
3105 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3106 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3107
3108 for (Register &R : Parts)
3109 R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3110
3111 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3112
3113 MI.eraseFromParent();
3114 return Legalized;
3115}
3116
3117LegalizerHelper::LegalizeResult
3118LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
3119 LLT NarrowTy) {
3120 using namespace TargetOpcode;
3121
3122 MIRBuilder.setInstr(MI);
3123 switch (MI.getOpcode()) {
3124 case G_IMPLICIT_DEF:
3125 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
3126 case G_AND:
3127 case G_OR:
3128 case G_XOR:
3129 case G_ADD:
3130 case G_SUB:
3131 case G_MUL:
3132 case G_SMULH:
3133 case G_UMULH:
3134 case G_FADD:
3135 case G_FMUL:
3136 case G_FSUB:
3137 case G_FNEG:
3138 case G_FABS:
3139 case G_FCANONICALIZE:
3140 case G_FDIV:
3141 case G_FREM:
3142 case G_FMA:
3143 case G_FMAD:
3144 case G_FPOW:
3145 case G_FEXP:
3146 case G_FEXP2:
3147 case G_FLOG:
3148 case G_FLOG2:
3149 case G_FLOG10:
3150 case G_FNEARBYINT:
3151 case G_FCEIL:
3152 case G_FFLOOR:
3153 case G_FRINT:
3154 case G_INTRINSIC_ROUND:
3155 case G_INTRINSIC_TRUNC:
3156 case G_FCOS:
3157 case G_FSIN:
3158 case G_FSQRT:
3159 case G_BSWAP:
3160 case G_BITREVERSE:
3161 case G_SDIV:
3162 case G_UDIV:
3163 case G_SREM:
3164 case G_UREM:
3165 case G_SMIN:
3166 case G_SMAX:
3167 case G_UMIN:
3168 case G_UMAX:
3169 case G_FMINNUM:
3170 case G_FMAXNUM:
3171 case G_FMINNUM_IEEE:
3172 case G_FMAXNUM_IEEE:
3173 case G_FMINIMUM:
3174 case G_FMAXIMUM:
3175 return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
3176 case G_SHL:
3177 case G_LSHR:
3178 case G_ASHR:
3179 case G_CTLZ:
3180 case G_CTLZ_ZERO_UNDEF:
3181 case G_CTTZ:
3182 case G_CTTZ_ZERO_UNDEF:
3183 case G_CTPOP:
3184 case G_FCOPYSIGN:
3185 return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
3186 case G_ZEXT:
3187 case G_SEXT:
3188 case G_ANYEXT:
3189 case G_FPEXT:
3190 case G_FPTRUNC:
3191 case G_SITOFP:
3192 case G_UITOFP:
3193 case G_FPTOSI:
3194 case G_FPTOUI:
3195 case G_INTTOPTR:
3196 case G_PTRTOINT:
3197 case G_ADDRSPACE_CAST:
3198 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
3199 case G_ICMP:
3200 case G_FCMP:
3201 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
3202 case G_SELECT:
3203 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
3204 case G_PHI:
3205 return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
3206 case G_UNMERGE_VALUES:
3207 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
3208 case G_BUILD_VECTOR:
3209 return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3210 case G_LOAD:
3211 case G_STORE:
3212 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
3213 case G_SEXT_INREG:
3214 return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
3215 default:
3216 return UnableToLegalize;
3217 }
3218}
3219
3220LegalizerHelper::LegalizeResult
3221LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
3222 const LLT HalfTy, const LLT AmtTy) {
3223
3224 Register InL = MRI.createGenericVirtualRegister(HalfTy);
3225 Register InH = MRI.createGenericVirtualRegister(HalfTy);
3226 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3227
3228 if (Amt.isNullValue()) {
3229 MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
3230 MI.eraseFromParent();
3231 return Legalized;
3232 }
3233
3234 LLT NVT = HalfTy;
3235 unsigned NVTBits = HalfTy.getSizeInBits();
3236 unsigned VTBits = 2 * NVTBits;
3237
3238 SrcOp Lo(Register(0)), Hi(Register(0));
3239 if (MI.getOpcode() == TargetOpcode::G_SHL) {
3240 if (Amt.ugt(VTBits)) {
3241 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3242 } else if (Amt.ugt(NVTBits)) {
3243 Lo = MIRBuilder.buildConstant(NVT, 0);
3244 Hi = MIRBuilder.buildShl(NVT, InL,
3245 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3246 } else if (Amt == NVTBits) {
3247 Lo = MIRBuilder.buildConstant(NVT, 0);
3248 Hi = InL;
3249 } else {
3250 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
3251 auto OrLHS =
3252 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
3253 auto OrRHS = MIRBuilder.buildLShr(
3254 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3255 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3256 }
3257 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3258 if (Amt.ugt(VTBits)) {
3259 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3260 } else if (Amt.ugt(NVTBits)) {
3261 Lo = MIRBuilder.buildLShr(NVT, InH,
3262 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3263 Hi = MIRBuilder.buildConstant(NVT, 0);
3264 } else if (Amt == NVTBits) {
3265 Lo = InH;
3266 Hi = MIRBuilder.buildConstant(NVT, 0);
3267 } else {
3268 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3269
3270 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3271 auto OrRHS = MIRBuilder.buildShl(
3272 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3273
3274 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3275 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
3276 }
3277 } else {
3278 if (Amt.ugt(VTBits)) {
3279 Hi = Lo = MIRBuilder.buildAShr(
3280 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3281 } else if (Amt.ugt(NVTBits)) {
3282 Lo = MIRBuilder.buildAShr(NVT, InH,
3283 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3284 Hi = MIRBuilder.buildAShr(NVT, InH,
3285 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3286 } else if (Amt == NVTBits) {
3287 Lo = InH;
3288 Hi = MIRBuilder.buildAShr(NVT, InH,
3289 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3290 } else {
3291 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3292
3293 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3294 auto OrRHS = MIRBuilder.buildShl(
3295 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3296
3297 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3298 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
3299 }
3300 }
3301
3302 MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
3303 MI.eraseFromParent();
3304
3305 return Legalized;
3306}
3307
3308// TODO: Optimize if constant shift amount.
3309LegalizerHelper::LegalizeResult
3310LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
3311 LLT RequestedTy) {
3312 if (TypeIdx == 1) {
3313 Observer.changingInstr(MI);
3314 narrowScalarSrc(MI, RequestedTy, 2);
3315 Observer.changedInstr(MI);
3316 return Legalized;
3317 }
3318
3319 Register DstReg = MI.getOperand(0).getReg();
3320 LLT DstTy = MRI.getType(DstReg);
3321 if (DstTy.isVector())
3322 return UnableToLegalize;
3323
3324 Register Amt = MI.getOperand(2).getReg();
3325 LLT ShiftAmtTy = MRI.getType(Amt);
3326 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
3327 if (DstEltSize % 2 != 0)
3328 return UnableToLegalize;
3329
3330 // Ignore the input type. We can only go to exactly half the size of the
3331 // input. If that isn't small enough, the resulting pieces will be further
3332 // legalized.
3333 const unsigned NewBitSize = DstEltSize / 2;
3334 const LLT HalfTy = LLT::scalar(NewBitSize);
3335 const LLT CondTy = LLT::scalar(1);
3336
3337 if (const MachineInstr *KShiftAmt =
3338 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
3339 return narrowScalarShiftByConstant(
3340 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
3341 }
3342
3343 // TODO: Expand with known bits.
3344
3345 // Handle the fully general expansion by an unknown amount.
3346 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
3347
3348 Register InL = MRI.createGenericVirtualRegister(HalfTy);
3349 Register InH = MRI.createGenericVirtualRegister(HalfTy);
3350 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3351
3352 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
3353 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
3354
3355 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
3356 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
3357 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
3358
3359 Register ResultRegs[2];
3360 switch (MI.getOpcode()) {
3361 case TargetOpcode::G_SHL: {
3362 // Short: ShAmt < NewBitSize
3363 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
3364
3365 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
3366 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
3367 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3368
3369 // Long: ShAmt >= NewBitSize
3370 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
3371 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
3372
3373 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
3374 auto Hi = MIRBuilder.buildSelect(
3375 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
3376
3377 ResultRegs[0] = Lo.getReg(0);
3378 ResultRegs[1] = Hi.getReg(0);
3379 break;
3380 }
3381 case TargetOpcode::G_LSHR:
3382 case TargetOpcode::G_ASHR: {
3383 // Short: ShAmt < NewBitSize
3384 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
3385
3386 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3387 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
3388 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3389
3390 // Long: ShAmt >= NewBitSize
3391 MachineInstrBuilder HiL;
3392 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3393 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
3394 } else {
3395 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
3396 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
3397 }
3398 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
3399 {InH, AmtExcess}); // Lo from Hi part.
3400
3401 auto Lo = MIRBuilder.buildSelect(
3402 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3403
3404 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3405
3406 ResultRegs[0] = Lo.getReg(0);
3407 ResultRegs[1] = Hi.getReg(0);
3408 break;
3409 }
3410 default:
3411 llvm_unreachable("not a shift")::llvm::llvm_unreachable_internal("not a shift", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3411)
;
3412 }
3413
3414 MIRBuilder.buildMerge(DstReg, ResultRegs);
3415 MI.eraseFromParent();
3416 return Legalized;
3417}
3418
3419LegalizerHelper::LegalizeResult
3420LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3421 LLT MoreTy) {
3422 assert(TypeIdx == 0 && "Expecting only Idx 0")((TypeIdx == 0 && "Expecting only Idx 0") ? static_cast
<void> (0) : __assert_fail ("TypeIdx == 0 && \"Expecting only Idx 0\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3422, __PRETTY_FUNCTION__))
;
3423
3424 Observer.changingInstr(MI);
3425 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3426 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3427 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3428 moreElementsVectorSrc(MI, MoreTy, I);
3429 }
3430
3431 MachineBasicBlock &MBB = *MI.getParent();
3432 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3433 moreElementsVectorDst(MI, MoreTy, 0);
3434 Observer.changedInstr(MI);
3435 return Legalized;
3436}
3437
3438LegalizerHelper::LegalizeResult
3439LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3440 LLT MoreTy) {
3441 MIRBuilder.setInstr(MI);
3442 unsigned Opc = MI.getOpcode();
3443 switch (Opc) {
3444 case TargetOpcode::G_IMPLICIT_DEF:
3445 case TargetOpcode::G_LOAD: {
3446 if (TypeIdx != 0)
3447 return UnableToLegalize;
3448 Observer.changingInstr(MI);
3449 moreElementsVectorDst(MI, MoreTy, 0);
3450 Observer.changedInstr(MI);
3451 return Legalized;
3452 }
3453 case TargetOpcode::G_STORE:
3454 if (TypeIdx != 0)
3455 return UnableToLegalize;
3456 Observer.changingInstr(MI);
3457 moreElementsVectorSrc(MI, MoreTy, 0);
3458 Observer.changedInstr(MI);
3459 return Legalized;
3460 case TargetOpcode::G_AND:
3461 case TargetOpcode::G_OR:
3462 case TargetOpcode::G_XOR:
3463 case TargetOpcode::G_SMIN:
3464 case TargetOpcode::G_SMAX:
3465 case TargetOpcode::G_UMIN:
3466 case TargetOpcode::G_UMAX:
3467 case TargetOpcode::G_FMINNUM:
3468 case TargetOpcode::G_FMAXNUM:
3469 case TargetOpcode::G_FMINNUM_IEEE:
3470 case TargetOpcode::G_FMAXNUM_IEEE:
3471 case TargetOpcode::G_FMINIMUM:
3472 case TargetOpcode::G_FMAXIMUM: {
3473 Observer.changingInstr(MI);
3474 moreElementsVectorSrc(MI, MoreTy, 1);
3475 moreElementsVectorSrc(MI, MoreTy, 2);
3476 moreElementsVectorDst(MI, MoreTy, 0);
3477 Observer.changedInstr(MI);
3478 return Legalized;
3479 }
3480 case TargetOpcode::G_EXTRACT:
3481 if (TypeIdx != 1)
3482 return UnableToLegalize;
3483 Observer.changingInstr(MI);
3484 moreElementsVectorSrc(MI, MoreTy, 1);
3485 Observer.changedInstr(MI);
3486 return Legalized;
3487 case TargetOpcode::G_INSERT:
3488 if (TypeIdx != 0)
3489 return UnableToLegalize;
3490 Observer.changingInstr(MI);
3491 moreElementsVectorSrc(MI, MoreTy, 1);
3492 moreElementsVectorDst(MI, MoreTy, 0);
3493 Observer.changedInstr(MI);
3494 return Legalized;
3495 case TargetOpcode::G_SELECT:
3496 if (TypeIdx != 0)
3497 return UnableToLegalize;
3498 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3499 return UnableToLegalize;
3500
3501 Observer.changingInstr(MI);
3502 moreElementsVectorSrc(MI, MoreTy, 2);
3503 moreElementsVectorSrc(MI, MoreTy, 3);
3504 moreElementsVectorDst(MI, MoreTy, 0);
3505 Observer.changedInstr(MI);
3506 return Legalized;
3507 case TargetOpcode::G_UNMERGE_VALUES: {
3508 if (TypeIdx != 1)
3509 return UnableToLegalize;
3510
3511 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3512 int NumDst = MI.getNumOperands() - 1;
3513 moreElementsVectorSrc(MI, MoreTy, NumDst);
3514
3515 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3516 for (int I = 0; I != NumDst; ++I)
3517 MIB.addDef(MI.getOperand(I).getReg());
3518
3519 int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3520 for (int I = NumDst; I != NewNumDst; ++I)
3521 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3522
3523 MIB.addUse(MI.getOperand(NumDst).getReg());
3524 MI.eraseFromParent();
3525 return Legalized;
3526 }
3527 case TargetOpcode::G_PHI:
3528 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3529 default:
3530 return UnableToLegalize;
3531 }
3532}
3533
3534void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3535 ArrayRef<Register> Src1Regs,
3536 ArrayRef<Register> Src2Regs,
3537 LLT NarrowTy) {
3538 MachineIRBuilder &B = MIRBuilder;
3539 unsigned SrcParts = Src1Regs.size();
3540 unsigned DstParts = DstRegs.size();
3541
3542 unsigned DstIdx = 0; // Low bits of the result.
3543 Register FactorSum =
3544 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3545 DstRegs[DstIdx] = FactorSum;
3546
3547 unsigned CarrySumPrevDstIdx;
3548 SmallVector<Register, 4> Factors;
3549
3550 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3551 // Collect low parts of muls for DstIdx.
3552 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3553 i <= std::min(DstIdx, SrcParts - 1); ++i) {
3554 MachineInstrBuilder Mul =
3555 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3556 Factors.push_back(Mul.getReg(0));
3557 }
3558 // Collect high parts of muls from previous DstIdx.
3559 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3560 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3561 MachineInstrBuilder Umulh =
3562 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3563 Factors.push_back(Umulh.getReg(0));
3564 }
3565 // Add CarrySum from additions calculated for previous DstIdx.
3566 if (DstIdx != 1) {
3567 Factors.push_back(CarrySumPrevDstIdx);
3568 }
3569
3570 Register CarrySum;
3571 // Add all factors and accumulate all carries into CarrySum.
3572 if (DstIdx != DstParts - 1) {
3573 MachineInstrBuilder Uaddo =
3574 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3575 FactorSum = Uaddo.getReg(0);
3576 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3577 for (unsigned i = 2; i < Factors.size(); ++i) {
3578 MachineInstrBuilder Uaddo =
3579 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3580 FactorSum = Uaddo.getReg(0);
3581 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3582 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3583 }
3584 } else {
3585 // Since value for the next index is not calculated, neither is CarrySum.
3586 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3587 for (unsigned i = 2; i < Factors.size(); ++i)
3588 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3589 }
3590
3591 CarrySumPrevDstIdx = CarrySum;
3592 DstRegs[DstIdx] = FactorSum;
3593 Factors.clear();
3594 }
3595}
3596
3597LegalizerHelper::LegalizeResult
3598LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3599 Register DstReg = MI.getOperand(0).getReg();
3600 Register Src1 = MI.getOperand(1).getReg();
3601 Register Src2 = MI.getOperand(2).getReg();
3602
3603 LLT Ty = MRI.getType(DstReg);
3604 if (Ty.isVector())
3605 return UnableToLegalize;
3606
3607 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3608 unsigned DstSize = Ty.getSizeInBits();
3609 unsigned NarrowSize = NarrowTy.getSizeInBits();
3610 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3611 return UnableToLegalize;
3612
3613 unsigned NumDstParts = DstSize / NarrowSize;
3614 unsigned NumSrcParts = SrcSize / NarrowSize;
3615 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3616 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3617
3618 SmallVector<Register, 2> Src1Parts, Src2Parts;
3619 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
3620 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3621 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3622 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3623
3624 // Take only high half of registers if this is high mul.
3625 ArrayRef<Register> DstRegs(
3626 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3627 MIRBuilder.buildMerge(DstReg, DstRegs);
3628 MI.eraseFromParent();
3629 return Legalized;
3630}
3631
3632LegalizerHelper::LegalizeResult
3633LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3634 LLT NarrowTy) {
3635 if (TypeIdx != 1)
3636 return UnableToLegalize;
3637
3638 uint64_t NarrowSize = NarrowTy.getSizeInBits();
3639
3640 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3641 // FIXME: add support for when SizeOp1 isn't an exact multiple of
3642 // NarrowSize.
3643 if (SizeOp1 % NarrowSize != 0)
3644 return UnableToLegalize;
3645 int NumParts = SizeOp1 / NarrowSize;
3646
3647 SmallVector<Register, 2> SrcRegs, DstRegs;
3648 SmallVector<uint64_t, 2> Indexes;
3649 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3650
3651 Register OpReg = MI.getOperand(0).getReg();
3652 uint64_t OpStart = MI.getOperand(2).getImm();
3653 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3654 for (int i = 0; i < NumParts; ++i) {
3655 unsigned SrcStart = i * NarrowSize;
3656
3657 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
3658 // No part of the extract uses this subregister, ignore it.
3659 continue;
3660 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3661 // The entire subregister is extracted, forward the value.
3662 DstRegs.push_back(SrcRegs[i]);
3663 continue;
3664 }
3665
3666 // OpSegStart is where this destination segment would start in OpReg if it
3667 // extended infinitely in both directions.
3668 int64_t ExtractOffset;
3669 uint64_t SegSize;
3670 if (OpStart < SrcStart) {
3671 ExtractOffset = 0;
3672 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
3673 } else {
3674 ExtractOffset = OpStart - SrcStart;
3675 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
3676 }
3677
3678 Register SegReg = SrcRegs[i];
3679 if (ExtractOffset != 0 || SegSize != NarrowSize) {
3680 // A genuine extract is needed.
3681 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3682 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
3683 }
3684
3685 DstRegs.push_back(SegReg);
3686 }
3687
3688 Register DstReg = MI.getOperand(0).getReg();
3689 if(MRI.getType(DstReg).isVector())
3690 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3691 else
3692 MIRBuilder.buildMerge(DstReg, DstRegs);
3693 MI.eraseFromParent();
3694 return Legalized;
3695}
3696
3697LegalizerHelper::LegalizeResult
3698LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
3699 LLT NarrowTy) {
3700 // FIXME: Don't know how to handle secondary types yet.
3701 if (TypeIdx != 0)
3702 return UnableToLegalize;
3703
3704 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3705 uint64_t NarrowSize = NarrowTy.getSizeInBits();
3706
3707 // FIXME: add support for when SizeOp0 isn't an exact multiple of
3708 // NarrowSize.
3709 if (SizeOp0 % NarrowSize != 0)
3710 return UnableToLegalize;
3711
3712 int NumParts = SizeOp0 / NarrowSize;
3713
3714 SmallVector<Register, 2> SrcRegs, DstRegs;
3715 SmallVector<uint64_t, 2> Indexes;
3716 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3717
3718 Register OpReg = MI.getOperand(2).getReg();
3719 uint64_t OpStart = MI.getOperand(3).getImm();
3720 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3721 for (int i = 0; i < NumParts; ++i) {
3722 unsigned DstStart = i * NarrowSize;
3723
3724 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
3725 // No part of the insert affects this subregister, forward the original.
3726 DstRegs.push_back(SrcRegs[i]);
3727 continue;
3728 } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3729 // The entire subregister is defined by this insert, forward the new
3730 // value.
3731 DstRegs.push_back(OpReg);
3732 continue;
3733 }
3734
3735 // OpSegStart is where this destination segment would start in OpReg if it
3736 // extended infinitely in both directions.
3737 int64_t ExtractOffset, InsertOffset;
3738 uint64_t SegSize;
3739 if (OpStart < DstStart) {
3740 InsertOffset = 0;
3741 ExtractOffset = DstStart - OpStart;
3742 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3743 } else {
3744 InsertOffset = OpStart - DstStart;
3745 ExtractOffset = 0;
3746 SegSize =
3747 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3748 }
3749
3750 Register SegReg = OpReg;
3751 if (ExtractOffset != 0 || SegSize != OpSize) {
3752 // A genuine extract is needed.
3753 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3754 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3755 }
3756
3757 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3758 MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3759 DstRegs.push_back(DstReg);
3760 }
3761
3762 assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered")((DstRegs.size() == (unsigned)NumParts && "not all parts covered"
) ? static_cast<void> (0) : __assert_fail ("DstRegs.size() == (unsigned)NumParts && \"not all parts covered\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3762, __PRETTY_FUNCTION__))
;
3763 Register DstReg = MI.getOperand(0).getReg();
3764 if(MRI.getType(DstReg).isVector())
3765 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3766 else
3767 MIRBuilder.buildMerge(DstReg, DstRegs);
3768 MI.eraseFromParent();
3769 return Legalized;
3770}
3771
3772LegalizerHelper::LegalizeResult
3773LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3774 LLT NarrowTy) {
3775 Register DstReg = MI.getOperand(0).getReg();
3776 LLT DstTy = MRI.getType(DstReg);
3777
3778 assert(MI.getNumOperands() == 3 && TypeIdx == 0)((MI.getNumOperands() == 3 && TypeIdx == 0) ? static_cast
<void> (0) : __assert_fail ("MI.getNumOperands() == 3 && TypeIdx == 0"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3778, __PRETTY_FUNCTION__))
;
3779
3780 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3781 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3782 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3783 LLT LeftoverTy;
3784 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3785 Src0Regs, Src0LeftoverRegs))
3786 return UnableToLegalize;
3787
3788 LLT Unused;
3789 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3790 Src1Regs, Src1LeftoverRegs))
3791 llvm_unreachable("inconsistent extractParts result")::llvm::llvm_unreachable_internal("inconsistent extractParts result"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3791)
;
3792
3793 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3794 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3795 {Src0Regs[I], Src1Regs[I]});
3796 DstRegs.push_back(Inst.getReg(0));
3797 }
3798
3799 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3800 auto Inst = MIRBuilder.buildInstr(
3801 MI.getOpcode(),
3802 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3803 DstLeftoverRegs.push_back(Inst.getReg(0));
3804 }
3805
3806 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3807 LeftoverTy, DstLeftoverRegs);
3808
3809 MI.eraseFromParent();
3810 return Legalized;
3811}
3812
3813LegalizerHelper::LegalizeResult
3814LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
3815 LLT NarrowTy) {
3816 if (TypeIdx != 0)
3817 return UnableToLegalize;
3818
3819 Register DstReg = MI.getOperand(0).getReg();
3820 Register SrcReg = MI.getOperand(1).getReg();
3821
3822 LLT DstTy = MRI.getType(DstReg);
3823 if (DstTy.isVector())
3824 return UnableToLegalize;
3825
3826 SmallVector<Register, 8> Parts;
3827 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3828 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
3829 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3830
3831 MI.eraseFromParent();
3832 return Legalized;
3833}
3834
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // Narrow G_SELECT by splitting both value operands into NarrowTy-sized
  // pieces (plus an optional leftover piece) and selecting piecewise with
  // the original condition. Only the result type (TypeIdx 0) is handled.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  // Split the true operand; this also determines LeftoverTy.
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  // The false operand has the same type, so this split cannot fail.
  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  // Select each NarrowTy-sized piece with the shared condition.
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  // Likewise for the leftover pieces, which use LeftoverTy.
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
      LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  // Reassemble the narrow results into the original wide destination.
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
3880
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  // Narrow G_CTLZ / G_CTLZ_ZERO_UNDEF on the source operand (TypeIdx 1).
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  // Only the exact split of a scalar source into two halves is handled.
  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    // The Lo count is used only when Hi == 0, so Lo itself may be zero:
    // preserve the zero-input semantics of the original opcode here.
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    // The Hi count is used only when Hi != 0, so ZERO_UNDEF is always safe.
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3916
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  // Narrow G_CTTZ / G_CTTZ_ZERO_UNDEF on the source operand (TypeIdx 1).
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  // Only the exact split of a scalar source into two halves is handled.
  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    // The Hi count is used only when Lo == 0, so Hi itself may be zero:
    // preserve the zero-input semantics of the original opcode here.
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    // The Lo count is used only when Lo != 0, so ZERO_UNDEF is always safe.
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3952
3953LegalizerHelper::LegalizeResult
3954LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
3955 LLT NarrowTy) {
3956 if (TypeIdx != 1)
3957 return UnableToLegalize;
3958
3959 Register DstReg = MI.getOperand(0).getReg();
3960 LLT DstTy = MRI.getType(DstReg);
3961 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3962 unsigned NarrowSize = NarrowTy.getSizeInBits();
3963
3964 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
3965 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
3966
3967 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
3968 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
3969 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
3970
3971 MI.eraseFromParent();
3972 return Legalized;
3973 }
3974
3975 return UnableToLegalize;
3976}
3977
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  // Lower bit-counting opcodes (G_CTLZ/G_CTTZ/G_CTPOP and their ZERO_UNDEF
  // variants) into sequences of simpler generic operations.
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An operation is usable as a lowering target if the target handles it
  // directly, through a libcall, or through custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into all lower positions.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      // NOTE(review): the CTLZ path above derives the compare type from
      // SrcTy, this one from DstTy; for scalars both yield s1 — confirm
      // this is intended if vector types ever reach here.
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(Ty, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse MI in place as the CTPOP of the mask of trailing bits.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
4138
4139// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
4140// representation.
4141LegalizerHelper::LegalizeResult
4142LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
4143 Register Dst = MI.getOperand(0).getReg();
4144 Register Src = MI.getOperand(1).getReg();
4145 const LLT S64 = LLT::scalar(64);
4146 const LLT S32 = LLT::scalar(32);
4147 const LLT S1 = LLT::scalar(1);
4148
4149 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32)((MRI.getType(Src) == S64 && MRI.getType(Dst) == S32)
? static_cast<void> (0) : __assert_fail ("MRI.getType(Src) == S64 && MRI.getType(Dst) == S32"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4149, __PRETTY_FUNCTION__))
;
4150
4151 // unsigned cul2f(ulong u) {
4152 // uint lz = clz(u);
4153 // uint e = (u != 0) ? 127U + 63U - lz : 0;
4154 // u = (u << lz) & 0x7fffffffffffffffUL;
4155 // ulong t = u & 0xffffffffffUL;
4156 // uint v = (e << 23) | (uint)(u >> 40);
4157 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
4158 // return as_float(v + r);
4159 // }
4160
4161 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
4162 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
4163
4164 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
4165
4166 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
4167 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
4168
4169 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
4170 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
4171
4172 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
4173 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
4174
4175 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
4176
4177 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
4178 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
4179
4180 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
4181 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
4182 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
4183
4184 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
4185 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
4186 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
4187 auto One = MIRBuilder.buildConstant(S32, 1);
4188
4189 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
4190 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
4191 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
4192 MIRBuilder.buildAdd(Dst, V, R);
4193
4194 return Legalized;
4195}
4196
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  // Lower G_UITOFP for the s1 and s64 source cases.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  // A 1-bit unsigned value is 0 or 1: select between the two constants.
  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}
4225
4226LegalizerHelper::LegalizeResult
4227LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4228 Register Dst = MI.getOperand(0).getReg();
4229 Register Src = MI.getOperand(1).getReg();
4230 LLT DstTy = MRI.getType(Dst);
4231 LLT SrcTy = MRI.getType(Src);
4232
4233 const LLT S64 = LLT::scalar(64);
4234 const LLT S32 = LLT::scalar(32);
4235 const LLT S1 = LLT::scalar(1);
4236
4237 if (SrcTy == S1) {
4238 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
4239 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4240 MIRBuilder.buildSelect(Dst, Src, True, False);
4241 MI.eraseFromParent();
4242 return Legalized;
4243 }
4244
4245 if (SrcTy != S64)
4246 return UnableToLegalize;
4247
4248 if (DstTy == S32) {
4249 // signed cl2f(long l) {
4250 // long s = l >> 63;
4251 // float r = cul2f((l + s) ^ s);
4252 // return s ? -r : r;
4253 // }
4254 Register L = Src;
4255 auto SignBit = MIRBuilder.buildConstant(S64, 63);
4256 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
4257
4258 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
4259 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
4260 auto R = MIRBuilder.buildUITOFP(S32, Xor);
4261
4262 auto RNeg = MIRBuilder.buildFNeg(S32, R);
4263 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
4264 MIRBuilder.buildConstant(S64, 0));
4265 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
4266 return Legalized;
4267 }
4268
4269 return UnableToLegalize;
4270}
4271
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  // Lower G_FPTOUI in terms of G_FPTOSI for s32/s64 sources and results.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer (the sign mask of the destination width) ...
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  // ... and as a floating-point constant in the source format.
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // Take the direct FPTOSI path when Src < Threshold (ULT also covers NaN).
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
4315
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  // Lower G_FPTOSI by decoding the IEEE bit pattern with integer ops.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits 23..30 of an IEEE f32).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Smear the sign bit across the whole value (0 or all-ones), then sign
  // extend it to the destination width.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Recover the full significand by restoring the implicit leading one.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (f32 bias is 127) and compute both candidate shift
  // amounts relative to the mantissa width.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  // Shift left when the value has more integer bits than the mantissa holds,
  // otherwise shift right to discard the fractional bits.
  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (r ^ sign) - sign negates r when sign is all-ones.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means |x| < 1, which converts to 0.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
4382
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Work on the two 32-bit halves of the f64: U is the low word, UH the
  // high word (sign, exponent, and top 20 mantissa bits).
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
    S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // M: top mantissa bits positioned with two guard bits below the f16
  // mantissa (used for rounding later).
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Fold every mantissa bit below the guard bits into a single sticky bit.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I: the f16 Inf/NaN pattern, quieting NaNs when a mantissa bit is set.
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12); the normal-number pattern (pre-rounding).
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13); the denormalization shift amount.
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // D: the denormal pattern, with a sticky bit for anything shifted out.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Pick denormal vs normal pattern, then round to nearest even using the
  // two guard bits kept in the low positions.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  // Round up on a tie to even (low bits == 3) or when clearly above half
  // (low bits > 5).
  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Overflow: a biased f16 exponent above 30 cannot be represented; emit Inf.
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 corresponds to an f64 Inf/NaN exponent; use the I pattern.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
4488
4489LegalizerHelper::LegalizeResult
4490LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4491 Register Dst = MI.getOperand(0).getReg();
4492 Register Src = MI.getOperand(1).getReg();
4493
4494 LLT DstTy = MRI.getType(Dst);
4495 LLT SrcTy = MRI.getType(Src);
4496 const LLT S64 = LLT::scalar(64);
4497 const LLT S16 = LLT::scalar(16);
4498
4499 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
4500 return lowerFPTRUNC_F64_TO_F16(MI);
4501
4502 return UnableToLegalize;
4503}
4504
4505static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4506 switch (Opc) {
4507 case TargetOpcode::G_SMIN:
4508 return CmpInst::ICMP_SLT;
4509 case TargetOpcode::G_SMAX:
4510 return CmpInst::ICMP_SGT;
4511 case TargetOpcode::G_UMIN:
4512 return CmpInst::ICMP_ULT;
4513 case TargetOpcode::G_UMAX:
4514 return CmpInst::ICMP_UGT;
4515 default:
4516 llvm_unreachable("not in integer min/max")::llvm::llvm_unreachable_internal("not in integer min/max", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4516)
;
4517 }
4518}
4519
4520LegalizerHelper::LegalizeResult
4521LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4522 Register Dst = MI.getOperand(0).getReg();
4523 Register Src0 = MI.getOperand(1).getReg();
4524 Register Src1 = MI.getOperand(2).getReg();
4525
4526 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4527 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4528
4529 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4530 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4531
4532 MI.eraseFromParent();
4533 return Legalized;
4534}
4535
4536LegalizerHelper::LegalizeResult
4537LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4538 Register Dst = MI.getOperand(0).getReg();
4539 Register Src0 = MI.getOperand(1).getReg();
4540 Register Src1 = MI.getOperand(2).getReg();
4541
4542 const LLT Src0Ty = MRI.getType(Src0);
4543 const LLT Src1Ty = MRI.getType(Src1);
4544
4545 const int Src0Size = Src0Ty.getScalarSizeInBits();
4546 const int Src1Size = Src1Ty.getScalarSizeInBits();
4547
4548 auto SignBitMask = MIRBuilder.buildConstant(
4549 Src0Ty, APInt::getSignMask(Src0Size));
4550
4551 auto NotSignBitMask = MIRBuilder.buildConstant(
4552 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4553
4554 auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4555 MachineInstr *Or;
4556
4557 if (Src0Ty == Src1Ty) {
4558 auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
4559 Or = MIRBuilder.buildOr(Dst, And0, And1);
4560 } else if (Src0Size > Src1Size) {
4561 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4562 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4563 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4564 auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4565 Or = MIRBuilder.buildOr(Dst, And0, And1);
4566 } else {
4567 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4568 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4569 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4570 auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4571 Or = MIRBuilder.buildOr(Dst, And0, And1);
4572 }
4573
4574 // Be careful about setting nsz/nnan/ninf on every instruction, since the
4575 // constants are a nan and -0.0, but the final result should preserve
4576 // everything.
4577 if (unsigned Flags = MI.getFlags())
4578 Or->setFlags(Flags);
4579
4580 MI.eraseFromParent();
4581 return Legalized;
4582}
4583
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  // Lower G_FMINNUM/G_FMAXNUM to the IEEE variants, quieting possible
  // signaling NaN inputs first when NaNs cannot be ruled out.
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
4614
4615LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4616 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4617 Register DstReg = MI.getOperand(0).getReg();
4618 LLT Ty = MRI.getType(DstReg);
4619 unsigned Flags = MI.getFlags();
4620
4621 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4622 Flags);
4623 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4624 MI.eraseFromParent();
4625 return Legalized;
4626}
4627
4628LegalizerHelper::LegalizeResult
4629LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4630 Register DstReg = MI.getOperand(0).getReg();
4631 Register SrcReg = MI.getOperand(1).getReg();
4632 unsigned Flags = MI.getFlags();
4633 LLT Ty = MRI.getType(DstReg);
4634 const LLT CondTy = Ty.changeElementSize(1);
4635
4636 // result = trunc(src);
4637 // if (src < 0.0 && src != result)
4638 // result += -1.0.
4639
4640 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4641 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4642
4643 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4644 SrcReg, Zero, Flags);
4645 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4646 SrcReg, Trunc, Flags);
4647 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
4648 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
4649
4650 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
4651 MI.eraseFromParent();
4652 return Legalized;
4653}
4654
4655LegalizerHelper::LegalizeResult
4656LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
4657 const unsigned NumDst = MI.getNumOperands() - 1;
4658 const Register SrcReg = MI.getOperand(NumDst).getReg();
4659 LLT SrcTy = MRI.getType(SrcReg);
4660
4661 Register Dst0Reg = MI.getOperand(0).getReg();
4662 LLT DstTy = MRI.getType(Dst0Reg);
4663
4664
4665 // Expand scalarizing unmerge as bitcast to integer and shift.
4666 if (!DstTy.isVector() && SrcTy.isVector() &&
4667 SrcTy.getElementType() == DstTy) {
4668 LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
4669 Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
4670
4671 MIRBuilder.buildTrunc(Dst0Reg, Cast);
4672
4673 const unsigned DstSize = DstTy.getSizeInBits();
4674 unsigned Offset = DstSize;
4675 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
4676 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
4677 auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
4678 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
4679 }
4680
4681 MI.eraseFromParent();
4682 return Legalized;
4683 }
4684
4685 return UnableToLegalize;
4686}
4687
4688LegalizerHelper::LegalizeResult
4689LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
4690 Register DstReg = MI.getOperand(0).getReg();
4691 Register Src0Reg = MI.getOperand(1).getReg();
4692 Register Src1Reg = MI.getOperand(2).getReg();
4693 LLT Src0Ty = MRI.getType(Src0Reg);
4694 LLT DstTy = MRI.getType(DstReg);
4695 LLT IdxTy = LLT::scalar(32);
4696
4697 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4698
4699 if (DstTy.isScalar()) {
4700 if (Src0Ty.isVector())
4701 return UnableToLegalize;
4702
4703 // This is just a SELECT.
4704 assert(Mask.size() == 1 && "Expected a single mask element")((Mask.size() == 1 && "Expected a single mask element"
) ? static_cast<void> (0) : __assert_fail ("Mask.size() == 1 && \"Expected a single mask element\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4704, __PRETTY_FUNCTION__))
;
4705 Register Val;
4706 if (Mask[0] < 0 || Mask[0] > 1)
4707 Val = MIRBuilder.buildUndef(DstTy).getReg(0);
4708 else
4709 Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
4710 MIRBuilder.buildCopy(DstReg, Val);
4711 MI.eraseFromParent();
4712 return Legalized;
4713 }
4714
4715 Register Undef;
4716 SmallVector<Register, 32> BuildVec;
4717 LLT EltTy = DstTy.getElementType();
4718
4719 for (int Idx : Mask) {
4720 if (Idx < 0) {
4721 if (!Undef.isValid())
4722 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
4723 BuildVec.push_back(Undef);
4724 continue;
4725 }
4726
4727 if (Src0Ty.isScalar()) {
4728 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
4729 } else {
4730 int NumElts = Src0Ty.getNumElements();
4731 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
4732 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
4733 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
4734 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
4735 BuildVec.push_back(Extract.getReg(0));
4736 }
4737 }
4738
4739 MIRBuilder.buildBuildVector(DstReg, BuildVec);
4740 MI.eraseFromParent();
4741 return Legalized;
4742}
4743
4744LegalizerHelper::LegalizeResult
4745LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
4746 Register Dst = MI.getOperand(0).getReg();
4747 Register AllocSize = MI.getOperand(1).getReg();
4748 unsigned Align = MI.getOperand(2).getImm();
4749
4750 const auto &MF = *MI.getMF();
4751 const auto &TLI = *MF.getSubtarget().getTargetLowering();
4752
4753 LLT PtrTy = MRI.getType(Dst);
4754 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
4755
4756 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
4757 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
4758 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
4759
4760 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4761 // have to generate an extra instruction to negate the alloc and then use
4762 // G_PTR_ADD to add the negative offset.
4763 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
4764 if (Align) {
4765 APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
4766 AlignMask.negate();
4767 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
4768 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
4769 }
4770
4771 SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
4772 MIRBuilder.buildCopy(SPReg, SPTmp);
4773 MIRBuilder.buildCopy(Dst, SPTmp);
4774
4775 MI.eraseFromParent();
4776 return Legalized;
4777}
4778
4779LegalizerHelper::LegalizeResult
4780LegalizerHelper::lowerExtract(MachineInstr &MI) {
4781 Register Dst = MI.getOperand(0).getReg();
4782 Register Src = MI.getOperand(1).getReg();
4783 unsigned Offset = MI.getOperand(2).getImm();
4784
4785 LLT DstTy = MRI.getType(Dst);
4786 LLT SrcTy = MRI.getType(Src);
4787
4788 if (DstTy.isScalar() &&
4789 (SrcTy.isScalar() ||
4790 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
4791 LLT SrcIntTy = SrcTy;
4792 if (!SrcTy.isScalar()) {
4793 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
4794 Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
4795 }
4796
4797 if (Offset == 0)
4798 MIRBuilder.buildTrunc(Dst, Src);
4799 else {
4800 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
4801 auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
4802 MIRBuilder.buildTrunc(Dst, Shr);
4803 }
4804
4805 MI.eraseFromParent();
4806 return Legalized;
4807 }
4808
4809 return UnableToLegalize;
4810}
4811
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  // Dst is only used as the type of Src here; presumably G_INSERT requires
  // both to have the same type — TODO confirm against the verifier.
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Handle inserting a scalar into a scalar, or an element-sized scalar into
  // a vector (treated as one wide integer below).
  if (InsertTy.isScalar() &&
      (DstTy.isScalar() ||
       (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
    LLT IntDstTy = DstTy;
    if (!DstTy.isScalar()) {
      IntDstTy = LLT::scalar(DstTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
    }

    // Zero-extend the inserted value to the container width and shift it
    // into position.
    Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
    if (Offset != 0) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
      ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
    }

    // Mask that clears the inserted field: passing the bounds swapped
    // (lo = Offset + size, hi = Offset) to getBitsSetWithWrap yields the
    // complement of [Offset, Offset + size).
    APInt MaskVal = APInt::getBitsSetWithWrap(DstTy.getSizeInBits(),
                                              Offset + InsertTy.getSizeInBits(),
                                              Offset);

    // Clear the field in the container, then OR in the shifted value.
    auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
    auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
    auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

    // Bitcast back to the original (possibly vector) type.
    MIRBuilder.buildBitcast(Dst, Or);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
4851
4852LegalizerHelper::LegalizeResult
4853LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
4854 Register Dst0 = MI.getOperand(0).getReg();
4855 Register Dst1 = MI.getOperand(1).getReg();
4856 Register LHS = MI.getOperand(2).getReg();
4857 Register RHS = MI.getOperand(3).getReg();
4858 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
4859
4860 LLT Ty = MRI.getType(Dst0);
4861 LLT BoolTy = MRI.getType(Dst1);
4862
4863 if (IsAdd)
4864 MIRBuilder.buildAdd(Dst0, LHS, RHS);
4865 else
4866 MIRBuilder.buildSub(Dst0, LHS, RHS);
4867
4868 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4869
4870 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4871
4872 // For an addition, the result should be less than one of the operands (LHS)
4873 // if and only if the other operand (RHS) is negative, otherwise there will
4874 // be overflow.
4875 // For a subtraction, the result should be less than one of the operands
4876 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4877 // otherwise there will be overflow.
4878 auto ResultLowerThanLHS =
4879 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
4880 auto ConditionRHS = MIRBuilder.buildICmp(
4881 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
4882
4883 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
4884 MI.eraseFromParent();
4885 return Legalized;
4886}
4887
4888LegalizerHelper::LegalizeResult
4889LegalizerHelper::lowerBswap(MachineInstr &MI) {
4890 Register Dst = MI.getOperand(0).getReg();
4891 Register Src = MI.getOperand(1).getReg();
4892 const LLT Ty = MRI.getType(Src);
4893 unsigned SizeInBytes = Ty.getSizeInBytes();
4894 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
4895
4896 // Swap most and least significant byte, set remaining bytes in Res to zero.
4897 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
4898 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
4899 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4900 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
4901
4902 // Set i-th high/low byte in Res to i-th low/high byte from Src.
4903 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
4904 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
4905 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
4906 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
4907 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
4908 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
4909 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
4910 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
4911 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
4912 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
4913 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4914 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
4915 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
4916 }
4917 Res.getInstr()->getOperand(0).setReg(Dst);
4918
4919 MI.eraseFromParent();
4920 return Legalized;
4921}
4922
4923//{ (Src & Mask) >> N } | { (Src << N) & Mask }
4924static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
4925 MachineInstrBuilder Src, APInt Mask) {
4926 const LLT Ty = Dst.getLLTTy(*B.getMRI());
4927 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
4928 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
4929 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
4930 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
4931 return B.buildOr(Dst, LHS, RHS);
4932}
4933
4934LegalizerHelper::LegalizeResult
4935LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
4936 Register Dst = MI.getOperand(0).getReg();
4937 Register Src = MI.getOperand(1).getReg();
4938 const LLT Ty = MRI.getType(Src);
4939 unsigned Size = Ty.getSizeInBits();
4940
4941 MachineInstrBuilder BSWAP =
4942 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
4943
4944 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
4945 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
4946 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
4947 MachineInstrBuilder Swap4 =
4948 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
4949
4950 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
4951 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
4952 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
4953 MachineInstrBuilder Swap2 =
4954 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
4955
4956 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
4957 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
4958 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
4959 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
4960
4961 MI.eraseFromParent();
4962 return Legalized;
4963}
4964
4965LegalizerHelper::LegalizeResult
4966LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
4967 MachineFunction &MF = MIRBuilder.getMF();
4968 const TargetSubtargetInfo &STI = MF.getSubtarget();
4969 const TargetLowering *TLI = STI.getTargetLowering();
4970
4971 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
4972 int NameOpIdx = IsRead ? 1 : 0;
4973 int ValRegIndex = IsRead ? 0 : 1;
4974
4975 Register ValReg = MI.getOperand(ValRegIndex).getReg();
4976 const LLT Ty = MRI.getType(ValReg);
4977 const MDString *RegStr = cast<MDString>(
4978 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
4979
4980 Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
4981 if (!PhysReg.isValid())
4982 return UnableToLegalize;
4983
4984 if (IsRead)
4985 MIRBuilder.buildCopy(ValReg, PhysReg);
4986 else
4987 MIRBuilder.buildCopy(PhysReg, ValReg);
4988
4989 MI.eraseFromParent();
4990 return Legalized;
4991}