Bug Summary

File: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Warning: line 1189, column 17
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LegalizerHelper.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/CodeGen/GlobalISel -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/CodeGen/GlobalISel -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/CodeGen/GlobalISel -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-06-13-111025-38230-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
20#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
21#include "llvm/CodeGen/GlobalISel/Utils.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetFrameLowering.h"
24#include "llvm/CodeGen/TargetInstrInfo.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Support/raw_ostream.h"
32
// Debug category for this file; the LLVM_DEBUG statements below are gated on
// the current debug type being "legalizer".
#define DEBUG_TYPE "legalizer"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace MIPatternMatch;
38
39/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
40///
41/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
42/// with any leftover piece as type \p LeftoverTy
43///
44/// Returns -1 in the first element of the pair if the breakdown is not
45/// satisfiable.
46static std::pair<int, int>
47getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
48 assert(!LeftoverTy.isValid() && "this is an out argument")(static_cast <bool> (!LeftoverTy.isValid() && "this is an out argument"
) ? void (0) : __assert_fail ("!LeftoverTy.isValid() && \"this is an out argument\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 48, __extension__ __PRETTY_FUNCTION__))
;
49
50 unsigned Size = OrigTy.getSizeInBits();
51 unsigned NarrowSize = NarrowTy.getSizeInBits();
52 unsigned NumParts = Size / NarrowSize;
53 unsigned LeftoverSize = Size - NumParts * NarrowSize;
54 assert(Size > NarrowSize)(static_cast <bool> (Size > NarrowSize) ? void (0) :
__assert_fail ("Size > NarrowSize", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 54, __extension__ __PRETTY_FUNCTION__))
;
55
56 if (LeftoverSize == 0)
57 return {NumParts, 0};
58
59 if (NarrowTy.isVector()) {
60 unsigned EltSize = OrigTy.getScalarSizeInBits();
61 if (LeftoverSize % EltSize != 0)
62 return {-1, -1};
63 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
64 } else {
65 LeftoverTy = LLT::scalar(LeftoverSize);
66 }
67
68 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
69 return std::make_pair(NumParts, NumLeftover);
70}
71
72static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
73
74 if (!Ty.isScalar())
75 return nullptr;
76
77 switch (Ty.getSizeInBits()) {
78 case 16:
79 return Type::getHalfTy(Ctx);
80 case 32:
81 return Type::getFloatTy(Ctx);
82 case 64:
83 return Type::getDoubleTy(Ctx);
84 case 80:
85 return Type::getX86_FP80Ty(Ctx);
86 case 128:
87 return Type::getFP128Ty(Ctx);
88 default:
89 return nullptr;
90 }
91}
92
93LegalizerHelper::LegalizerHelper(MachineFunction &MF,
94 GISelChangeObserver &Observer,
95 MachineIRBuilder &Builder)
96 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
97 LI(*MF.getSubtarget().getLegalizerInfo()),
98 TLI(*MF.getSubtarget().getTargetLowering()) { }
99
100LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
101 GISelChangeObserver &Observer,
102 MachineIRBuilder &B)
103 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
104 TLI(*MF.getSubtarget().getTargetLowering()) { }
105
106LegalizerHelper::LegalizeResult
107LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
108 LostDebugLocObserver &LocObserver) {
109 LLVM_DEBUG(dbgs() << "Legalizing: " << MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Legalizing: " << MI; }
} while (false)
;
110
111 MIRBuilder.setInstrAndDebugLoc(MI);
112
113 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
114 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
115 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
116 auto Step = LI.getAction(MI, MRI);
117 switch (Step.Action) {
118 case Legal:
119 LLVM_DEBUG(dbgs() << ".. Already legal\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Already legal\n"; } } while
(false)
;
120 return AlreadyLegal;
121 case Libcall:
122 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Convert to libcall\n"; }
} while (false)
;
123 return libcall(MI, LocObserver);
124 case NarrowScalar:
125 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Narrow scalar\n"; } } while
(false)
;
126 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
127 case WidenScalar:
128 LLVM_DEBUG(dbgs() << ".. Widen scalar\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Widen scalar\n"; } } while
(false)
;
129 return widenScalar(MI, Step.TypeIdx, Step.NewType);
130 case Bitcast:
131 LLVM_DEBUG(dbgs() << ".. Bitcast type\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Bitcast type\n"; } } while
(false)
;
132 return bitcast(MI, Step.TypeIdx, Step.NewType);
133 case Lower:
134 LLVM_DEBUG(dbgs() << ".. Lower\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Lower\n"; } } while (false
)
;
135 return lower(MI, Step.TypeIdx, Step.NewType);
136 case FewerElements:
137 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Reduce number of elements\n"
; } } while (false)
;
138 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
139 case MoreElements:
140 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Increase number of elements\n"
; } } while (false)
;
141 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
142 case Custom:
143 LLVM_DEBUG(dbgs() << ".. Custom legalization\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Custom legalization\n"; }
} while (false)
;
144 return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
145 default:
146 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. Unable to legalize\n"; }
} while (false)
;
147 return UnableToLegalize;
148 }
149}
150
151void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
152 SmallVectorImpl<Register> &VRegs) {
153 for (int i = 0; i < NumParts; ++i)
154 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
155 MIRBuilder.buildUnmerge(VRegs, Reg);
156}
157
158bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
159 LLT MainTy, LLT &LeftoverTy,
160 SmallVectorImpl<Register> &VRegs,
161 SmallVectorImpl<Register> &LeftoverRegs) {
162 assert(!LeftoverTy.isValid() && "this is an out argument")(static_cast <bool> (!LeftoverTy.isValid() && "this is an out argument"
) ? void (0) : __assert_fail ("!LeftoverTy.isValid() && \"this is an out argument\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 162, __extension__ __PRETTY_FUNCTION__))
;
163
164 unsigned RegSize = RegTy.getSizeInBits();
165 unsigned MainSize = MainTy.getSizeInBits();
166 unsigned NumParts = RegSize / MainSize;
167 unsigned LeftoverSize = RegSize - NumParts * MainSize;
168
169 // Use an unmerge when possible.
170 if (LeftoverSize == 0) {
171 for (unsigned I = 0; I < NumParts; ++I)
172 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
173 MIRBuilder.buildUnmerge(VRegs, Reg);
174 return true;
175 }
176
177 if (MainTy.isVector()) {
178 unsigned EltSize = MainTy.getScalarSizeInBits();
179 if (LeftoverSize % EltSize != 0)
180 return false;
181 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
182 } else {
183 LeftoverTy = LLT::scalar(LeftoverSize);
184 }
185
186 // For irregular sizes, extract the individual parts.
187 for (unsigned I = 0; I != NumParts; ++I) {
188 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
189 VRegs.push_back(NewReg);
190 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
191 }
192
193 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
194 Offset += LeftoverSize) {
195 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
196 LeftoverRegs.push_back(NewReg);
197 MIRBuilder.buildExtract(NewReg, Reg, Offset);
198 }
199
200 return true;
201}
202
203void LegalizerHelper::insertParts(Register DstReg,
204 LLT ResultTy, LLT PartTy,
205 ArrayRef<Register> PartRegs,
206 LLT LeftoverTy,
207 ArrayRef<Register> LeftoverRegs) {
208 if (!LeftoverTy.isValid()) {
209 assert(LeftoverRegs.empty())(static_cast <bool> (LeftoverRegs.empty()) ? void (0) :
__assert_fail ("LeftoverRegs.empty()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 209, __extension__ __PRETTY_FUNCTION__))
;
210
211 if (!ResultTy.isVector()) {
212 MIRBuilder.buildMerge(DstReg, PartRegs);
213 return;
214 }
215
216 if (PartTy.isVector())
217 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
218 else
219 MIRBuilder.buildBuildVector(DstReg, PartRegs);
220 return;
221 }
222
223 SmallVector<Register> GCDRegs;
224 LLT GCDTy;
225 for (Register PartReg : PartRegs)
226 GCDTy = extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);
227
228 for (Register PartReg : LeftoverRegs)
229 extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);
230
231 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
232 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
233}
234
235/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
236static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
237 const MachineInstr &MI) {
238 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES)(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 238, __extension__ __PRETTY_FUNCTION__))
;
239
240 const int StartIdx = Regs.size();
241 const int NumResults = MI.getNumOperands() - 1;
242 Regs.resize(Regs.size() + NumResults);
243 for (int I = 0; I != NumResults; ++I)
244 Regs[StartIdx + I] = MI.getOperand(I).getReg();
245}
246
247void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
248 LLT GCDTy, Register SrcReg) {
249 LLT SrcTy = MRI.getType(SrcReg);
250 if (SrcTy == GCDTy) {
251 // If the source already evenly divides the result type, we don't need to do
252 // anything.
253 Parts.push_back(SrcReg);
254 } else {
255 // Need to split into common type sized pieces.
256 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
257 getUnmergeResults(Parts, *Unmerge);
258 }
259}
260
261LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
262 LLT NarrowTy, Register SrcReg) {
263 LLT SrcTy = MRI.getType(SrcReg);
264 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
265 extractGCDType(Parts, GCDTy, SrcReg);
266 return GCDTy;
267}
268
269LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
270 SmallVectorImpl<Register> &VRegs,
271 unsigned PadStrategy) {
272 LLT LCMTy = getLCMType(DstTy, NarrowTy);
273
274 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
275 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
276 int NumOrigSrc = VRegs.size();
277
278 Register PadReg;
279
280 // Get a value we can use to pad the source value if the sources won't evenly
281 // cover the result type.
282 if (NumOrigSrc < NumParts * NumSubParts) {
283 if (PadStrategy == TargetOpcode::G_ZEXT)
284 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
285 else if (PadStrategy == TargetOpcode::G_ANYEXT)
286 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
287 else {
288 assert(PadStrategy == TargetOpcode::G_SEXT)(static_cast <bool> (PadStrategy == TargetOpcode::G_SEXT
) ? void (0) : __assert_fail ("PadStrategy == TargetOpcode::G_SEXT"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 288, __extension__ __PRETTY_FUNCTION__))
;
289
290 // Shift the sign bit of the low register through the high register.
291 auto ShiftAmt =
292 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
293 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
294 }
295 }
296
297 // Registers for the final merge to be produced.
298 SmallVector<Register, 4> Remerge(NumParts);
299
300 // Registers needed for intermediate merges, which will be merged into a
301 // source for Remerge.
302 SmallVector<Register, 4> SubMerge(NumSubParts);
303
304 // Once we've fully read off the end of the original source bits, we can reuse
305 // the same high bits for remaining padding elements.
306 Register AllPadReg;
307
308 // Build merges to the LCM type to cover the original result type.
309 for (int I = 0; I != NumParts; ++I) {
310 bool AllMergePartsArePadding = true;
311
312 // Build the requested merges to the requested type.
313 for (int J = 0; J != NumSubParts; ++J) {
314 int Idx = I * NumSubParts + J;
315 if (Idx >= NumOrigSrc) {
316 SubMerge[J] = PadReg;
317 continue;
318 }
319
320 SubMerge[J] = VRegs[Idx];
321
322 // There are meaningful bits here we can't reuse later.
323 AllMergePartsArePadding = false;
324 }
325
326 // If we've filled up a complete piece with padding bits, we can directly
327 // emit the natural sized constant if applicable, rather than a merge of
328 // smaller constants.
329 if (AllMergePartsArePadding && !AllPadReg) {
330 if (PadStrategy == TargetOpcode::G_ANYEXT)
331 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
332 else if (PadStrategy == TargetOpcode::G_ZEXT)
333 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
334
335 // If this is a sign extension, we can't materialize a trivial constant
336 // with the right type and have to produce a merge.
337 }
338
339 if (AllPadReg) {
340 // Avoid creating additional instructions if we're just adding additional
341 // copies of padding bits.
342 Remerge[I] = AllPadReg;
343 continue;
344 }
345
346 if (NumSubParts == 1)
347 Remerge[I] = SubMerge[0];
348 else
349 Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
350
351 // In the sign extend padding case, re-use the first all-signbit merge.
352 if (AllMergePartsArePadding && !AllPadReg)
353 AllPadReg = Remerge[I];
354 }
355
356 VRegs = std::move(Remerge);
357 return LCMTy;
358}
359
360void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
361 ArrayRef<Register> RemergeRegs) {
362 LLT DstTy = MRI.getType(DstReg);
363
364 // Create the merge to the widened source, and extract the relevant bits into
365 // the result.
366
367 if (DstTy == LCMTy) {
368 MIRBuilder.buildMerge(DstReg, RemergeRegs);
369 return;
370 }
371
372 auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
373 if (DstTy.isScalar() && LCMTy.isScalar()) {
374 MIRBuilder.buildTrunc(DstReg, Remerge);
375 return;
376 }
377
378 if (LCMTy.isVector()) {
379 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
380 SmallVector<Register, 8> UnmergeDefs(NumDefs);
381 UnmergeDefs[0] = DstReg;
382 for (unsigned I = 1; I != NumDefs; ++I)
383 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
384
385 MIRBuilder.buildUnmerge(UnmergeDefs,
386 MIRBuilder.buildMerge(LCMTy, RemergeRegs));
387 return;
388 }
389
390 llvm_unreachable("unhandled case")::llvm::llvm_unreachable_internal("unhandled case", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 390)
;
391}
392
393static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
394#define RTLIBCASE_INT(LibcallPrefix)do { switch (Size) { case 32: return RTLIB::LibcallPrefix32; case
64: return RTLIB::LibcallPrefix64; case 128: return RTLIB::LibcallPrefix128
; default: ::llvm::llvm_unreachable_internal("unexpected size"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 394); } } while (0)
\
395 do { \
396 switch (Size) { \
397 case 32: \
398 return RTLIB::LibcallPrefix##32; \
399 case 64: \
400 return RTLIB::LibcallPrefix##64; \
401 case 128: \
402 return RTLIB::LibcallPrefix##128; \
403 default: \
404 llvm_unreachable("unexpected size")::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 404)
; \
405 } \
406 } while (0)
407
408#define RTLIBCASE(LibcallPrefix)do { switch (Size) { case 32: return RTLIB::LibcallPrefix32; case
64: return RTLIB::LibcallPrefix64; case 80: return RTLIB::LibcallPrefix80
; case 128: return RTLIB::LibcallPrefix128; default: ::llvm::
llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 408); } } while (0)
\
409 do { \
410 switch (Size) { \
411 case 32: \
412 return RTLIB::LibcallPrefix##32; \
413 case 64: \
414 return RTLIB::LibcallPrefix##64; \
415 case 80: \
416 return RTLIB::LibcallPrefix##80; \
417 case 128: \
418 return RTLIB::LibcallPrefix##128; \
419 default: \
420 llvm_unreachable("unexpected size")::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 420)
; \
421 } \
422 } while (0)
423
424 switch (Opcode) {
425 case TargetOpcode::G_SDIV:
426 RTLIBCASE_INT(SDIV_I)do { switch (Size) { case 32: return RTLIB::SDIV_I32; case 64
: return RTLIB::SDIV_I64; case 128: return RTLIB::SDIV_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 426); } } while (0)
;
427 case TargetOpcode::G_UDIV:
428 RTLIBCASE_INT(UDIV_I)do { switch (Size) { case 32: return RTLIB::UDIV_I32; case 64
: return RTLIB::UDIV_I64; case 128: return RTLIB::UDIV_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 428); } } while (0)
;
429 case TargetOpcode::G_SREM:
430 RTLIBCASE_INT(SREM_I)do { switch (Size) { case 32: return RTLIB::SREM_I32; case 64
: return RTLIB::SREM_I64; case 128: return RTLIB::SREM_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 430); } } while (0)
;
431 case TargetOpcode::G_UREM:
432 RTLIBCASE_INT(UREM_I)do { switch (Size) { case 32: return RTLIB::UREM_I32; case 64
: return RTLIB::UREM_I64; case 128: return RTLIB::UREM_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 432); } } while (0)
;
433 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
434 RTLIBCASE_INT(CTLZ_I)do { switch (Size) { case 32: return RTLIB::CTLZ_I32; case 64
: return RTLIB::CTLZ_I64; case 128: return RTLIB::CTLZ_I128; default
: ::llvm::llvm_unreachable_internal("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 434); } } while (0)
;
435 case TargetOpcode::G_FADD:
436 RTLIBCASE(ADD_F)do { switch (Size) { case 32: return RTLIB::ADD_F32; case 64:
return RTLIB::ADD_F64; case 80: return RTLIB::ADD_F80; case 128
: return RTLIB::ADD_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 436); } } while (0)
;
437 case TargetOpcode::G_FSUB:
438 RTLIBCASE(SUB_F)do { switch (Size) { case 32: return RTLIB::SUB_F32; case 64:
return RTLIB::SUB_F64; case 80: return RTLIB::SUB_F80; case 128
: return RTLIB::SUB_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 438); } } while (0)
;
439 case TargetOpcode::G_FMUL:
440 RTLIBCASE(MUL_F)do { switch (Size) { case 32: return RTLIB::MUL_F32; case 64:
return RTLIB::MUL_F64; case 80: return RTLIB::MUL_F80; case 128
: return RTLIB::MUL_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 440); } } while (0)
;
441 case TargetOpcode::G_FDIV:
442 RTLIBCASE(DIV_F)do { switch (Size) { case 32: return RTLIB::DIV_F32; case 64:
return RTLIB::DIV_F64; case 80: return RTLIB::DIV_F80; case 128
: return RTLIB::DIV_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 442); } } while (0)
;
443 case TargetOpcode::G_FEXP:
444 RTLIBCASE(EXP_F)do { switch (Size) { case 32: return RTLIB::EXP_F32; case 64:
return RTLIB::EXP_F64; case 80: return RTLIB::EXP_F80; case 128
: return RTLIB::EXP_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 444); } } while (0)
;
445 case TargetOpcode::G_FEXP2:
446 RTLIBCASE(EXP2_F)do { switch (Size) { case 32: return RTLIB::EXP2_F32; case 64
: return RTLIB::EXP2_F64; case 80: return RTLIB::EXP2_F80; case
128: return RTLIB::EXP2_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 446); } } while (0)
;
447 case TargetOpcode::G_FREM:
448 RTLIBCASE(REM_F)do { switch (Size) { case 32: return RTLIB::REM_F32; case 64:
return RTLIB::REM_F64; case 80: return RTLIB::REM_F80; case 128
: return RTLIB::REM_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 448); } } while (0)
;
449 case TargetOpcode::G_FPOW:
450 RTLIBCASE(POW_F)do { switch (Size) { case 32: return RTLIB::POW_F32; case 64:
return RTLIB::POW_F64; case 80: return RTLIB::POW_F80; case 128
: return RTLIB::POW_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 450); } } while (0)
;
451 case TargetOpcode::G_FMA:
452 RTLIBCASE(FMA_F)do { switch (Size) { case 32: return RTLIB::FMA_F32; case 64:
return RTLIB::FMA_F64; case 80: return RTLIB::FMA_F80; case 128
: return RTLIB::FMA_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 452); } } while (0)
;
453 case TargetOpcode::G_FSIN:
454 RTLIBCASE(SIN_F)do { switch (Size) { case 32: return RTLIB::SIN_F32; case 64:
return RTLIB::SIN_F64; case 80: return RTLIB::SIN_F80; case 128
: return RTLIB::SIN_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 454); } } while (0)
;
455 case TargetOpcode::G_FCOS:
456 RTLIBCASE(COS_F)do { switch (Size) { case 32: return RTLIB::COS_F32; case 64:
return RTLIB::COS_F64; case 80: return RTLIB::COS_F80; case 128
: return RTLIB::COS_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 456); } } while (0)
;
457 case TargetOpcode::G_FLOG10:
458 RTLIBCASE(LOG10_F)do { switch (Size) { case 32: return RTLIB::LOG10_F32; case 64
: return RTLIB::LOG10_F64; case 80: return RTLIB::LOG10_F80; case
128: return RTLIB::LOG10_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 458); } } while (0)
;
459 case TargetOpcode::G_FLOG:
460 RTLIBCASE(LOG_F)do { switch (Size) { case 32: return RTLIB::LOG_F32; case 64:
return RTLIB::LOG_F64; case 80: return RTLIB::LOG_F80; case 128
: return RTLIB::LOG_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 460); } } while (0)
;
461 case TargetOpcode::G_FLOG2:
462 RTLIBCASE(LOG2_F)do { switch (Size) { case 32: return RTLIB::LOG2_F32; case 64
: return RTLIB::LOG2_F64; case 80: return RTLIB::LOG2_F80; case
128: return RTLIB::LOG2_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 462); } } while (0)
;
463 case TargetOpcode::G_FCEIL:
464 RTLIBCASE(CEIL_F)do { switch (Size) { case 32: return RTLIB::CEIL_F32; case 64
: return RTLIB::CEIL_F64; case 80: return RTLIB::CEIL_F80; case
128: return RTLIB::CEIL_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 464); } } while (0)
;
465 case TargetOpcode::G_FFLOOR:
466 RTLIBCASE(FLOOR_F)do { switch (Size) { case 32: return RTLIB::FLOOR_F32; case 64
: return RTLIB::FLOOR_F64; case 80: return RTLIB::FLOOR_F80; case
128: return RTLIB::FLOOR_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 466); } } while (0)
;
467 case TargetOpcode::G_FMINNUM:
468 RTLIBCASE(FMIN_F)do { switch (Size) { case 32: return RTLIB::FMIN_F32; case 64
: return RTLIB::FMIN_F64; case 80: return RTLIB::FMIN_F80; case
128: return RTLIB::FMIN_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 468); } } while (0)
;
469 case TargetOpcode::G_FMAXNUM:
470 RTLIBCASE(FMAX_F)do { switch (Size) { case 32: return RTLIB::FMAX_F32; case 64
: return RTLIB::FMAX_F64; case 80: return RTLIB::FMAX_F80; case
128: return RTLIB::FMAX_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 470); } } while (0)
;
471 case TargetOpcode::G_FSQRT:
472 RTLIBCASE(SQRT_F)do { switch (Size) { case 32: return RTLIB::SQRT_F32; case 64
: return RTLIB::SQRT_F64; case 80: return RTLIB::SQRT_F80; case
128: return RTLIB::SQRT_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 472); } } while (0)
;
473 case TargetOpcode::G_FRINT:
474 RTLIBCASE(RINT_F)do { switch (Size) { case 32: return RTLIB::RINT_F32; case 64
: return RTLIB::RINT_F64; case 80: return RTLIB::RINT_F80; case
128: return RTLIB::RINT_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 474); } } while (0)
;
475 case TargetOpcode::G_FNEARBYINT:
476 RTLIBCASE(NEARBYINT_F)do { switch (Size) { case 32: return RTLIB::NEARBYINT_F32; case
64: return RTLIB::NEARBYINT_F64; case 80: return RTLIB::NEARBYINT_F80
; case 128: return RTLIB::NEARBYINT_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 476); } } while (0)
;
477 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
478 RTLIBCASE(ROUNDEVEN_F)do { switch (Size) { case 32: return RTLIB::ROUNDEVEN_F32; case
64: return RTLIB::ROUNDEVEN_F64; case 80: return RTLIB::ROUNDEVEN_F80
; case 128: return RTLIB::ROUNDEVEN_F128; default: ::llvm::llvm_unreachable_internal
("unexpected size", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 478); } } while (0)
;
479 }
480 llvm_unreachable("Unknown libcall function")::llvm::llvm_unreachable_internal("Unknown libcall function",
"/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 480)
;
481}
482
483/// True if an instruction is in tail position in its caller. Intended for
484/// legalizing libcalls as tail calls when possible.
485static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
486 MachineInstr &MI) {
487 MachineBasicBlock &MBB = *MI.getParent();
488 const Function &F = MBB.getParent()->getFunction();
489
490 // Conservatively require the attributes of the call to match those of
491 // the return. Ignore NoAlias and NonNull because they don't affect the
492 // call sequence.
493 AttributeList CallerAttrs = F.getAttributes();
494 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
495 .removeAttribute(Attribute::NoAlias)
496 .removeAttribute(Attribute::NonNull)
497 .hasAttributes())
498 return false;
499
500 // It's not safe to eliminate the sign / zero extension of the return value.
501 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
502 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
503 return false;
504
505 // Only tail call if the following instruction is a standard return.
506 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
507 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
508 return false;
509
510 return true;
511}
512
513LegalizerHelper::LegalizeResult
514llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
515 const CallLowering::ArgInfo &Result,
516 ArrayRef<CallLowering::ArgInfo> Args,
517 const CallingConv::ID CC) {
518 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
519
520 CallLowering::CallLoweringInfo Info;
521 Info.CallConv = CC;
522 Info.Callee = MachineOperand::CreateES(Name);
523 Info.OrigRet = Result;
524 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
525 if (!CLI.lowerCall(MIRBuilder, Info))
526 return LegalizerHelper::UnableToLegalize;
527
528 return LegalizerHelper::Legalized;
529}
530
531LegalizerHelper::LegalizeResult
532llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
533 const CallLowering::ArgInfo &Result,
534 ArrayRef<CallLowering::ArgInfo> Args) {
535 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
536 const char *Name = TLI.getLibcallName(Libcall);
537 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
538 return createLibcall(MIRBuilder, Name, Result, Args, CC);
539}
540
541// Useful for libcalls where all operands have the same type.
542static LegalizerHelper::LegalizeResult
543simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
544 Type *OpType) {
545 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
546
547 SmallVector<CallLowering::ArgInfo, 3> Args;
548 for (unsigned i = 1; i < MI.getNumOperands(); i++)
549 Args.push_back({MI.getOperand(i).getReg(), OpType});
550 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
551 Args);
552}
553
554LegalizerHelper::LegalizeResult
555llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
556 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
557 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
558
559 SmallVector<CallLowering::ArgInfo, 3> Args;
560 // Add all the args, except for the last which is an imm denoting 'tail'.
561 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
562 Register Reg = MI.getOperand(i).getReg();
563
564 // Need derive an IR type for call lowering.
565 LLT OpLLT = MRI.getType(Reg);
566 Type *OpTy = nullptr;
567 if (OpLLT.isPointer())
568 OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
569 else
570 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
571 Args.push_back({Reg, OpTy});
572 }
573
574 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
575 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
576 RTLIB::Libcall RTLibcall;
577 unsigned Opc = MI.getOpcode();
578 switch (Opc) {
579 case TargetOpcode::G_BZERO:
580 RTLibcall = RTLIB::BZERO;
581 break;
582 case TargetOpcode::G_MEMCPY:
583 RTLibcall = RTLIB::MEMCPY;
584 break;
585 case TargetOpcode::G_MEMMOVE:
586 RTLibcall = RTLIB::MEMMOVE;
587 break;
588 case TargetOpcode::G_MEMSET:
589 RTLibcall = RTLIB::MEMSET;
590 break;
591 default:
592 return LegalizerHelper::UnableToLegalize;
593 }
594 const char *Name = TLI.getLibcallName(RTLibcall);
595
596 // Unsupported libcall on the target.
597 if (!Name) {
598 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. .. Could not find libcall name for "
<< MIRBuilder.getTII().getName(Opc) << "\n"; } }
while (false)
599 << MIRBuilder.getTII().getName(Opc) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << ".. .. Could not find libcall name for "
<< MIRBuilder.getTII().getName(Opc) << "\n"; } }
while (false)
;
600 return LegalizerHelper::UnableToLegalize;
601 }
602
603 CallLowering::CallLoweringInfo Info;
604 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
605 Info.Callee = MachineOperand::CreateES(Name);
606 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
607 Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
608 isLibCallInTailPosition(MIRBuilder.getTII(), MI);
609
610 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
611 if (!CLI.lowerCall(MIRBuilder, Info))
612 return LegalizerHelper::UnableToLegalize;
613
614
615 if (Info.LoweredTailCall) {
616 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?")(static_cast <bool> (Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"
) ? void (0) : __assert_fail ("Info.IsTailCall && \"Lowered tail call when it wasn't a tail call?\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 616, __extension__ __PRETTY_FUNCTION__))
;
617
618 // Check debug locations before removing the return.
619 LocObserver.checkpoint(true);
620
621 // We must have a return following the call (or debug insts) to get past
622 // isLibCallInTailPosition.
623 do {
624 MachineInstr *Next = MI.getNextNode();
625 assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&(static_cast <bool> (Next && (Next->isReturn
() || Next->isDebugInstr()) && "Expected instr following MI to be return or debug inst?"
) ? void (0) : __assert_fail ("Next && (Next->isReturn() || Next->isDebugInstr()) && \"Expected instr following MI to be return or debug inst?\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 626, __extension__ __PRETTY_FUNCTION__))
626 "Expected instr following MI to be return or debug inst?")(static_cast <bool> (Next && (Next->isReturn
() || Next->isDebugInstr()) && "Expected instr following MI to be return or debug inst?"
) ? void (0) : __assert_fail ("Next && (Next->isReturn() || Next->isDebugInstr()) && \"Expected instr following MI to be return or debug inst?\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 626, __extension__ __PRETTY_FUNCTION__))
;
627 // We lowered a tail call, so the call is now the return from the block.
628 // Delete the old return.
629 Next->eraseFromParent();
630 } while (MI.getNextNode());
631
632 // We expect to lose the debug location from the return.
633 LocObserver.checkpoint(false);
634 }
635
636 return LegalizerHelper::Legalized;
637}
638
639static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
640 Type *FromType) {
641 auto ToMVT = MVT::getVT(ToType);
642 auto FromMVT = MVT::getVT(FromType);
643
644 switch (Opcode) {
645 case TargetOpcode::G_FPEXT:
646 return RTLIB::getFPEXT(FromMVT, ToMVT);
647 case TargetOpcode::G_FPTRUNC:
648 return RTLIB::getFPROUND(FromMVT, ToMVT);
649 case TargetOpcode::G_FPTOSI:
650 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
651 case TargetOpcode::G_FPTOUI:
652 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
653 case TargetOpcode::G_SITOFP:
654 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
655 case TargetOpcode::G_UITOFP:
656 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
657 }
658 llvm_unreachable("Unsupported libcall function")::llvm::llvm_unreachable_internal("Unsupported libcall function"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 658)
;
659}
660
661static LegalizerHelper::LegalizeResult
662conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
663 Type *FromType) {
664 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
665 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
666 {{MI.getOperand(1).getReg(), FromType}});
667}
668
669LegalizerHelper::LegalizeResult
670LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
671 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
672 unsigned Size = LLTy.getSizeInBits();
673 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
674
675 switch (MI.getOpcode()) {
676 default:
677 return UnableToLegalize;
678 case TargetOpcode::G_SDIV:
679 case TargetOpcode::G_UDIV:
680 case TargetOpcode::G_SREM:
681 case TargetOpcode::G_UREM:
682 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
683 Type *HLTy = IntegerType::get(Ctx, Size);
684 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
685 if (Status != Legalized)
686 return Status;
687 break;
688 }
689 case TargetOpcode::G_FADD:
690 case TargetOpcode::G_FSUB:
691 case TargetOpcode::G_FMUL:
692 case TargetOpcode::G_FDIV:
693 case TargetOpcode::G_FMA:
694 case TargetOpcode::G_FPOW:
695 case TargetOpcode::G_FREM:
696 case TargetOpcode::G_FCOS:
697 case TargetOpcode::G_FSIN:
698 case TargetOpcode::G_FLOG10:
699 case TargetOpcode::G_FLOG:
700 case TargetOpcode::G_FLOG2:
701 case TargetOpcode::G_FEXP:
702 case TargetOpcode::G_FEXP2:
703 case TargetOpcode::G_FCEIL:
704 case TargetOpcode::G_FFLOOR:
705 case TargetOpcode::G_FMINNUM:
706 case TargetOpcode::G_FMAXNUM:
707 case TargetOpcode::G_FSQRT:
708 case TargetOpcode::G_FRINT:
709 case TargetOpcode::G_FNEARBYINT:
710 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
711 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
712 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
713 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "No libcall available for type "
<< LLTy << ".\n"; } } while (false)
;
714 return UnableToLegalize;
715 }
716 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
717 if (Status != Legalized)
718 return Status;
719 break;
720 }
721 case TargetOpcode::G_FPEXT:
722 case TargetOpcode::G_FPTRUNC: {
723 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
724 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
725 if (!FromTy || !ToTy)
726 return UnableToLegalize;
727 LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
728 if (Status != Legalized)
729 return Status;
730 break;
731 }
732 case TargetOpcode::G_FPTOSI:
733 case TargetOpcode::G_FPTOUI: {
734 // FIXME: Support other types
735 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
736 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
737 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
738 return UnableToLegalize;
739 LegalizeResult Status = conversionLibcall(
740 MI, MIRBuilder,
741 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
742 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
743 if (Status != Legalized)
744 return Status;
745 break;
746 }
747 case TargetOpcode::G_SITOFP:
748 case TargetOpcode::G_UITOFP: {
749 // FIXME: Support other types
750 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
751 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
752 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
753 return UnableToLegalize;
754 LegalizeResult Status = conversionLibcall(
755 MI, MIRBuilder,
756 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
757 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
758 if (Status != Legalized)
759 return Status;
760 break;
761 }
762 case TargetOpcode::G_BZERO:
763 case TargetOpcode::G_MEMCPY:
764 case TargetOpcode::G_MEMMOVE:
765 case TargetOpcode::G_MEMSET: {
766 LegalizeResult Result =
767 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
768 if (Result != Legalized)
769 return Result;
770 MI.eraseFromParent();
771 return Result;
772 }
773 }
774
775 MI.eraseFromParent();
776 return Legalized;
777}
778
779LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
780 unsigned TypeIdx,
781 LLT NarrowTy) {
782 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
783 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1
Calling 'LLT::getSizeInBits'
4
Returning from 'LLT::getSizeInBits'
5
'NarrowSize' initialized to 0
784
785 switch (MI.getOpcode()) {
6
Control jumps to 'case G_BITREVERSE:' at line 1188
786 default:
787 return UnableToLegalize;
788 case TargetOpcode::G_IMPLICIT_DEF: {
789 Register DstReg = MI.getOperand(0).getReg();
790 LLT DstTy = MRI.getType(DstReg);
791
792 // If SizeOp0 is not an exact multiple of NarrowSize, emit
793 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
794 // FIXME: Although this would also be legal for the general case, it causes
795 // a lot of regressions in the emitted code (superfluous COPYs, artifact
796 // combines not being hit). This seems to be a problem related to the
797 // artifact combiner.
798 if (SizeOp0 % NarrowSize != 0) {
799 LLT ImplicitTy = NarrowTy;
800 if (DstTy.isVector())
801 ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
802
803 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
804 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
805
806 MI.eraseFromParent();
807 return Legalized;
808 }
809
810 int NumParts = SizeOp0 / NarrowSize;
811
812 SmallVector<Register, 2> DstRegs;
813 for (int i = 0; i < NumParts; ++i)
814 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
815
816 if (DstTy.isVector())
817 MIRBuilder.buildBuildVector(DstReg, DstRegs);
818 else
819 MIRBuilder.buildMerge(DstReg, DstRegs);
820 MI.eraseFromParent();
821 return Legalized;
822 }
823 case TargetOpcode::G_CONSTANT: {
824 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
825 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
826 unsigned TotalSize = Ty.getSizeInBits();
827 unsigned NarrowSize = NarrowTy.getSizeInBits();
828 int NumParts = TotalSize / NarrowSize;
829
830 SmallVector<Register, 4> PartRegs;
831 for (int I = 0; I != NumParts; ++I) {
832 unsigned Offset = I * NarrowSize;
833 auto K = MIRBuilder.buildConstant(NarrowTy,
834 Val.lshr(Offset).trunc(NarrowSize));
835 PartRegs.push_back(K.getReg(0));
836 }
837
838 LLT LeftoverTy;
839 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
840 SmallVector<Register, 1> LeftoverRegs;
841 if (LeftoverBits != 0) {
842 LeftoverTy = LLT::scalar(LeftoverBits);
843 auto K = MIRBuilder.buildConstant(
844 LeftoverTy,
845 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
846 LeftoverRegs.push_back(K.getReg(0));
847 }
848
849 insertParts(MI.getOperand(0).getReg(),
850 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
851
852 MI.eraseFromParent();
853 return Legalized;
854 }
855 case TargetOpcode::G_SEXT:
856 case TargetOpcode::G_ZEXT:
857 case TargetOpcode::G_ANYEXT:
858 return narrowScalarExt(MI, TypeIdx, NarrowTy);
859 case TargetOpcode::G_TRUNC: {
860 if (TypeIdx != 1)
861 return UnableToLegalize;
862
863 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
864 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
865 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Can't narrow trunc to type "
<< NarrowTy << "\n"; } } while (false)
;
866 return UnableToLegalize;
867 }
868
869 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
870 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
871 MI.eraseFromParent();
872 return Legalized;
873 }
874
875 case TargetOpcode::G_FREEZE:
876 return reduceOperationWidth(MI, TypeIdx, NarrowTy);
877 case TargetOpcode::G_ADD:
878 case TargetOpcode::G_SUB:
879 case TargetOpcode::G_SADDO:
880 case TargetOpcode::G_SSUBO:
881 case TargetOpcode::G_SADDE:
882 case TargetOpcode::G_SSUBE:
883 case TargetOpcode::G_UADDO:
884 case TargetOpcode::G_USUBO:
885 case TargetOpcode::G_UADDE:
886 case TargetOpcode::G_USUBE:
887 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
888 case TargetOpcode::G_MUL:
889 case TargetOpcode::G_UMULH:
890 return narrowScalarMul(MI, NarrowTy);
891 case TargetOpcode::G_EXTRACT:
892 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
893 case TargetOpcode::G_INSERT:
894 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
895 case TargetOpcode::G_LOAD: {
896 auto &MMO = **MI.memoperands_begin();
897 Register DstReg = MI.getOperand(0).getReg();
898 LLT DstTy = MRI.getType(DstReg);
899 if (DstTy.isVector())
900 return UnableToLegalize;
901
902 if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
903 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
904 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
905 MIRBuilder.buildAnyExt(DstReg, TmpReg);
906 MI.eraseFromParent();
907 return Legalized;
908 }
909
910 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
911 }
912 case TargetOpcode::G_ZEXTLOAD:
913 case TargetOpcode::G_SEXTLOAD: {
914 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
915 Register DstReg = MI.getOperand(0).getReg();
916 Register PtrReg = MI.getOperand(1).getReg();
917
918 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
919 auto &MMO = **MI.memoperands_begin();
920 unsigned MemSize = MMO.getSizeInBits();
921
922 if (MemSize == NarrowSize) {
923 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
924 } else if (MemSize < NarrowSize) {
925 MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
926 } else if (MemSize > NarrowSize) {
927 // FIXME: Need to split the load.
928 return UnableToLegalize;
929 }
930
931 if (ZExt)
932 MIRBuilder.buildZExt(DstReg, TmpReg);
933 else
934 MIRBuilder.buildSExt(DstReg, TmpReg);
935
936 MI.eraseFromParent();
937 return Legalized;
938 }
939 case TargetOpcode::G_STORE: {
940 const auto &MMO = **MI.memoperands_begin();
941
942 Register SrcReg = MI.getOperand(0).getReg();
943 LLT SrcTy = MRI.getType(SrcReg);
944 if (SrcTy.isVector())
945 return UnableToLegalize;
946
947 int NumParts = SizeOp0 / NarrowSize;
948 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
949 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
950 if (SrcTy.isVector() && LeftoverBits != 0)
951 return UnableToLegalize;
952
953 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
954 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
955 auto &MMO = **MI.memoperands_begin();
956 MIRBuilder.buildTrunc(TmpReg, SrcReg);
957 MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
958 MI.eraseFromParent();
959 return Legalized;
960 }
961
962 return reduceLoadStoreWidth(MI, 0, NarrowTy);
963 }
964 case TargetOpcode::G_SELECT:
965 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
966 case TargetOpcode::G_AND:
967 case TargetOpcode::G_OR:
968 case TargetOpcode::G_XOR: {
969 // Legalize bitwise operation:
970 // A = BinOp<Ty> B, C
971 // into:
972 // B1, ..., BN = G_UNMERGE_VALUES B
973 // C1, ..., CN = G_UNMERGE_VALUES C
974 // A1 = BinOp<Ty/N> B1, C2
975 // ...
976 // AN = BinOp<Ty/N> BN, CN
977 // A = G_MERGE_VALUES A1, ..., AN
978 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
979 }
980 case TargetOpcode::G_SHL:
981 case TargetOpcode::G_LSHR:
982 case TargetOpcode::G_ASHR:
983 return narrowScalarShift(MI, TypeIdx, NarrowTy);
984 case TargetOpcode::G_CTLZ:
985 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
986 case TargetOpcode::G_CTTZ:
987 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
988 case TargetOpcode::G_CTPOP:
989 if (TypeIdx == 1)
990 switch (MI.getOpcode()) {
991 case TargetOpcode::G_CTLZ:
992 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
993 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
994 case TargetOpcode::G_CTTZ:
995 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
996 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
997 case TargetOpcode::G_CTPOP:
998 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
999 default:
1000 return UnableToLegalize;
1001 }
1002
1003 Observer.changingInstr(MI);
1004 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1005 Observer.changedInstr(MI);
1006 return Legalized;
1007 case TargetOpcode::G_INTTOPTR:
1008 if (TypeIdx != 1)
1009 return UnableToLegalize;
1010
1011 Observer.changingInstr(MI);
1012 narrowScalarSrc(MI, NarrowTy, 1);
1013 Observer.changedInstr(MI);
1014 return Legalized;
1015 case TargetOpcode::G_PTRTOINT:
1016 if (TypeIdx != 0)
1017 return UnableToLegalize;
1018
1019 Observer.changingInstr(MI);
1020 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1021 Observer.changedInstr(MI);
1022 return Legalized;
1023 case TargetOpcode::G_PHI: {
1024 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1025 // NarrowSize.
1026 if (SizeOp0 % NarrowSize != 0)
1027 return UnableToLegalize;
1028
1029 unsigned NumParts = SizeOp0 / NarrowSize;
1030 SmallVector<Register, 2> DstRegs(NumParts);
1031 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1032 Observer.changingInstr(MI);
1033 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1034 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1035 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1036 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1037 SrcRegs[i / 2]);
1038 }
1039 MachineBasicBlock &MBB = *MI.getParent();
1040 MIRBuilder.setInsertPt(MBB, MI);
1041 for (unsigned i = 0; i < NumParts; ++i) {
1042 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1043 MachineInstrBuilder MIB =
1044 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1045 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1046 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1047 }
1048 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1049 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1050 Observer.changedInstr(MI);
1051 MI.eraseFromParent();
1052 return Legalized;
1053 }
1054 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1055 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1056 if (TypeIdx != 2)
1057 return UnableToLegalize;
1058
1059 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1060 Observer.changingInstr(MI);
1061 narrowScalarSrc(MI, NarrowTy, OpIdx);
1062 Observer.changedInstr(MI);
1063 return Legalized;
1064 }
1065 case TargetOpcode::G_ICMP: {
1066 uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1067 if (NarrowSize * 2 != SrcSize)
1068 return UnableToLegalize;
1069
1070 Observer.changingInstr(MI);
1071 Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1072 Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1073 MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1074
1075 Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1076 Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1077 MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1078
1079 CmpInst::Predicate Pred =
1080 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1081 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1082
1083 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1084 MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1085 MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1086 MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1087 MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1088 MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1089 } else {
1090 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1091 MachineInstrBuilder CmpHEQ =
1092 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1093 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1094 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1095 MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1096 }
1097 Observer.changedInstr(MI);
1098 MI.eraseFromParent();
1099 return Legalized;
1100 }
1101 case TargetOpcode::G_SEXT_INREG: {
1102 if (TypeIdx != 0)
1103 return UnableToLegalize;
1104
1105 int64_t SizeInBits = MI.getOperand(2).getImm();
1106
1107 // So long as the new type has more bits than the bits we're extending we
1108 // don't need to break it apart.
1109 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1110 Observer.changingInstr(MI);
1111 // We don't lose any non-extension bits by truncating the src and
1112 // sign-extending the dst.
1113 MachineOperand &MO1 = MI.getOperand(1);
1114 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1115 MO1.setReg(TruncMIB.getReg(0));
1116
1117 MachineOperand &MO2 = MI.getOperand(0);
1118 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1119 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1120 MIRBuilder.buildSExt(MO2, DstExt);
1121 MO2.setReg(DstExt);
1122 Observer.changedInstr(MI);
1123 return Legalized;
1124 }
1125
1126 // Break it apart. Components below the extension point are unmodified. The
1127 // component containing the extension point becomes a narrower SEXT_INREG.
1128 // Components above it are ashr'd from the component containing the
1129 // extension point.
1130 if (SizeOp0 % NarrowSize != 0)
1131 return UnableToLegalize;
1132 int NumParts = SizeOp0 / NarrowSize;
1133
1134 // List the registers where the destination will be scattered.
1135 SmallVector<Register, 2> DstRegs;
1136 // List the registers where the source will be split.
1137 SmallVector<Register, 2> SrcRegs;
1138
1139 // Create all the temporary registers.
1140 for (int i = 0; i < NumParts; ++i) {
1141 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1142
1143 SrcRegs.push_back(SrcReg);
1144 }
1145
1146 // Explode the big arguments into smaller chunks.
1147 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1148
1149 Register AshrCstReg =
1150 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1151 .getReg(0);
1152 Register FullExtensionReg = 0;
1153 Register PartialExtensionReg = 0;
1154
1155 // Do the operation on each small part.
1156 for (int i = 0; i < NumParts; ++i) {
1157 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1158 DstRegs.push_back(SrcRegs[i]);
1159 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1160 assert(PartialExtensionReg &&(static_cast <bool> (PartialExtensionReg && "Expected to visit partial extension before full"
) ? void (0) : __assert_fail ("PartialExtensionReg && \"Expected to visit partial extension before full\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1161, __extension__ __PRETTY_FUNCTION__))
1161 "Expected to visit partial extension before full")(static_cast <bool> (PartialExtensionReg && "Expected to visit partial extension before full"
) ? void (0) : __assert_fail ("PartialExtensionReg && \"Expected to visit partial extension before full\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1161, __extension__ __PRETTY_FUNCTION__))
;
1162 if (FullExtensionReg) {
1163 DstRegs.push_back(FullExtensionReg);
1164 continue;
1165 }
1166 DstRegs.push_back(
1167 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1168 .getReg(0));
1169 FullExtensionReg = DstRegs.back();
1170 } else {
1171 DstRegs.push_back(
1172 MIRBuilder
1173 .buildInstr(
1174 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1175 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1176 .getReg(0));
1177 PartialExtensionReg = DstRegs.back();
1178 }
1179 }
1180
1181 // Gather the destination registers into the final destination.
1182 Register DstReg = MI.getOperand(0).getReg();
1183 MIRBuilder.buildMerge(DstReg, DstRegs);
1184 MI.eraseFromParent();
1185 return Legalized;
1186 }
1187 case TargetOpcode::G_BSWAP:
1188 case TargetOpcode::G_BITREVERSE: {
1189 if (SizeOp0 % NarrowSize != 0)
7
Division by zero
1190 return UnableToLegalize;
1191
1192 Observer.changingInstr(MI);
1193 SmallVector<Register, 2> SrcRegs, DstRegs;
1194 unsigned NumParts = SizeOp0 / NarrowSize;
1195 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1196
1197 for (unsigned i = 0; i < NumParts; ++i) {
1198 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1199 {SrcRegs[NumParts - 1 - i]});
1200 DstRegs.push_back(DstPart.getReg(0));
1201 }
1202
1203 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1204
1205 Observer.changedInstr(MI);
1206 MI.eraseFromParent();
1207 return Legalized;
1208 }
1209 case TargetOpcode::G_PTR_ADD:
1210 case TargetOpcode::G_PTRMASK: {
1211 if (TypeIdx != 1)
1212 return UnableToLegalize;
1213 Observer.changingInstr(MI);
1214 narrowScalarSrc(MI, NarrowTy, 2);
1215 Observer.changedInstr(MI);
1216 return Legalized;
1217 }
1218 case TargetOpcode::G_FPTOUI:
1219 case TargetOpcode::G_FPTOSI:
1220 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1221 case TargetOpcode::G_FPEXT:
1222 if (TypeIdx != 0)
1223 return UnableToLegalize;
1224 Observer.changingInstr(MI);
1225 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1226 Observer.changedInstr(MI);
1227 return Legalized;
1228 }
1229}
1230
1231Register LegalizerHelper::coerceToScalar(Register Val) {
1232 LLT Ty = MRI.getType(Val);
1233 if (Ty.isScalar())
1234 return Val;
1235
1236 const DataLayout &DL = MIRBuilder.getDataLayout();
1237 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1238 if (Ty.isPointer()) {
1239 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1240 return Register();
1241 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1242 }
1243
1244 Register NewVal = Val;
1245
1246 assert(Ty.isVector())(static_cast <bool> (Ty.isVector()) ? void (0) : __assert_fail
("Ty.isVector()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1246, __extension__ __PRETTY_FUNCTION__))
;
1247 LLT EltTy = Ty.getElementType();
1248 if (EltTy.isPointer())
1249 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1250 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1251}
1252
1253void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1254 unsigned OpIdx, unsigned ExtOpcode) {
1255 MachineOperand &MO = MI.getOperand(OpIdx);
1256 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1257 MO.setReg(ExtB.getReg(0));
1258}
1259
1260void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1261 unsigned OpIdx) {
1262 MachineOperand &MO = MI.getOperand(OpIdx);
1263 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1264 MO.setReg(ExtB.getReg(0));
1265}
1266
1267void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1268 unsigned OpIdx, unsigned TruncOpcode) {
1269 MachineOperand &MO = MI.getOperand(OpIdx);
1270 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1271 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1272 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1273 MO.setReg(DstExt);
1274}
1275
1276void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1277 unsigned OpIdx, unsigned ExtOpcode) {
1278 MachineOperand &MO = MI.getOperand(OpIdx);
1279 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1280 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1281 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1282 MO.setReg(DstTrunc);
1283}
1284
1285void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1286 unsigned OpIdx) {
1287 MachineOperand &MO = MI.getOperand(OpIdx);
1288 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1289 MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1290}
1291
1292void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1293 unsigned OpIdx) {
1294 MachineOperand &MO = MI.getOperand(OpIdx);
1295
1296 LLT OldTy = MRI.getType(MO.getReg());
1297 unsigned OldElts = OldTy.getNumElements();
1298 unsigned NewElts = MoreTy.getNumElements();
1299
1300 unsigned NumParts = NewElts / OldElts;
1301
1302 // Use concat_vectors if the result is a multiple of the number of elements.
1303 if (NumParts * OldElts == NewElts) {
1304 SmallVector<Register, 8> Parts;
1305 Parts.push_back(MO.getReg());
1306
1307 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1308 for (unsigned I = 1; I != NumParts; ++I)
1309 Parts.push_back(ImpDef);
1310
1311 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1312 MO.setReg(Concat.getReg(0));
1313 return;
1314 }
1315
1316 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1317 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1318 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1319 MO.setReg(MoreReg);
1320}
1321
1322void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1323 MachineOperand &Op = MI.getOperand(OpIdx);
1324 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1325}
1326
1327void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1328 MachineOperand &MO = MI.getOperand(OpIdx);
1329 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1330 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1331 MIRBuilder.buildBitcast(MO, CastDst);
1332 MO.setReg(CastDst);
1333}
1334
1335LegalizerHelper::LegalizeResult
1336LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1337 LLT WideTy) {
1338 if (TypeIdx != 1)
1339 return UnableToLegalize;
1340
1341 Register DstReg = MI.getOperand(0).getReg();
1342 LLT DstTy = MRI.getType(DstReg);
1343 if (DstTy.isVector())
1344 return UnableToLegalize;
1345
1346 Register Src1 = MI.getOperand(1).getReg();
1347 LLT SrcTy = MRI.getType(Src1);
1348 const int DstSize = DstTy.getSizeInBits();
1349 const int SrcSize = SrcTy.getSizeInBits();
1350 const int WideSize = WideTy.getSizeInBits();
1351 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1352
1353 unsigned NumOps = MI.getNumOperands();
1354 unsigned NumSrc = MI.getNumOperands() - 1;
1355 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1356
1357 if (WideSize >= DstSize) {
1358 // Directly pack the bits in the target type.
1359 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1360
1361 for (unsigned I = 2; I != NumOps; ++I) {
1362 const unsigned Offset = (I - 1) * PartSize;
1363
1364 Register SrcReg = MI.getOperand(I).getReg();
1365 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize))(static_cast <bool> (MRI.getType(SrcReg) == LLT::scalar
(PartSize)) ? void (0) : __assert_fail ("MRI.getType(SrcReg) == LLT::scalar(PartSize)"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1365, __extension__ __PRETTY_FUNCTION__))
;
1366
1367 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1368
1369 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1370 MRI.createGenericVirtualRegister(WideTy);
1371
1372 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1373 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1374 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1375 ResultReg = NextResult;
1376 }
1377
1378 if (WideSize > DstSize)
1379 MIRBuilder.buildTrunc(DstReg, ResultReg);
1380 else if (DstTy.isPointer())
1381 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1382
1383 MI.eraseFromParent();
1384 return Legalized;
1385 }
1386
1387 // Unmerge the original values to the GCD type, and recombine to the next
1388 // multiple greater than the original type.
1389 //
1390 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1391 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1392 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1393 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1394 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1395 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1396 // %12:_(s12) = G_MERGE_VALUES %10, %11
1397 //
1398 // Padding with undef if necessary:
1399 //
1400 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1401 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1402 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1403 // %7:_(s2) = G_IMPLICIT_DEF
1404 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1405 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1406 // %10:_(s12) = G_MERGE_VALUES %8, %9
1407
1408 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1409 LLT GCDTy = LLT::scalar(GCD);
1410
1411 SmallVector<Register, 8> Parts;
1412 SmallVector<Register, 8> NewMergeRegs;
1413 SmallVector<Register, 8> Unmerges;
1414 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1415
1416 // Decompose the original operands if they don't evenly divide.
1417 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1418 Register SrcReg = MI.getOperand(I).getReg();
1419 if (GCD == SrcSize) {
1420 Unmerges.push_back(SrcReg);
1421 } else {
1422 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1423 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1424 Unmerges.push_back(Unmerge.getReg(J));
1425 }
1426 }
1427
1428 // Pad with undef to the next size that is a multiple of the requested size.
1429 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1430 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1431 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1432 Unmerges.push_back(UndefReg);
1433 }
1434
1435 const int PartsPerGCD = WideSize / GCD;
1436
1437 // Build merges of each piece.
1438 ArrayRef<Register> Slicer(Unmerges);
1439 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1440 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1441 NewMergeRegs.push_back(Merge.getReg(0));
1442 }
1443
1444 // A truncate may be necessary if the requested type doesn't evenly divide the
1445 // original result type.
1446 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1447 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1448 } else {
1449 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1450 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1451 }
1452
1453 MI.eraseFromParent();
1454 return Legalized;
1455}
1456
1457Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1458 Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1459 LLT OrigTy = MRI.getType(OrigReg);
1460 LLT LCMTy = getLCMType(WideTy, OrigTy);
1461
1462 const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1463 const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1464
1465 Register UnmergeSrc = WideReg;
1466
1467 // Create a merge to the LCM type, padding with undef
1468 // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1469 // =>
1470 // %1:_(<4 x s32>) = G_FOO
1471 // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1472 // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1473 // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1474 if (NumMergeParts > 1) {
1475 Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1476 SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1477 MergeParts[0] = WideReg;
1478 UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1479 }
1480
1481 // Unmerge to the original register and pad with dead defs.
1482 SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1483 UnmergeResults[0] = OrigReg;
1484 for (int I = 1; I != NumUnmergeParts; ++I)
1485 UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1486
1487 MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1488 return WideReg;
1489}
1490
1491LegalizerHelper::LegalizeResult
1492LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1493 LLT WideTy) {
1494 if (TypeIdx != 0)
1495 return UnableToLegalize;
1496
1497 int NumDst = MI.getNumOperands() - 1;
1498 Register SrcReg = MI.getOperand(NumDst).getReg();
1499 LLT SrcTy = MRI.getType(SrcReg);
1500 if (SrcTy.isVector())
1501 return UnableToLegalize;
1502
1503 Register Dst0Reg = MI.getOperand(0).getReg();
1504 LLT DstTy = MRI.getType(Dst0Reg);
1505 if (!DstTy.isScalar())
1506 return UnableToLegalize;
1507
1508 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1509 if (SrcTy.isPointer()) {
1510 const DataLayout &DL = MIRBuilder.getDataLayout();
1511 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1512 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Not casting non-integral address space integer\n"
; } } while (false)
1513 dbgs() << "Not casting non-integral address space integer\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Not casting non-integral address space integer\n"
; } } while (false)
;
1514 return UnableToLegalize;
1515 }
1516
1517 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1518 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1519 }
1520
1521 // Widen SrcTy to WideTy. This does not affect the result, but since the
1522 // user requested this size, it is probably better handled than SrcTy and
1523 // should reduce the total number of legalization artifacts
1524 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1525 SrcTy = WideTy;
1526 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1527 }
1528
1529 // Theres no unmerge type to target. Directly extract the bits from the
1530 // source type
1531 unsigned DstSize = DstTy.getSizeInBits();
1532
1533 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1534 for (int I = 1; I != NumDst; ++I) {
1535 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1536 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1537 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1538 }
1539
1540 MI.eraseFromParent();
1541 return Legalized;
1542 }
1543
1544 // Extend the source to a wider type.
1545 LLT LCMTy = getLCMType(SrcTy, WideTy);
1546
1547 Register WideSrc = SrcReg;
1548 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1549 // TODO: If this is an integral address space, cast to integer and anyext.
1550 if (SrcTy.isPointer()) {
1551 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Widening pointer source types not implemented\n"
; } } while (false)
;
1552 return UnableToLegalize;
1553 }
1554
1555 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1556 }
1557
1558 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1559
1560 // Create a sequence of unmerges and merges to the original results. Since we
1561 // may have widened the source, we will need to pad the results with dead defs
1562 // to cover the source register.
1563 // e.g. widen s48 to s64:
1564 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1565 //
1566 // =>
1567 // %4:_(s192) = G_ANYEXT %0:_(s96)
1568 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1569 // ; unpack to GCD type, with extra dead defs
1570 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1571 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1572 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
1573 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1574 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1575 const LLT GCDTy = getGCDType(WideTy, DstTy);
1576 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1577 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1578
1579 // Directly unmerge to the destination without going through a GCD type
1580 // if possible
1581 if (PartsPerRemerge == 1) {
1582 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1583
1584 for (int I = 0; I != NumUnmerge; ++I) {
1585 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1586
1587 for (int J = 0; J != PartsPerUnmerge; ++J) {
1588 int Idx = I * PartsPerUnmerge + J;
1589 if (Idx < NumDst)
1590 MIB.addDef(MI.getOperand(Idx).getReg());
1591 else {
1592 // Create dead def for excess components.
1593 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1594 }
1595 }
1596
1597 MIB.addUse(Unmerge.getReg(I));
1598 }
1599 } else {
1600 SmallVector<Register, 16> Parts;
1601 for (int J = 0; J != NumUnmerge; ++J)
1602 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1603
1604 SmallVector<Register, 8> RemergeParts;
1605 for (int I = 0; I != NumDst; ++I) {
1606 for (int J = 0; J < PartsPerRemerge; ++J) {
1607 const int Idx = I * PartsPerRemerge + J;
1608 RemergeParts.emplace_back(Parts[Idx]);
1609 }
1610
1611 MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1612 RemergeParts.clear();
1613 }
1614 }
1615
1616 MI.eraseFromParent();
1617 return Legalized;
1618}
1619
1620LegalizerHelper::LegalizeResult
1621LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1622 LLT WideTy) {
1623 Register DstReg = MI.getOperand(0).getReg();
1624 Register SrcReg = MI.getOperand(1).getReg();
1625 LLT SrcTy = MRI.getType(SrcReg);
1626
1627 LLT DstTy = MRI.getType(DstReg);
1628 unsigned Offset = MI.getOperand(2).getImm();
1629
1630 if (TypeIdx == 0) {
1631 if (SrcTy.isVector() || DstTy.isVector())
1632 return UnableToLegalize;
1633
1634 SrcOp Src(SrcReg);
1635 if (SrcTy.isPointer()) {
1636 // Extracts from pointers can be handled only if they are really just
1637 // simple integers.
1638 const DataLayout &DL = MIRBuilder.getDataLayout();
1639 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1640 return UnableToLegalize;
1641
1642 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1643 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1644 SrcTy = SrcAsIntTy;
1645 }
1646
1647 if (DstTy.isPointer())
1648 return UnableToLegalize;
1649
1650 if (Offset == 0) {
1651 // Avoid a shift in the degenerate case.
1652 MIRBuilder.buildTrunc(DstReg,
1653 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1654 MI.eraseFromParent();
1655 return Legalized;
1656 }
1657
1658 // Do a shift in the source type.
1659 LLT ShiftTy = SrcTy;
1660 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1661 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1662 ShiftTy = WideTy;
1663 }
1664
1665 auto LShr = MIRBuilder.buildLShr(
1666 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1667 MIRBuilder.buildTrunc(DstReg, LShr);
1668 MI.eraseFromParent();
1669 return Legalized;
1670 }
1671
1672 if (SrcTy.isScalar()) {
1673 Observer.changingInstr(MI);
1674 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1675 Observer.changedInstr(MI);
1676 return Legalized;
1677 }
1678
1679 if (!SrcTy.isVector())
1680 return UnableToLegalize;
1681
1682 if (DstTy != SrcTy.getElementType())
1683 return UnableToLegalize;
1684
1685 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1686 return UnableToLegalize;
1687
1688 Observer.changingInstr(MI);
1689 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1690
1691 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1692 Offset);
1693 widenScalarDst(MI, WideTy.getScalarType(), 0);
1694 Observer.changedInstr(MI);
1695 return Legalized;
1696}
1697
1698LegalizerHelper::LegalizeResult
1699LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1700 LLT WideTy) {
1701 if (TypeIdx != 0 || WideTy.isVector())
1702 return UnableToLegalize;
1703 Observer.changingInstr(MI);
1704 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1705 widenScalarDst(MI, WideTy);
1706 Observer.changedInstr(MI);
1707 return Legalized;
1708}
1709
1710LegalizerHelper::LegalizeResult
1711LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1712 LLT WideTy) {
1713 if (TypeIdx == 1)
1714 return UnableToLegalize; // TODO
1715
1716 unsigned Opcode;
1717 unsigned ExtOpcode;
1718 Optional<Register> CarryIn = None;
1719 switch (MI.getOpcode()) {
1720 default:
1721 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 1721)
;
1722 case TargetOpcode::G_SADDO:
1723 Opcode = TargetOpcode::G_ADD;
1724 ExtOpcode = TargetOpcode::G_SEXT;
1725 break;
1726 case TargetOpcode::G_SSUBO:
1727 Opcode = TargetOpcode::G_SUB;
1728 ExtOpcode = TargetOpcode::G_SEXT;
1729 break;
1730 case TargetOpcode::G_UADDO:
1731 Opcode = TargetOpcode::G_ADD;
1732 ExtOpcode = TargetOpcode::G_ZEXT;
1733 break;
1734 case TargetOpcode::G_USUBO:
1735 Opcode = TargetOpcode::G_SUB;
1736 ExtOpcode = TargetOpcode::G_ZEXT;
1737 break;
1738 case TargetOpcode::G_SADDE:
1739 Opcode = TargetOpcode::G_UADDE;
1740 ExtOpcode = TargetOpcode::G_SEXT;
1741 CarryIn = MI.getOperand(4).getReg();
1742 break;
1743 case TargetOpcode::G_SSUBE:
1744 Opcode = TargetOpcode::G_USUBE;
1745 ExtOpcode = TargetOpcode::G_SEXT;
1746 CarryIn = MI.getOperand(4).getReg();
1747 break;
1748 case TargetOpcode::G_UADDE:
1749 Opcode = TargetOpcode::G_UADDE;
1750 ExtOpcode = TargetOpcode::G_ZEXT;
1751 CarryIn = MI.getOperand(4).getReg();
1752 break;
1753 case TargetOpcode::G_USUBE:
1754 Opcode = TargetOpcode::G_USUBE;
1755 ExtOpcode = TargetOpcode::G_ZEXT;
1756 CarryIn = MI.getOperand(4).getReg();
1757 break;
1758 }
1759
1760 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1761 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1762 // Do the arithmetic in the larger type.
1763 Register NewOp;
1764 if (CarryIn) {
1765 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1766 NewOp = MIRBuilder
1767 .buildInstr(Opcode, {WideTy, CarryOutTy},
1768 {LHSExt, RHSExt, *CarryIn})
1769 .getReg(0);
1770 } else {
1771 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1772 }
1773 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1774 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1775 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1776 // There is no overflow if the ExtOp is the same as NewOp.
1777 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1778 // Now trunc the NewOp to the original result.
1779 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1780 MI.eraseFromParent();
1781 return Legalized;
1782}
1783
1784LegalizerHelper::LegalizeResult
1785LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1786 LLT WideTy) {
1787 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1788 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1789 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1790 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1791 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1792 // We can convert this to:
1793 // 1. Any extend iN to iM
1794 // 2. SHL by M-N
1795 // 3. [US][ADD|SUB|SHL]SAT
1796 // 4. L/ASHR by M-N
1797 //
1798 // It may be more efficient to lower this to a min and a max operation in
1799 // the higher precision arithmetic if the promoted operation isn't legal,
1800 // but this decision is up to the target's lowering request.
1801 Register DstReg = MI.getOperand(0).getReg();
1802
1803 unsigned NewBits = WideTy.getScalarSizeInBits();
1804 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1805
1806 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1807 // must not left shift the RHS to preserve the shift amount.
1808 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1809 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1810 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1811 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1812 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1813 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1814
1815 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1816 {ShiftL, ShiftR}, MI.getFlags());
1817
1818 // Use a shift that will preserve the number of sign bits when the trunc is
1819 // folded away.
1820 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1821 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1822
1823 MIRBuilder.buildTrunc(DstReg, Result);
1824 MI.eraseFromParent();
1825 return Legalized;
1826}
1827
1828LegalizerHelper::LegalizeResult
1829LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1830 LLT WideTy) {
1831 if (TypeIdx == 1)
1832 return UnableToLegalize;
1833
1834 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1835 Register Result = MI.getOperand(0).getReg();
1836 Register OriginalOverflow = MI.getOperand(1).getReg();
1837 Register LHS = MI.getOperand(2).getReg();
1838 Register RHS = MI.getOperand(3).getReg();
1839 LLT SrcTy = MRI.getType(LHS);
1840 LLT OverflowTy = MRI.getType(OriginalOverflow);
1841 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1842
1843 // To determine if the result overflowed in the larger type, we extend the
1844 // input to the larger type, do the multiply (checking if it overflows),
1845 // then also check the high bits of the result to see if overflow happened
1846 // there.
1847 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1848 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1849 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1850
1851 auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1852 {LeftOperand, RightOperand});
1853 auto Mul = Mulo->getOperand(0);
1854 MIRBuilder.buildTrunc(Result, Mul);
1855
1856 MachineInstrBuilder ExtResult;
1857 // Overflow occurred if it occurred in the larger type, or if the high part
1858 // of the result does not zero/sign-extend the low part. Check this second
1859 // possibility first.
1860 if (IsSigned) {
1861 // For signed, overflow occurred when the high part does not sign-extend
1862 // the low part.
1863 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
1864 } else {
1865 // Unsigned overflow occurred when the high part does not zero-extend the
1866 // low part.
1867 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
1868 }
1869
1870 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
1871 // so we don't need to check the overflow result of larger type Mulo.
1872 if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
1873 auto Overflow =
1874 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
1875 // Finally check if the multiplication in the larger type itself overflowed.
1876 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
1877 } else {
1878 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
1879 }
1880 MI.eraseFromParent();
1881 return Legalized;
1882}
1883
1884LegalizerHelper::LegalizeResult
1885LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1886 switch (MI.getOpcode()) {
1887 default:
1888 return UnableToLegalize;
1889 case TargetOpcode::G_EXTRACT:
1890 return widenScalarExtract(MI, TypeIdx, WideTy);
1891 case TargetOpcode::G_INSERT:
1892 return widenScalarInsert(MI, TypeIdx, WideTy);
1893 case TargetOpcode::G_MERGE_VALUES:
1894 return widenScalarMergeValues(MI, TypeIdx, WideTy);
1895 case TargetOpcode::G_UNMERGE_VALUES:
1896 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1897 case TargetOpcode::G_SADDO:
1898 case TargetOpcode::G_SSUBO:
1899 case TargetOpcode::G_UADDO:
1900 case TargetOpcode::G_USUBO:
1901 case TargetOpcode::G_SADDE:
1902 case TargetOpcode::G_SSUBE:
1903 case TargetOpcode::G_UADDE:
1904 case TargetOpcode::G_USUBE:
1905 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
1906 case TargetOpcode::G_UMULO:
1907 case TargetOpcode::G_SMULO:
1908 return widenScalarMulo(MI, TypeIdx, WideTy);
1909 case TargetOpcode::G_SADDSAT:
1910 case TargetOpcode::G_SSUBSAT:
1911 case TargetOpcode::G_SSHLSAT:
1912 case TargetOpcode::G_UADDSAT:
1913 case TargetOpcode::G_USUBSAT:
1914 case TargetOpcode::G_USHLSAT:
1915 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
1916 case TargetOpcode::G_CTTZ:
1917 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1918 case TargetOpcode::G_CTLZ:
1919 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1920 case TargetOpcode::G_CTPOP: {
1921 if (TypeIdx == 0) {
1922 Observer.changingInstr(MI);
1923 widenScalarDst(MI, WideTy, 0);
1924 Observer.changedInstr(MI);
1925 return Legalized;
1926 }
1927
1928 Register SrcReg = MI.getOperand(1).getReg();
1929
1930 // First ZEXT the input.
1931 auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1932 LLT CurTy = MRI.getType(SrcReg);
1933 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1934 // The count is the same in the larger type except if the original
1935 // value was zero. This can be handled by setting the bit just off
1936 // the top of the original type.
1937 auto TopBit =
1938 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1939 MIBSrc = MIRBuilder.buildOr(
1940 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1941 }
1942
1943 // Perform the operation at the larger size.
1944 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1945 // This is already the correct result for CTPOP and CTTZs
1946 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1947 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1948 // The correct result is NewOp - (Difference in widety and current ty).
1949 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1950 MIBNewOp = MIRBuilder.buildSub(
1951 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1952 }
1953
1954 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1955 MI.eraseFromParent();
1956 return Legalized;
1957 }
1958 case TargetOpcode::G_BSWAP: {
1959 Observer.changingInstr(MI);
1960 Register DstReg = MI.getOperand(0).getReg();
1961
1962 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1963 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1964 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1965 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1966
1967 MI.getOperand(0).setReg(DstExt);
1968
1969 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1970
1971 LLT Ty = MRI.getType(DstReg);
1972 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1973 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1974 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1975
1976 MIRBuilder.buildTrunc(DstReg, ShrReg);
1977 Observer.changedInstr(MI);
1978 return Legalized;
1979 }
1980 case TargetOpcode::G_BITREVERSE: {
1981 Observer.changingInstr(MI);
1982
1983 Register DstReg = MI.getOperand(0).getReg();
1984 LLT Ty = MRI.getType(DstReg);
1985 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1986
1987 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1988 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1989 MI.getOperand(0).setReg(DstExt);
1990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1991
1992 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1993 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1994 MIRBuilder.buildTrunc(DstReg, Shift);
1995 Observer.changedInstr(MI);
1996 return Legalized;
1997 }
1998 case TargetOpcode::G_FREEZE:
1999 Observer.changingInstr(MI);
2000 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2001 widenScalarDst(MI, WideTy);
2002 Observer.changedInstr(MI);
2003 return Legalized;
2004
2005 case TargetOpcode::G_ABS:
2006 Observer.changingInstr(MI);
2007 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2008 widenScalarDst(MI, WideTy);
2009 Observer.changedInstr(MI);
2010 return Legalized;
2011
2012 case TargetOpcode::G_ADD:
2013 case TargetOpcode::G_AND:
2014 case TargetOpcode::G_MUL:
2015 case TargetOpcode::G_OR:
2016 case TargetOpcode::G_XOR:
2017 case TargetOpcode::G_SUB:
2018 // Perform operation at larger width (any extension is fine here, high bits
2019 // don't affect the result) and then truncate the result back to the
2020 // original type.
2021 Observer.changingInstr(MI);
2022 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2023 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2024 widenScalarDst(MI, WideTy);
2025 Observer.changedInstr(MI);
2026 return Legalized;
2027
2028 case TargetOpcode::G_SHL:
2029 Observer.changingInstr(MI);
2030
2031 if (TypeIdx == 0) {
2032 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2033 widenScalarDst(MI, WideTy);
2034 } else {
2035 assert(TypeIdx == 1)(static_cast <bool> (TypeIdx == 1) ? void (0) : __assert_fail
("TypeIdx == 1", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2035, __extension__ __PRETTY_FUNCTION__))
;
2036 // The "number of bits to shift" operand must preserve its value as an
2037 // unsigned integer:
2038 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2039 }
2040
2041 Observer.changedInstr(MI);
2042 return Legalized;
2043
2044 case TargetOpcode::G_SDIV:
2045 case TargetOpcode::G_SREM:
2046 case TargetOpcode::G_SMIN:
2047 case TargetOpcode::G_SMAX:
2048 Observer.changingInstr(MI);
2049 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2050 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2051 widenScalarDst(MI, WideTy);
2052 Observer.changedInstr(MI);
2053 return Legalized;
2054
2055 case TargetOpcode::G_SDIVREM:
2056 Observer.changingInstr(MI);
2057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2059 widenScalarDst(MI, WideTy);
2060 widenScalarDst(MI, WideTy, 1);
2061 Observer.changedInstr(MI);
2062 return Legalized;
2063
2064 case TargetOpcode::G_ASHR:
2065 case TargetOpcode::G_LSHR:
2066 Observer.changingInstr(MI);
2067
2068 if (TypeIdx == 0) {
2069 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2070 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2071
2072 widenScalarSrc(MI, WideTy, 1, CvtOp);
2073 widenScalarDst(MI, WideTy);
2074 } else {
2075 assert(TypeIdx == 1)(static_cast <bool> (TypeIdx == 1) ? void (0) : __assert_fail
("TypeIdx == 1", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2075, __extension__ __PRETTY_FUNCTION__))
;
2076 // The "number of bits to shift" operand must preserve its value as an
2077 // unsigned integer:
2078 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2079 }
2080
2081 Observer.changedInstr(MI);
2082 return Legalized;
2083 case TargetOpcode::G_UDIV:
2084 case TargetOpcode::G_UREM:
2085 case TargetOpcode::G_UMIN:
2086 case TargetOpcode::G_UMAX:
2087 Observer.changingInstr(MI);
2088 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2089 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2090 widenScalarDst(MI, WideTy);
2091 Observer.changedInstr(MI);
2092 return Legalized;
2093
2094 case TargetOpcode::G_UDIVREM:
2095 Observer.changingInstr(MI);
2096 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2097 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2098 widenScalarDst(MI, WideTy);
2099 widenScalarDst(MI, WideTy, 1);
2100 Observer.changedInstr(MI);
2101 return Legalized;
2102
2103 case TargetOpcode::G_SELECT:
2104 Observer.changingInstr(MI);
2105 if (TypeIdx == 0) {
2106 // Perform operation at larger width (any extension is fine here, high
2107 // bits don't affect the result) and then truncate the result back to the
2108 // original type.
2109 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2110 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2111 widenScalarDst(MI, WideTy);
2112 } else {
2113 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2114 // Explicit extension is required here since high bits affect the result.
2115 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2116 }
2117 Observer.changedInstr(MI);
2118 return Legalized;
2119
2120 case TargetOpcode::G_FPTOSI:
2121 case TargetOpcode::G_FPTOUI:
2122 Observer.changingInstr(MI);
2123
2124 if (TypeIdx == 0)
2125 widenScalarDst(MI, WideTy);
2126 else
2127 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2128
2129 Observer.changedInstr(MI);
2130 return Legalized;
2131 case TargetOpcode::G_SITOFP:
2132 Observer.changingInstr(MI);
2133
2134 if (TypeIdx == 0)
2135 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2136 else
2137 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2138
2139 Observer.changedInstr(MI);
2140 return Legalized;
2141 case TargetOpcode::G_UITOFP:
2142 Observer.changingInstr(MI);
2143
2144 if (TypeIdx == 0)
2145 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2146 else
2147 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2148
2149 Observer.changedInstr(MI);
2150 return Legalized;
2151 case TargetOpcode::G_LOAD:
2152 case TargetOpcode::G_SEXTLOAD:
2153 case TargetOpcode::G_ZEXTLOAD:
2154 Observer.changingInstr(MI);
2155 widenScalarDst(MI, WideTy);
2156 Observer.changedInstr(MI);
2157 return Legalized;
2158
2159 case TargetOpcode::G_STORE: {
2160 if (TypeIdx != 0)
2161 return UnableToLegalize;
2162
2163 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2164 if (!Ty.isScalar())
2165 return UnableToLegalize;
2166
2167 Observer.changingInstr(MI);
2168
2169 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2170 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2171 widenScalarSrc(MI, WideTy, 0, ExtType);
2172
2173 Observer.changedInstr(MI);
2174 return Legalized;
2175 }
2176 case TargetOpcode::G_CONSTANT: {
2177 MachineOperand &SrcMO = MI.getOperand(1);
2178 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2179 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2180 MRI.getType(MI.getOperand(0).getReg()));
2181 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||(static_cast <bool> ((ExtOpc == TargetOpcode::G_ZEXT ||
ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT
) && "Illegal Extend") ? void (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2183, __extension__ __PRETTY_FUNCTION__))
2182 ExtOpc == TargetOpcode::G_ANYEXT) &&(static_cast <bool> ((ExtOpc == TargetOpcode::G_ZEXT ||
ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT
) && "Illegal Extend") ? void (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2183, __extension__ __PRETTY_FUNCTION__))
2183 "Illegal Extend")(static_cast <bool> ((ExtOpc == TargetOpcode::G_ZEXT ||
ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT
) && "Illegal Extend") ? void (0) : __assert_fail ("(ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ANYEXT) && \"Illegal Extend\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2183, __extension__ __PRETTY_FUNCTION__))
;
2184 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2185 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2186 ? SrcVal.sext(WideTy.getSizeInBits())
2187 : SrcVal.zext(WideTy.getSizeInBits());
2188 Observer.changingInstr(MI);
2189 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2190
2191 widenScalarDst(MI, WideTy);
2192 Observer.changedInstr(MI);
2193 return Legalized;
2194 }
2195 case TargetOpcode::G_FCONSTANT: {
2196 MachineOperand &SrcMO = MI.getOperand(1);
2197 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2198 APFloat Val = SrcMO.getFPImm()->getValueAPF();
2199 bool LosesInfo;
2200 switch (WideTy.getSizeInBits()) {
2201 case 32:
2202 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2203 &LosesInfo);
2204 break;
2205 case 64:
2206 Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2207 &LosesInfo);
2208 break;
2209 default:
2210 return UnableToLegalize;
2211 }
2212
2213 assert(!LosesInfo && "extend should always be lossless")(static_cast <bool> (!LosesInfo && "extend should always be lossless"
) ? void (0) : __assert_fail ("!LosesInfo && \"extend should always be lossless\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2213, __extension__ __PRETTY_FUNCTION__))
;
2214
2215 Observer.changingInstr(MI);
2216 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2217
2218 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2219 Observer.changedInstr(MI);
2220 return Legalized;
2221 }
2222 case TargetOpcode::G_IMPLICIT_DEF: {
2223 Observer.changingInstr(MI);
2224 widenScalarDst(MI, WideTy);
2225 Observer.changedInstr(MI);
2226 return Legalized;
2227 }
2228 case TargetOpcode::G_BRCOND:
2229 Observer.changingInstr(MI);
2230 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2231 Observer.changedInstr(MI);
2232 return Legalized;
2233
2234 case TargetOpcode::G_FCMP:
2235 Observer.changingInstr(MI);
2236 if (TypeIdx == 0)
2237 widenScalarDst(MI, WideTy);
2238 else {
2239 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2240 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2241 }
2242 Observer.changedInstr(MI);
2243 return Legalized;
2244
2245 case TargetOpcode::G_ICMP:
2246 Observer.changingInstr(MI);
2247 if (TypeIdx == 0)
2248 widenScalarDst(MI, WideTy);
2249 else {
2250 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2251 MI.getOperand(1).getPredicate()))
2252 ? TargetOpcode::G_SEXT
2253 : TargetOpcode::G_ZEXT;
2254 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2255 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2256 }
2257 Observer.changedInstr(MI);
2258 return Legalized;
2259
2260 case TargetOpcode::G_PTR_ADD:
2261 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD")(static_cast <bool> (TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"
) ? void (0) : __assert_fail ("TypeIdx == 1 && \"unable to legalize pointer of G_PTR_ADD\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2261, __extension__ __PRETTY_FUNCTION__))
;
2262 Observer.changingInstr(MI);
2263 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2264 Observer.changedInstr(MI);
2265 return Legalized;
2266
2267 case TargetOpcode::G_PHI: {
2268 assert(TypeIdx == 0 && "Expecting only Idx 0")(static_cast <bool> (TypeIdx == 0 && "Expecting only Idx 0"
) ? void (0) : __assert_fail ("TypeIdx == 0 && \"Expecting only Idx 0\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2268, __extension__ __PRETTY_FUNCTION__))
;
2269
2270 Observer.changingInstr(MI);
2271 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2272 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2273 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2274 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2275 }
2276
2277 MachineBasicBlock &MBB = *MI.getParent();
2278 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2279 widenScalarDst(MI, WideTy);
2280 Observer.changedInstr(MI);
2281 return Legalized;
2282 }
2283 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2284 if (TypeIdx == 0) {
2285 Register VecReg = MI.getOperand(1).getReg();
2286 LLT VecTy = MRI.getType(VecReg);
2287 Observer.changingInstr(MI);
2288
2289 widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
2290 WideTy.getSizeInBits()),
2291 1, TargetOpcode::G_SEXT);
2292
2293 widenScalarDst(MI, WideTy, 0);
2294 Observer.changedInstr(MI);
2295 return Legalized;
2296 }
2297
2298 if (TypeIdx != 2)
2299 return UnableToLegalize;
2300 Observer.changingInstr(MI);
2301 // TODO: Probably should be zext
2302 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2303 Observer.changedInstr(MI);
2304 return Legalized;
2305 }
2306 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2307 if (TypeIdx == 1) {
2308 Observer.changingInstr(MI);
2309
2310 Register VecReg = MI.getOperand(1).getReg();
2311 LLT VecTy = MRI.getType(VecReg);
2312 LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2313
2314 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2315 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2316 widenScalarDst(MI, WideVecTy, 0);
2317 Observer.changedInstr(MI);
2318 return Legalized;
2319 }
2320
2321 if (TypeIdx == 2) {
2322 Observer.changingInstr(MI);
2323 // TODO: Probably should be zext
2324 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2325 Observer.changedInstr(MI);
2326 return Legalized;
2327 }
2328
2329 return UnableToLegalize;
2330 }
2331 case TargetOpcode::G_FADD:
2332 case TargetOpcode::G_FMUL:
2333 case TargetOpcode::G_FSUB:
2334 case TargetOpcode::G_FMA:
2335 case TargetOpcode::G_FMAD:
2336 case TargetOpcode::G_FNEG:
2337 case TargetOpcode::G_FABS:
2338 case TargetOpcode::G_FCANONICALIZE:
2339 case TargetOpcode::G_FMINNUM:
2340 case TargetOpcode::G_FMAXNUM:
2341 case TargetOpcode::G_FMINNUM_IEEE:
2342 case TargetOpcode::G_FMAXNUM_IEEE:
2343 case TargetOpcode::G_FMINIMUM:
2344 case TargetOpcode::G_FMAXIMUM:
2345 case TargetOpcode::G_FDIV:
2346 case TargetOpcode::G_FREM:
2347 case TargetOpcode::G_FCEIL:
2348 case TargetOpcode::G_FFLOOR:
2349 case TargetOpcode::G_FCOS:
2350 case TargetOpcode::G_FSIN:
2351 case TargetOpcode::G_FLOG10:
2352 case TargetOpcode::G_FLOG:
2353 case TargetOpcode::G_FLOG2:
2354 case TargetOpcode::G_FRINT:
2355 case TargetOpcode::G_FNEARBYINT:
2356 case TargetOpcode::G_FSQRT:
2357 case TargetOpcode::G_FEXP:
2358 case TargetOpcode::G_FEXP2:
2359 case TargetOpcode::G_FPOW:
2360 case TargetOpcode::G_INTRINSIC_TRUNC:
2361 case TargetOpcode::G_INTRINSIC_ROUND:
2362 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2363 assert(TypeIdx == 0)(static_cast <bool> (TypeIdx == 0) ? void (0) : __assert_fail
("TypeIdx == 0", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 2363, __extension__ __PRETTY_FUNCTION__))
;
2364 Observer.changingInstr(MI);
2365
2366 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2367 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2368
2369 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2370 Observer.changedInstr(MI);
2371 return Legalized;
2372 case TargetOpcode::G_FPOWI: {
2373 if (TypeIdx != 0)
2374 return UnableToLegalize;
2375 Observer.changingInstr(MI);
2376 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2377 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2378 Observer.changedInstr(MI);
2379 return Legalized;
2380 }
2381 case TargetOpcode::G_INTTOPTR:
2382 if (TypeIdx != 1)
2383 return UnableToLegalize;
2384
2385 Observer.changingInstr(MI);
2386 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2387 Observer.changedInstr(MI);
2388 return Legalized;
2389 case TargetOpcode::G_PTRTOINT:
2390 if (TypeIdx != 0)
2391 return UnableToLegalize;
2392
2393 Observer.changingInstr(MI);
2394 widenScalarDst(MI, WideTy, 0);
2395 Observer.changedInstr(MI);
2396 return Legalized;
2397 case TargetOpcode::G_BUILD_VECTOR: {
2398 Observer.changingInstr(MI);
2399
2400 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2401 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2402 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2403
2404 // Avoid changing the result vector type if the source element type was
2405 // requested.
2406 if (TypeIdx == 1) {
2407 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2408 } else {
2409 widenScalarDst(MI, WideTy, 0);
2410 }
2411
2412 Observer.changedInstr(MI);
2413 return Legalized;
2414 }
2415 case TargetOpcode::G_SEXT_INREG:
2416 if (TypeIdx != 0)
2417 return UnableToLegalize;
2418
2419 Observer.changingInstr(MI);
2420 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2421 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2422 Observer.changedInstr(MI);
2423 return Legalized;
2424 case TargetOpcode::G_PTRMASK: {
2425 if (TypeIdx != 1)
2426 return UnableToLegalize;
2427 Observer.changingInstr(MI);
2428 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2429 Observer.changedInstr(MI);
2430 return Legalized;
2431 }
2432 }
2433}
2434
2435static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2436 MachineIRBuilder &B, Register Src, LLT Ty) {
2437 auto Unmerge = B.buildUnmerge(Ty, Src);
2438 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2439 Pieces.push_back(Unmerge.getReg(I));
2440}
2441
2442LegalizerHelper::LegalizeResult
2443LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2444 Register Dst = MI.getOperand(0).getReg();
2445 Register Src = MI.getOperand(1).getReg();
2446 LLT DstTy = MRI.getType(Dst);
2447 LLT SrcTy = MRI.getType(Src);
2448
2449 if (SrcTy.isVector()) {
2450 LLT SrcEltTy = SrcTy.getElementType();
2451 SmallVector<Register, 8> SrcRegs;
2452
2453 if (DstTy.isVector()) {
2454 int NumDstElt = DstTy.getNumElements();
2455 int NumSrcElt = SrcTy.getNumElements();
2456
2457 LLT DstEltTy = DstTy.getElementType();
2458 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2459 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2460
2461 // If there's an element size mismatch, insert intermediate casts to match
2462 // the result element type.
2463 if (NumSrcElt < NumDstElt) { // Source element type is larger.
2464 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2465 //
2466 // =>
2467 //
2468 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2469 // %3:_(<2 x s8>) = G_BITCAST %2
2470 // %4:_(<2 x s8>) = G_BITCAST %3
2471 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
2472 DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2473 SrcPartTy = SrcEltTy;
2474 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2475 //
2476 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2477 //
2478 // =>
2479 //
2480 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2481 // %3:_(s16) = G_BITCAST %2
2482 // %4:_(s16) = G_BITCAST %3
2483 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
2484 SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2485 DstCastTy = DstEltTy;
2486 }
2487
2488 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2489 for (Register &SrcReg : SrcRegs)
2490 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2491 } else
2492 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2493
2494 MIRBuilder.buildMerge(Dst, SrcRegs);
2495 MI.eraseFromParent();
2496 return Legalized;
2497 }
2498
2499 if (DstTy.isVector()) {
2500 SmallVector<Register, 8> SrcRegs;
2501 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2502 MIRBuilder.buildMerge(Dst, SrcRegs);
2503 MI.eraseFromParent();
2504 return Legalized;
2505 }
2506
2507 return UnableToLegalize;
2508}
2509
2510/// Figure out the bit offset into a register when coercing a vector index for
2511/// the wide element type. This is only for the case when promoting vector to
2512/// one with larger elements.
2513//
2514///
2515/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2516/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2517static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2518 Register Idx,
2519 unsigned NewEltSize,
2520 unsigned OldEltSize) {
2521 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2522 LLT IdxTy = B.getMRI()->getType(Idx);
2523
2524 // Now figure out the amount we need to shift to get the target bits.
2525 auto OffsetMask = B.buildConstant(
2526 IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2527 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2528 return B.buildShl(IdxTy, OffsetIdx,
2529 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2530}
2531
2532/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2533/// is casting to a vector with a smaller element size, perform multiple element
2534/// extracts and merge the results. If this is coercing to a vector with larger
2535/// elements, index the bitcasted vector and extract the target element with bit
2536/// operations. This is intended to force the indexing in the native register
2537/// size for architectures that can dynamically index the register file.
2538LegalizerHelper::LegalizeResult
2539LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2540 LLT CastTy) {
2541 if (TypeIdx != 1)
2542 return UnableToLegalize;
2543
2544 Register Dst = MI.getOperand(0).getReg();
2545 Register SrcVec = MI.getOperand(1).getReg();
2546 Register Idx = MI.getOperand(2).getReg();
2547 LLT SrcVecTy = MRI.getType(SrcVec);
2548 LLT IdxTy = MRI.getType(Idx);
2549
2550 LLT SrcEltTy = SrcVecTy.getElementType();
2551 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2552 unsigned OldNumElts = SrcVecTy.getNumElements();
2553
2554 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2555 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2556
2557 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2558 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2559 if (NewNumElts > OldNumElts) {
2560 // Decreasing the vector element size
2561 //
2562 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2563 // =>
2564 // v4i32:castx = bitcast x:v2i64
2565 //
2566 // i64 = bitcast
2567 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2568 // (i32 (extract_vector_elt castx, (2 * y + 1)))
2569 //
2570 if (NewNumElts % OldNumElts != 0)
2571 return UnableToLegalize;
2572
2573 // Type of the intermediate result vector.
2574 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2575 LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
2576
2577 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2578
2579 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2580 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2581
2582 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2583 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2584 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2585 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2586 NewOps[I] = Elt.getReg(0);
2587 }
2588
2589 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2590 MIRBuilder.buildBitcast(Dst, NewVec);
2591 MI.eraseFromParent();
2592 return Legalized;
2593 }
2594
2595 if (NewNumElts < OldNumElts) {
2596 if (NewEltSize % OldEltSize != 0)
2597 return UnableToLegalize;
2598
2599 // This only depends on powers of 2 because we use bit tricks to figure out
2600 // the bit offset we need to shift to get the target element. A general
2601 // expansion could emit division/multiply.
2602 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2603 return UnableToLegalize;
2604
2605 // Increasing the vector element size.
2606 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2607 //
2608 // =>
2609 //
2610 // %cast = G_BITCAST %vec
2611 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2612 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2613 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2614 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2615 // %elt_bits = G_LSHR %wide_elt, %offset_bits
2616 // %elt = G_TRUNC %elt_bits
2617
2618 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2619 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2620
2621 // Divide to get the index in the wider element type.
2622 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2623
2624 Register WideElt = CastVec;
2625 if (CastTy.isVector()) {
2626 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2627 ScaledIdx).getReg(0);
2628 }
2629
2630 // Compute the bit offset into the register of the target element.
2631 Register OffsetBits = getBitcastWiderVectorElementOffset(
2632 MIRBuilder, Idx, NewEltSize, OldEltSize);
2633
2634 // Shift the wide element to get the target element.
2635 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2636 MIRBuilder.buildTrunc(Dst, ExtractedBits);
2637 MI.eraseFromParent();
2638 return Legalized;
2639 }
2640
2641 return UnableToLegalize;
2642}
2643
2644/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
2645/// TargetReg, while preserving other bits in \p TargetReg.
2646///
2647/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
2648static Register buildBitFieldInsert(MachineIRBuilder &B,
2649 Register TargetReg, Register InsertReg,
2650 Register OffsetBits) {
2651 LLT TargetTy = B.getMRI()->getType(TargetReg);
2652 LLT InsertTy = B.getMRI()->getType(InsertReg);
2653 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2654 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2655
2656 // Produce a bitmask of the value to insert
2657 auto EltMask = B.buildConstant(
2658 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2659 InsertTy.getSizeInBits()));
2660 // Shift it into position
2661 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2662 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2663
2664 // Clear out the bits in the wide element
2665 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2666
2667 // The value to insert has all zeros already, so stick it into the masked
2668 // wide element.
2669 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2670}
2671
2672/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2673/// is increasing the element size, perform the indexing in the target element
2674/// type, and use bit operations to insert at the element position. This is
2675/// intended for architectures that can dynamically index the register file and
2676/// want to force indexing in the native register size.
2677LegalizerHelper::LegalizeResult
2678LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
2679 LLT CastTy) {
2680 if (TypeIdx != 0)
2681 return UnableToLegalize;
2682
2683 Register Dst = MI.getOperand(0).getReg();
2684 Register SrcVec = MI.getOperand(1).getReg();
2685 Register Val = MI.getOperand(2).getReg();
2686 Register Idx = MI.getOperand(3).getReg();
2687
2688 LLT VecTy = MRI.getType(Dst);
2689 LLT IdxTy = MRI.getType(Idx);
2690
2691 LLT VecEltTy = VecTy.getElementType();
2692 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2693 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2694 const unsigned OldEltSize = VecEltTy.getSizeInBits();
2695
2696 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2697 unsigned OldNumElts = VecTy.getNumElements();
2698
2699 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2700 if (NewNumElts < OldNumElts) {
2701 if (NewEltSize % OldEltSize != 0)
2702 return UnableToLegalize;
2703
2704 // This only depends on powers of 2 because we use bit tricks to figure out
2705 // the bit offset we need to shift to get the target element. A general
2706 // expansion could emit division/multiply.
2707 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2708 return UnableToLegalize;
2709
2710 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2711 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2712
2713 // Divide to get the index in the wider element type.
2714 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2715
2716 Register ExtractedElt = CastVec;
2717 if (CastTy.isVector()) {
2718 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2719 ScaledIdx).getReg(0);
2720 }
2721
2722 // Compute the bit offset into the register of the target element.
2723 Register OffsetBits = getBitcastWiderVectorElementOffset(
2724 MIRBuilder, Idx, NewEltSize, OldEltSize);
2725
2726 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2727 Val, OffsetBits);
2728 if (CastTy.isVector()) {
2729 InsertedElt = MIRBuilder.buildInsertVectorElement(
2730 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2731 }
2732
2733 MIRBuilder.buildBitcast(Dst, InsertedElt);
2734 MI.eraseFromParent();
2735 return Legalized;
2736 }
2737
2738 return UnableToLegalize;
2739}
2740
2741LegalizerHelper::LegalizeResult
2742LegalizerHelper::lowerLoad(MachineInstr &MI) {
2743 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2744 Register DstReg = MI.getOperand(0).getReg();
2745 Register PtrReg = MI.getOperand(1).getReg();
2746 LLT DstTy = MRI.getType(DstReg);
2747 auto &MMO = **MI.memoperands_begin();
2748
2749 if (DstTy.getSizeInBits() != MMO.getSizeInBits())
2750 return UnableToLegalize;
2751
2752 if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2753 // This load needs splitting into power of 2 sized loads.
2754 if (DstTy.isVector())
2755 return UnableToLegalize;
2756 if (isPowerOf2_32(DstTy.getSizeInBits()))
2757 return UnableToLegalize; // Don't know what we're being asked to do.
2758
2759 // Our strategy here is to generate anyextending loads for the smaller
2760 // types up to next power-2 result type, and then combine the two larger
2761 // result values together, before truncating back down to the non-pow-2
2762 // type.
2763 // E.g. v1 = i24 load =>
2764 // v2 = i32 zextload (2 byte)
2765 // v3 = i32 load (1 byte)
2766 // v4 = i32 shl v3, 16
2767 // v5 = i32 or v4, v2
2768 // v1 = i24 trunc v5
2769 // By doing this we generate the correct truncate which should get
2770 // combined away as an artifact with a matching extend.
2771 uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2772 uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2773
2774 MachineFunction &MF = MIRBuilder.getMF();
2775 MachineMemOperand *LargeMMO =
2776 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2777 MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2778 &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2779
2780 LLT PtrTy = MRI.getType(PtrReg);
2781 unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2782 LLT AnyExtTy = LLT::scalar(AnyExtSize);
2783 auto LargeLoad = MIRBuilder.buildLoadInstr(
2784 TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
2785
2786 auto OffsetCst = MIRBuilder.buildConstant(
2787 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2788 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2789 auto SmallPtr =
2790 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
2791 auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr,
2792 *SmallMMO);
2793
2794 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2795 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2796 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2797 MIRBuilder.buildTrunc(DstReg, {Or});
2798 MI.eraseFromParent();
2799 return Legalized;
2800 }
2801
2802 return UnableToLegalize;
2803}
2804
2805LegalizerHelper::LegalizeResult
2806LegalizerHelper::lowerStore(MachineInstr &MI) {
2807 // Lower a non-power of 2 store into multiple pow-2 stores.
2808 // E.g. split an i24 store into an i16 store + i8 store.
2809 // We do this by first extending the stored value to the next largest power
2810 // of 2 type, and then using truncating stores to store the components.
2811 // By doing this, likewise with G_LOAD, generate an extend that can be
2812 // artifact-combined away instead of leaving behind extracts.
2813 Register SrcReg = MI.getOperand(0).getReg();
2814 Register PtrReg = MI.getOperand(1).getReg();
2815 LLT SrcTy = MRI.getType(SrcReg);
2816 MachineMemOperand &MMO = **MI.memoperands_begin();
2817 if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2818 return UnableToLegalize;
2819 if (SrcTy.isVector())
2820 return UnableToLegalize;
2821 if (isPowerOf2_32(SrcTy.getSizeInBits()))
2822 return UnableToLegalize; // Don't know what we're being asked to do.
2823
2824 // Extend to the next pow-2.
2825 const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2826 auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2827
2828 // Obtain the smaller value by shifting away the larger value.
2829 uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2830 uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2831 auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2832 auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2833
2834 // Generate the PtrAdd and truncating stores.
2835 LLT PtrTy = MRI.getType(PtrReg);
2836 auto OffsetCst = MIRBuilder.buildConstant(
2837 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2838 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2839 auto SmallPtr =
2840 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
2841
2842 MachineFunction &MF = MIRBuilder.getMF();
2843 MachineMemOperand *LargeMMO =
2844 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2845 MachineMemOperand *SmallMMO =
2846 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2847 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
2848 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
2849 MI.eraseFromParent();
2850 return Legalized;
2851}
2852
2853LegalizerHelper::LegalizeResult
2854LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2855 switch (MI.getOpcode()) {
2856 case TargetOpcode::G_LOAD: {
2857 if (TypeIdx != 0)
2858 return UnableToLegalize;
2859
2860 Observer.changingInstr(MI);
2861 bitcastDst(MI, CastTy, 0);
2862 Observer.changedInstr(MI);
2863 return Legalized;
2864 }
2865 case TargetOpcode::G_STORE: {
2866 if (TypeIdx != 0)
2867 return UnableToLegalize;
2868
2869 Observer.changingInstr(MI);
2870 bitcastSrc(MI, CastTy, 0);
2871 Observer.changedInstr(MI);
2872 return Legalized;
2873 }
2874 case TargetOpcode::G_SELECT: {
2875 if (TypeIdx != 0)
2876 return UnableToLegalize;
2877
2878 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2879 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "bitcast action not implemented for vector select\n"
; } } while (false)
2880 dbgs() << "bitcast action not implemented for vector select\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "bitcast action not implemented for vector select\n"
; } } while (false)
;
2881 return UnableToLegalize;
2882 }
2883
2884 Observer.changingInstr(MI);
2885 bitcastSrc(MI, CastTy, 2);
2886 bitcastSrc(MI, CastTy, 3);
2887 bitcastDst(MI, CastTy, 0);
2888 Observer.changedInstr(MI);
2889 return Legalized;
2890 }
2891 case TargetOpcode::G_AND:
2892 case TargetOpcode::G_OR:
2893 case TargetOpcode::G_XOR: {
2894 Observer.changingInstr(MI);
2895 bitcastSrc(MI, CastTy, 1);
2896 bitcastSrc(MI, CastTy, 2);
2897 bitcastDst(MI, CastTy, 0);
2898 Observer.changedInstr(MI);
2899 return Legalized;
2900 }
2901 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2902 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2903 case TargetOpcode::G_INSERT_VECTOR_ELT:
2904 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2905 default:
2906 return UnableToLegalize;
2907 }
2908}
2909
2910// Legalize an instruction by changing the opcode in place.
2911void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2912 Observer.changingInstr(MI);
2913 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2914 Observer.changedInstr(MI);
2915}
2916
2917LegalizerHelper::LegalizeResult
2918LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2919 using namespace TargetOpcode;
2920
2921 switch(MI.getOpcode()) {
2922 default:
2923 return UnableToLegalize;
2924 case TargetOpcode::G_BITCAST:
2925 return lowerBitcast(MI);
2926 case TargetOpcode::G_SREM:
2927 case TargetOpcode::G_UREM: {
2928 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2929 auto Quot =
2930 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2931 {MI.getOperand(1), MI.getOperand(2)});
2932
2933 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2934 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2935 MI.eraseFromParent();
2936 return Legalized;
2937 }
2938 case TargetOpcode::G_SADDO:
2939 case TargetOpcode::G_SSUBO:
2940 return lowerSADDO_SSUBO(MI);
2941 case TargetOpcode::G_UMULH:
2942 case TargetOpcode::G_SMULH:
2943 return lowerSMULH_UMULH(MI);
2944 case TargetOpcode::G_SMULO:
2945 case TargetOpcode::G_UMULO: {
2946 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2947 // result.
2948 Register Res = MI.getOperand(0).getReg();
2949 Register Overflow = MI.getOperand(1).getReg();
2950 Register LHS = MI.getOperand(2).getReg();
2951 Register RHS = MI.getOperand(3).getReg();
2952 LLT Ty = MRI.getType(Res);
2953
2954 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2955 ? TargetOpcode::G_SMULH
2956 : TargetOpcode::G_UMULH;
2957
2958 Observer.changingInstr(MI);
2959 const auto &TII = MIRBuilder.getTII();
2960 MI.setDesc(TII.get(TargetOpcode::G_MUL));
2961 MI.RemoveOperand(1);
2962 Observer.changedInstr(MI);
2963
2964 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2965 auto Zero = MIRBuilder.buildConstant(Ty, 0);
2966
2967 // Move insert point forward so we can use the Res register if needed.
2968 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2969
2970 // For *signed* multiply, overflow is detected by checking:
2971 // (hi != (lo >> bitwidth-1))
2972 if (Opcode == TargetOpcode::G_SMULH) {
2973 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2974 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2975 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2976 } else {
2977 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2978 }
2979 return Legalized;
2980 }
2981 case TargetOpcode::G_FNEG: {
2982 Register Res = MI.getOperand(0).getReg();
2983 LLT Ty = MRI.getType(Res);
2984
2985 // TODO: Handle vector types once we are able to
2986 // represent them.
2987 if (Ty.isVector())
2988 return UnableToLegalize;
2989 auto SignMask =
2990 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
2991 Register SubByReg = MI.getOperand(1).getReg();
2992 MIRBuilder.buildXor(Res, SubByReg, SignMask);
2993 MI.eraseFromParent();
2994 return Legalized;
2995 }
2996 case TargetOpcode::G_FSUB: {
2997 Register Res = MI.getOperand(0).getReg();
2998 LLT Ty = MRI.getType(Res);
2999
3000 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3001 // First, check if G_FNEG is marked as Lower. If so, we may
3002 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3003 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3004 return UnableToLegalize;
3005 Register LHS = MI.getOperand(1).getReg();
3006 Register RHS = MI.getOperand(2).getReg();
3007 Register Neg = MRI.createGenericVirtualRegister(Ty);
3008 MIRBuilder.buildFNeg(Neg, RHS);
3009 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3010 MI.eraseFromParent();
3011 return Legalized;
3012 }
3013 case TargetOpcode::G_FMAD:
3014 return lowerFMad(MI);
3015 case TargetOpcode::G_FFLOOR:
3016 return lowerFFloor(MI);
3017 case TargetOpcode::G_INTRINSIC_ROUND:
3018 return lowerIntrinsicRound(MI);
3019 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3020 // Since round even is the assumed rounding mode for unconstrained FP
3021 // operations, rint and roundeven are the same operation.
3022 changeOpcode(MI, TargetOpcode::G_FRINT);
3023 return Legalized;
3024 }
3025 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3026 Register OldValRes = MI.getOperand(0).getReg();
3027 Register SuccessRes = MI.getOperand(1).getReg();
3028 Register Addr = MI.getOperand(2).getReg();
3029 Register CmpVal = MI.getOperand(3).getReg();
3030 Register NewVal = MI.getOperand(4).getReg();
3031 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3032 **MI.memoperands_begin());
3033 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3034 MI.eraseFromParent();
3035 return Legalized;
3036 }
3037 case TargetOpcode::G_LOAD:
3038 case TargetOpcode::G_SEXTLOAD:
3039 case TargetOpcode::G_ZEXTLOAD:
3040 return lowerLoad(MI);
3041 case TargetOpcode::G_STORE:
3042 return lowerStore(MI);
3043 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3044 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3045 case TargetOpcode::G_CTLZ:
3046 case TargetOpcode::G_CTTZ:
3047 case TargetOpcode::G_CTPOP:
3048 return lowerBitCount(MI);
3049 case G_UADDO: {
3050 Register Res = MI.getOperand(0).getReg();
3051 Register CarryOut = MI.getOperand(1).getReg();
3052 Register LHS = MI.getOperand(2).getReg();
3053 Register RHS = MI.getOperand(3).getReg();
3054
3055 MIRBuilder.buildAdd(Res, LHS, RHS);
3056 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3057
3058 MI.eraseFromParent();
3059 return Legalized;
3060 }
3061 case G_UADDE: {
3062 Register Res = MI.getOperand(0).getReg();
3063 Register CarryOut = MI.getOperand(1).getReg();
3064 Register LHS = MI.getOperand(2).getReg();
3065 Register RHS = MI.getOperand(3).getReg();
3066 Register CarryIn = MI.getOperand(4).getReg();
3067 LLT Ty = MRI.getType(Res);
3068
3069 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3070 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3071 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3072 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3073
3074 MI.eraseFromParent();
3075 return Legalized;
3076 }
3077 case G_USUBO: {
3078 Register Res = MI.getOperand(0).getReg();
3079 Register BorrowOut = MI.getOperand(1).getReg();
3080 Register LHS = MI.getOperand(2).getReg();
3081 Register RHS = MI.getOperand(3).getReg();
3082
3083 MIRBuilder.buildSub(Res, LHS, RHS);
3084 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3085
3086 MI.eraseFromParent();
3087 return Legalized;
3088 }
3089 case G_USUBE: {
3090 Register Res = MI.getOperand(0).getReg();
3091 Register BorrowOut = MI.getOperand(1).getReg();
3092 Register LHS = MI.getOperand(2).getReg();
3093 Register RHS = MI.getOperand(3).getReg();
3094 Register BorrowIn = MI.getOperand(4).getReg();
3095 const LLT CondTy = MRI.getType(BorrowOut);
3096 const LLT Ty = MRI.getType(Res);
3097
3098 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3099 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3100 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3101
3102 auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3103 auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3104 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3105
3106 MI.eraseFromParent();
3107 return Legalized;
3108 }
3109 case G_UITOFP:
3110 return lowerUITOFP(MI);
3111 case G_SITOFP:
3112 return lowerSITOFP(MI);
3113 case G_FPTOUI:
3114 return lowerFPTOUI(MI);
3115 case G_FPTOSI:
3116 return lowerFPTOSI(MI);
3117 case G_FPTRUNC:
3118 return lowerFPTRUNC(MI);
3119 case G_FPOWI:
3120 return lowerFPOWI(MI);
3121 case G_SMIN:
3122 case G_SMAX:
3123 case G_UMIN:
3124 case G_UMAX:
3125 return lowerMinMax(MI);
3126 case G_FCOPYSIGN:
3127 return lowerFCopySign(MI);
3128 case G_FMINNUM:
3129 case G_FMAXNUM:
3130 return lowerFMinNumMaxNum(MI);
3131 case G_MERGE_VALUES:
3132 return lowerMergeValues(MI);
3133 case G_UNMERGE_VALUES:
3134 return lowerUnmergeValues(MI);
3135 case TargetOpcode::G_SEXT_INREG: {
3136 assert(MI.getOperand(2).isImm() && "Expected immediate")(static_cast <bool> (MI.getOperand(2).isImm() &&
"Expected immediate") ? void (0) : __assert_fail ("MI.getOperand(2).isImm() && \"Expected immediate\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3136, __extension__ __PRETTY_FUNCTION__))
;
3137 int64_t SizeInBits = MI.getOperand(2).getImm();
3138
3139 Register DstReg = MI.getOperand(0).getReg();
3140 Register SrcReg = MI.getOperand(1).getReg();
3141 LLT DstTy = MRI.getType(DstReg);
3142 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3143
3144 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3145 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3146 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3147 MI.eraseFromParent();
3148 return Legalized;
3149 }
3150 case G_EXTRACT_VECTOR_ELT:
3151 case G_INSERT_VECTOR_ELT:
3152 return lowerExtractInsertVectorElt(MI);
3153 case G_SHUFFLE_VECTOR:
3154 return lowerShuffleVector(MI);
3155 case G_DYN_STACKALLOC:
3156 return lowerDynStackAlloc(MI);
3157 case G_EXTRACT:
3158 return lowerExtract(MI);
3159 case G_INSERT:
3160 return lowerInsert(MI);
3161 case G_BSWAP:
3162 return lowerBswap(MI);
3163 case G_BITREVERSE:
3164 return lowerBitreverse(MI);
3165 case G_READ_REGISTER:
3166 case G_WRITE_REGISTER:
3167 return lowerReadWriteRegister(MI);
3168 case G_UADDSAT:
3169 case G_USUBSAT: {
3170 // Try to make a reasonable guess about which lowering strategy to use. The
3171 // target can override this with custom lowering and calling the
3172 // implementation functions.
3173 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3174 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3175 return lowerAddSubSatToMinMax(MI);
3176 return lowerAddSubSatToAddoSubo(MI);
3177 }
3178 case G_SADDSAT:
3179 case G_SSUBSAT: {
3180 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3181
3182 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3183 // since it's a shorter expansion. However, we would need to figure out the
3184 // preferred boolean type for the carry out for the query.
3185 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3186 return lowerAddSubSatToMinMax(MI);
3187 return lowerAddSubSatToAddoSubo(MI);
3188 }
3189 case G_SSHLSAT:
3190 case G_USHLSAT:
3191 return lowerShlSat(MI);
3192 case G_ABS:
3193 return lowerAbsToAddXor(MI);
3194 case G_SELECT:
3195 return lowerSelect(MI);
3196 case G_SDIVREM:
3197 case G_UDIVREM:
3198 return lowerDIVREM(MI);
3199 case G_FSHL:
3200 case G_FSHR:
3201 return lowerFunnelShift(MI);
3202 case G_ROTL:
3203 case G_ROTR:
3204 return lowerRotate(MI);
3205 }
3206}
3207
3208Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3209 Align MinAlign) const {
3210 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3211 // datalayout for the preferred alignment. Also there should be a target hook
3212 // for this to allow targets to reduce the alignment and ignore the
3213 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3214 // the type.
3215 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3216}
3217
3218MachineInstrBuilder
3219LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3220 MachinePointerInfo &PtrInfo) {
3221 MachineFunction &MF = MIRBuilder.getMF();
3222 const DataLayout &DL = MIRBuilder.getDataLayout();
3223 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3224
3225 unsigned AddrSpace = DL.getAllocaAddrSpace();
3226 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3227
3228 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3229 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3230}
3231
3232static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3233 LLT VecTy) {
3234 int64_t IdxVal;
3235 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3236 return IdxReg;
3237
3238 LLT IdxTy = B.getMRI()->getType(IdxReg);
3239 unsigned NElts = VecTy.getNumElements();
3240 if (isPowerOf2_32(NElts)) {
3241 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3242 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3243 }
3244
3245 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3246 .getReg(0);
3247}
3248
3249Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3250 Register Index) {
3251 LLT EltTy = VecTy.getElementType();
3252
3253 // Calculate the element offset and add it to the pointer.
3254 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3255 assert(EltSize * 8 == EltTy.getSizeInBits() &&(static_cast <bool> (EltSize * 8 == EltTy.getSizeInBits
() && "Converting bits to bytes lost precision") ? void
(0) : __assert_fail ("EltSize * 8 == EltTy.getSizeInBits() && \"Converting bits to bytes lost precision\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3256, __extension__ __PRETTY_FUNCTION__))
3256 "Converting bits to bytes lost precision")(static_cast <bool> (EltSize * 8 == EltTy.getSizeInBits
() && "Converting bits to bytes lost precision") ? void
(0) : __assert_fail ("EltSize * 8 == EltTy.getSizeInBits() && \"Converting bits to bytes lost precision\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3256, __extension__ __PRETTY_FUNCTION__))
;
3257
3258 Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3259
3260 LLT IdxTy = MRI.getType(Index);
3261 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3262 MIRBuilder.buildConstant(IdxTy, EltSize));
3263
3264 LLT PtrTy = MRI.getType(VecPtr);
3265 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3266}
3267
3268LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3269 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3270 Register DstReg = MI.getOperand(0).getReg();
3271 LLT DstTy = MRI.getType(DstReg);
3272 LLT LCMTy = getLCMType(DstTy, NarrowTy);
3273
3274 unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3275
3276 auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3277 SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3278
3279 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3280 MI.eraseFromParent();
3281 return Legalized;
3282}
3283
3284// Handle splitting vector operations which need to have the same number of
3285// elements in each type index, but each type index may have a different element
3286// type.
3287//
3288// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3289// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3290// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3291//
3292// Also handles some irregular breakdown cases, e.g.
3293// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3294// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3295// s64 = G_SHL s64, s32
3296LegalizerHelper::LegalizeResult
3297LegalizerHelper::fewerElementsVectorMultiEltType(
3298 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3299 if (TypeIdx != 0)
3300 return UnableToLegalize;
3301
3302 const LLT NarrowTy0 = NarrowTyArg;
3303 const unsigned NewNumElts =
3304 NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
3305
3306 const Register DstReg = MI.getOperand(0).getReg();
3307 LLT DstTy = MRI.getType(DstReg);
3308 LLT LeftoverTy0;
3309
3310 // All of the operands need to have the same number of elements, so if we can
3311 // determine a type breakdown for the result type, we can for all of the
3312 // source types.
3313 int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3314 if (NumParts < 0)
3315 return UnableToLegalize;
3316
3317 SmallVector<MachineInstrBuilder, 4> NewInsts;
3318
3319 SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3320 SmallVector<Register, 4> PartRegs, LeftoverRegs;
3321
3322 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3323 Register SrcReg = MI.getOperand(I).getReg();
3324 LLT SrcTyI = MRI.getType(SrcReg);
3325 LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
3326 LLT LeftoverTyI;
3327
3328 // Split this operand into the requested typed registers, and any leftover
3329 // required to reproduce the original type.
3330 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3331 LeftoverRegs))
3332 return UnableToLegalize;
3333
3334 if (I == 1) {
3335 // For the first operand, create an instruction for each part and setup
3336 // the result.
3337 for (Register PartReg : PartRegs) {
3338 Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3339 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3340 .addDef(PartDstReg)
3341 .addUse(PartReg));
3342 DstRegs.push_back(PartDstReg);
3343 }
3344
3345 for (Register LeftoverReg : LeftoverRegs) {
3346 Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3347 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3348 .addDef(PartDstReg)
3349 .addUse(LeftoverReg));
3350 LeftoverDstRegs.push_back(PartDstReg);
3351 }
3352 } else {
3353 assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size())(static_cast <bool> (NewInsts.size() == PartRegs.size()
+ LeftoverRegs.size()) ? void (0) : __assert_fail ("NewInsts.size() == PartRegs.size() + LeftoverRegs.size()"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3353, __extension__ __PRETTY_FUNCTION__))
;
3354
3355 // Add the newly created operand splits to the existing instructions. The
3356 // odd-sized pieces are ordered after the requested NarrowTyArg sized
3357 // pieces.
3358 unsigned InstCount = 0;
3359 for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3360 NewInsts[InstCount++].addUse(PartRegs[J]);
3361 for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3362 NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3363 }
3364
3365 PartRegs.clear();
3366 LeftoverRegs.clear();
3367 }
3368
3369 // Insert the newly built operations and rebuild the result register.
3370 for (auto &MIB : NewInsts)
3371 MIRBuilder.insertInstr(MIB);
3372
3373 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3374
3375 MI.eraseFromParent();
3376 return Legalized;
3377}
3378
3379LegalizerHelper::LegalizeResult
3380LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3381 LLT NarrowTy) {
3382 if (TypeIdx != 0)
3383 return UnableToLegalize;
3384
3385 Register DstReg = MI.getOperand(0).getReg();
3386 Register SrcReg = MI.getOperand(1).getReg();
3387 LLT DstTy = MRI.getType(DstReg);
3388 LLT SrcTy = MRI.getType(SrcReg);
3389
3390 LLT NarrowTy0 = NarrowTy;
3391 LLT NarrowTy1;
3392 unsigned NumParts;
3393
3394 if (NarrowTy.isVector()) {
3395 // Uneven breakdown not handled.
3396 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3397 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3398 return UnableToLegalize;
3399
3400 NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3401 } else {
3402 NumParts = DstTy.getNumElements();
3403 NarrowTy1 = SrcTy.getElementType();
3404 }
3405
3406 SmallVector<Register, 4> SrcRegs, DstRegs;
3407 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3408
3409 for (unsigned I = 0; I < NumParts; ++I) {
3410 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3411 MachineInstr *NewInst =
3412 MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3413
3414 NewInst->setFlags(MI.getFlags());
3415 DstRegs.push_back(DstReg);
3416 }
3417
3418 if (NarrowTy.isVector())
3419 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3420 else
3421 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3422
3423 MI.eraseFromParent();
3424 return Legalized;
3425}
3426
3427LegalizerHelper::LegalizeResult
3428LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
3429 LLT NarrowTy) {
3430 Register DstReg = MI.getOperand(0).getReg();
3431 Register Src0Reg = MI.getOperand(2).getReg();
3432 LLT DstTy = MRI.getType(DstReg);
3433 LLT SrcTy = MRI.getType(Src0Reg);
3434
3435 unsigned NumParts;
3436 LLT NarrowTy0, NarrowTy1;
3437
3438 if (TypeIdx == 0) {
3439 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3440 unsigned OldElts = DstTy.getNumElements();
3441
3442 NarrowTy0 = NarrowTy;
3443 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3444 NarrowTy1 = NarrowTy.isVector() ?
3445 LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
3446 SrcTy.getElementType();
3447
3448 } else {
3449 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3450 unsigned OldElts = SrcTy.getNumElements();
3451
3452 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3453 NarrowTy.getNumElements();
3454 NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
3455 DstTy.getScalarSizeInBits());
3456 NarrowTy1 = NarrowTy;
3457 }
3458
3459 // FIXME: Don't know how to handle the situation where the small vectors
3460 // aren't all the same size yet.
3461 if (NarrowTy1.isVector() &&
3462 NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3463 return UnableToLegalize;
3464
3465 CmpInst::Predicate Pred
3466 = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3467
3468 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3469 extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3470 extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3471
3472 for (unsigned I = 0; I < NumParts; ++I) {
3473 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3474 DstRegs.push_back(DstReg);
3475
3476 if (MI.getOpcode() == TargetOpcode::G_ICMP)
3477 MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3478 else {
3479 MachineInstr *NewCmp
3480 = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3481 NewCmp->setFlags(MI.getFlags());
3482 }
3483 }
3484
3485 if (NarrowTy1.isVector())
3486 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3487 else
3488 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3489
3490 MI.eraseFromParent();
3491 return Legalized;
3492}
3493
3494LegalizerHelper::LegalizeResult
3495LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3496 LLT NarrowTy) {
3497 Register DstReg = MI.getOperand(0).getReg();
3498 Register CondReg = MI.getOperand(1).getReg();
3499
3500 unsigned NumParts = 0;
3501 LLT NarrowTy0, NarrowTy1;
3502
3503 LLT DstTy = MRI.getType(DstReg);
3504 LLT CondTy = MRI.getType(CondReg);
3505 unsigned Size = DstTy.getSizeInBits();
3506
3507 assert(TypeIdx == 0 || CondTy.isVector())(static_cast <bool> (TypeIdx == 0 || CondTy.isVector())
? void (0) : __assert_fail ("TypeIdx == 0 || CondTy.isVector()"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3507, __extension__ __PRETTY_FUNCTION__))
;
3508
3509 if (TypeIdx == 0) {
3510 NarrowTy0 = NarrowTy;
3511 NarrowTy1 = CondTy;
3512
3513 unsigned NarrowSize = NarrowTy0.getSizeInBits();
3514 // FIXME: Don't know how to handle the situation where the small vectors
3515 // aren't all the same size yet.
3516 if (Size % NarrowSize != 0)
3517 return UnableToLegalize;
3518
3519 NumParts = Size / NarrowSize;
3520
3521 // Need to break down the condition type
3522 if (CondTy.isVector()) {
3523 if (CondTy.getNumElements() == NumParts)
3524 NarrowTy1 = CondTy.getElementType();
3525 else
3526 NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3527 CondTy.getScalarSizeInBits());
3528 }
3529 } else {
3530 NumParts = CondTy.getNumElements();
3531 if (NarrowTy.isVector()) {
3532 // TODO: Handle uneven breakdown.
3533 if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3534 return UnableToLegalize;
3535
3536 return UnableToLegalize;
3537 } else {
3538 NarrowTy0 = DstTy.getElementType();
3539 NarrowTy1 = NarrowTy;
3540 }
3541 }
3542
3543 SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3544 if (CondTy.isVector())
3545 extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3546
3547 extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3548 extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3549
3550 for (unsigned i = 0; i < NumParts; ++i) {
3551 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3552 MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3553 Src1Regs[i], Src2Regs[i]);
3554 DstRegs.push_back(DstReg);
3555 }
3556
3557 if (NarrowTy0.isVector())
3558 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3559 else
3560 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3561
3562 MI.eraseFromParent();
3563 return Legalized;
3564}
3565
3566LegalizerHelper::LegalizeResult
3567LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3568 LLT NarrowTy) {
3569 const Register DstReg = MI.getOperand(0).getReg();
3570 LLT PhiTy = MRI.getType(DstReg);
3571 LLT LeftoverTy;
3572
3573 // All of the operands need to have the same number of elements, so if we can
3574 // determine a type breakdown for the result type, we can for all of the
3575 // source types.
3576 int NumParts, NumLeftover;
3577 std::tie(NumParts, NumLeftover)
3578 = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3579 if (NumParts < 0)
3580 return UnableToLegalize;
3581
3582 SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3583 SmallVector<MachineInstrBuilder, 4> NewInsts;
3584
3585 const int TotalNumParts = NumParts + NumLeftover;
3586
3587 // Insert the new phis in the result block first.
3588 for (int I = 0; I != TotalNumParts; ++I) {
3589 LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3590 Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3591 NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3592 .addDef(PartDstReg));
3593 if (I < NumParts)
3594 DstRegs.push_back(PartDstReg);
3595 else
3596 LeftoverDstRegs.push_back(PartDstReg);
3597 }
3598
3599 MachineBasicBlock *MBB = MI.getParent();
3600 MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3601 insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3602
3603 SmallVector<Register, 4> PartRegs, LeftoverRegs;
3604
3605 // Insert code to extract the incoming values in each predecessor block.
3606 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3607 PartRegs.clear();
3608 LeftoverRegs.clear();
3609
3610 Register SrcReg = MI.getOperand(I).getReg();
3611 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3612 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3613
3614 LLT Unused;
3615 if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3616 LeftoverRegs))
3617 return UnableToLegalize;
3618
3619 // Add the newly created operand splits to the existing instructions. The
3620 // odd-sized pieces are ordered after the requested NarrowTyArg sized
3621 // pieces.
3622 for (int J = 0; J != TotalNumParts; ++J) {
3623 MachineInstrBuilder MIB = NewInsts[J];
3624 MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3625 MIB.addMBB(&OpMBB);
3626 }
3627 }
3628
3629 MI.eraseFromParent();
3630 return Legalized;
3631}
3632
3633LegalizerHelper::LegalizeResult
3634LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3635 unsigned TypeIdx,
3636 LLT NarrowTy) {
3637 if (TypeIdx != 1)
3638 return UnableToLegalize;
3639
3640 const int NumDst = MI.getNumOperands() - 1;
3641 const Register SrcReg = MI.getOperand(NumDst).getReg();
3642 LLT SrcTy = MRI.getType(SrcReg);
3643
3644 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3645
3646 // TODO: Create sequence of extracts.
3647 if (DstTy == NarrowTy)
3648 return UnableToLegalize;
3649
3650 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3651 if (DstTy == GCDTy) {
3652 // This would just be a copy of the same unmerge.
3653 // TODO: Create extracts, pad with undef and create intermediate merges.
3654 return UnableToLegalize;
3655 }
3656
3657 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3658 const int NumUnmerge = Unmerge->getNumOperands() - 1;
3659 const int PartsPerUnmerge = NumDst / NumUnmerge;
3660
3661 for (int I = 0; I != NumUnmerge; ++I) {
3662 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3663
3664 for (int J = 0; J != PartsPerUnmerge; ++J)
3665 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3666 MIB.addUse(Unmerge.getReg(I));
3667 }
3668
3669 MI.eraseFromParent();
3670 return Legalized;
3671}
3672
3673LegalizerHelper::LegalizeResult
3674LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
3675 LLT NarrowTy) {
3676 Register Result = MI.getOperand(0).getReg();
3677 Register Overflow = MI.getOperand(1).getReg();
3678 Register LHS = MI.getOperand(2).getReg();
3679 Register RHS = MI.getOperand(3).getReg();
3680
3681 LLT SrcTy = MRI.getType(LHS);
3682 if (!SrcTy.isVector())
3683 return UnableToLegalize;
3684
3685 LLT ElementType = SrcTy.getElementType();
3686 LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
3687 const int NumResult = SrcTy.getNumElements();
3688 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3689
3690 // Unmerge the operands to smaller parts of GCD type.
3691 auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
3692 auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
3693
3694 const int NumOps = UnmergeLHS->getNumOperands() - 1;
3695 const int PartsPerUnmerge = NumResult / NumOps;
3696 LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
3697 LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
3698
3699 // Perform the operation over unmerged parts.
3700 SmallVector<Register, 8> ResultParts;
3701 SmallVector<Register, 8> OverflowParts;
3702 for (int I = 0; I != NumOps; ++I) {
3703 Register Operand1 = UnmergeLHS->getOperand(I).getReg();
3704 Register Operand2 = UnmergeRHS->getOperand(I).getReg();
3705 auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
3706 {Operand1, Operand2});
3707 ResultParts.push_back(PartMul->getOperand(0).getReg());
3708 OverflowParts.push_back(PartMul->getOperand(1).getReg());
3709 }
3710
3711 LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
3712 LLT OverflowLCMTy =
3713 LLT::scalarOrVector(ResultLCMTy.getNumElements(), OverflowElementTy);
3714
3715 // Recombine the pieces to the original result and overflow registers.
3716 buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
3717 buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
3718 MI.eraseFromParent();
3719 return Legalized;
3720}
3721
3722// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
3723// a vector
3724//
3725// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3726// undef as necessary.
3727//
3728// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3729// -> <2 x s16>
3730//
3731// %4:_(s16) = G_IMPLICIT_DEF
3732// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3733// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3734// %7:_(<2 x s16>) = G_IMPLICIT_DEF
3735// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3736// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
3737LegalizerHelper::LegalizeResult
3738LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3739 LLT NarrowTy) {
3740 Register DstReg = MI.getOperand(0).getReg();
3741 LLT DstTy = MRI.getType(DstReg);
3742 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3743 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3744
3745 // Break into a common type
3746 SmallVector<Register, 16> Parts;
3747 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3748 extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3749
3750 // Build the requested new merge, padding with undef.
3751 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3752 TargetOpcode::G_ANYEXT);
3753
3754 // Pack into the original result register.
3755 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3756
3757 MI.eraseFromParent();
3758 return Legalized;
3759}
3760
3761LegalizerHelper::LegalizeResult
3762LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3763 unsigned TypeIdx,
3764 LLT NarrowVecTy) {
3765 Register DstReg = MI.getOperand(0).getReg();
3766 Register SrcVec = MI.getOperand(1).getReg();
3767 Register InsertVal;
3768 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3769
3770 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index")(static_cast <bool> ((IsInsert ? TypeIdx == 0 : TypeIdx
== 1) && "not a vector type index") ? void (0) : __assert_fail
("(IsInsert ? TypeIdx == 0 : TypeIdx == 1) && \"not a vector type index\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3770, __extension__ __PRETTY_FUNCTION__))
;
3771 if (IsInsert)
3772 InsertVal = MI.getOperand(2).getReg();
3773
3774 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3775
3776 // TODO: Handle total scalarization case.
3777 if (!NarrowVecTy.isVector())
3778 return UnableToLegalize;
3779
3780 LLT VecTy = MRI.getType(SrcVec);
3781
3782 // If the index is a constant, we can really break this down as you would
3783 // expect, and index into the target size pieces.
3784 int64_t IdxVal;
3785 auto MaybeCst =
3786 getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
3787 /*HandleFConstants*/ false);
3788 if (MaybeCst) {
3789 IdxVal = MaybeCst->Value.getSExtValue();
3790 // Avoid out of bounds indexing the pieces.
3791 if (IdxVal >= VecTy.getNumElements()) {
3792 MIRBuilder.buildUndef(DstReg);
3793 MI.eraseFromParent();
3794 return Legalized;
3795 }
3796
3797 SmallVector<Register, 8> VecParts;
3798 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3799
3800 // Build a sequence of NarrowTy pieces in VecParts for this operand.
3801 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3802 TargetOpcode::G_ANYEXT);
3803
3804 unsigned NewNumElts = NarrowVecTy.getNumElements();
3805
3806 LLT IdxTy = MRI.getType(Idx);
3807 int64_t PartIdx = IdxVal / NewNumElts;
3808 auto NewIdx =
3809 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
3810
3811 if (IsInsert) {
3812 LLT PartTy = MRI.getType(VecParts[PartIdx]);
3813
3814 // Use the adjusted index to insert into one of the subvectors.
3815 auto InsertPart = MIRBuilder.buildInsertVectorElement(
3816 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
3817 VecParts[PartIdx] = InsertPart.getReg(0);
3818
3819 // Recombine the inserted subvector with the others to reform the result
3820 // vector.
3821 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3822 } else {
3823 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
3824 }
3825
3826 MI.eraseFromParent();
3827 return Legalized;
3828 }
3829
3830 // With a variable index, we can't perform the operation in a smaller type, so
3831 // we're forced to expand this.
3832 //
3833 // TODO: We could emit a chain of compare/select to figure out which piece to
3834 // index.
3835 return lowerExtractInsertVectorElt(MI);
3836}
3837
3838LegalizerHelper::LegalizeResult
3839LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3840 LLT NarrowTy) {
3841 // FIXME: Don't know how to handle secondary types yet.
3842 if (TypeIdx != 0)
3843 return UnableToLegalize;
3844
3845 MachineMemOperand *MMO = *MI.memoperands_begin();
3846
3847 // This implementation doesn't work for atomics. Give up instead of doing
3848 // something invalid.
3849 if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3850 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3851 return UnableToLegalize;
3852
3853 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3854 Register ValReg = MI.getOperand(0).getReg();
3855 Register AddrReg = MI.getOperand(1).getReg();
3856 LLT ValTy = MRI.getType(ValReg);
3857
3858 // FIXME: Do we need a distinct NarrowMemory legalize action?
3859 if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3860 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Can't narrow extload/truncstore\n"
; } } while (false)
;
3861 return UnableToLegalize;
3862 }
3863
3864 int NumParts = -1;
3865 int NumLeftover = -1;
3866 LLT LeftoverTy;
3867 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3868 if (IsLoad) {
3869 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3870 } else {
3871 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3872 NarrowLeftoverRegs)) {
3873 NumParts = NarrowRegs.size();
3874 NumLeftover = NarrowLeftoverRegs.size();
3875 }
3876 }
3877
3878 if (NumParts == -1)
3879 return UnableToLegalize;
3880
3881 LLT PtrTy = MRI.getType(AddrReg);
3882 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
3883
3884 unsigned TotalSize = ValTy.getSizeInBits();
3885
3886 // Split the load/store into PartTy sized pieces starting at Offset. If this
3887 // is a load, return the new registers in ValRegs. For a store, each elements
3888 // of ValRegs should be PartTy. Returns the next offset that needs to be
3889 // handled.
3890 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3891 unsigned Offset) -> unsigned {
3892 MachineFunction &MF = MIRBuilder.getMF();
3893 unsigned PartSize = PartTy.getSizeInBits();
3894 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3895 Offset += PartSize, ++Idx) {
3896 unsigned ByteSize = PartSize / 8;
3897 unsigned ByteOffset = Offset / 8;
3898 Register NewAddrReg;
3899
3900 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3901
3902 MachineMemOperand *NewMMO =
3903 MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3904
3905 if (IsLoad) {
3906 Register Dst = MRI.createGenericVirtualRegister(PartTy);
3907 ValRegs.push_back(Dst);
3908 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3909 } else {
3910 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3911 }
3912 }
3913
3914 return Offset;
3915 };
3916
3917 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3918
3919 // Handle the rest of the register if this isn't an even type breakdown.
3920 if (LeftoverTy.isValid())
3921 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3922
3923 if (IsLoad) {
3924 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3925 LeftoverTy, NarrowLeftoverRegs);
3926 }
3927
3928 MI.eraseFromParent();
3929 return Legalized;
3930}
3931
3932LegalizerHelper::LegalizeResult
3933LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
3934 LLT NarrowTy) {
3935 assert(TypeIdx == 0 && "only one type index expected")(static_cast <bool> (TypeIdx == 0 && "only one type index expected"
) ? void (0) : __assert_fail ("TypeIdx == 0 && \"only one type index expected\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3935, __extension__ __PRETTY_FUNCTION__))
;
3936
3937 const unsigned Opc = MI.getOpcode();
3938 const int NumDefOps = MI.getNumExplicitDefs();
3939 const int NumSrcOps = MI.getNumOperands() - NumDefOps;
3940 const unsigned Flags = MI.getFlags();
3941 const unsigned NarrowSize = NarrowTy.getSizeInBits();
3942 const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3943
3944 assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "(static_cast <bool> (MI.getNumOperands() <= 4 &&
"expected instruction with either 1 " "result and 1-3 sources or 2 results and "
"1-2 sources") ? void (0) : __assert_fail ("MI.getNumOperands() <= 4 && \"expected instruction with either 1 \" \"result and 1-3 sources or 2 results and \" \"1-2 sources\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3946, __extension__ __PRETTY_FUNCTION__))
3945 "result and 1-3 sources or 2 results and "(static_cast <bool> (MI.getNumOperands() <= 4 &&
"expected instruction with either 1 " "result and 1-3 sources or 2 results and "
"1-2 sources") ? void (0) : __assert_fail ("MI.getNumOperands() <= 4 && \"expected instruction with either 1 \" \"result and 1-3 sources or 2 results and \" \"1-2 sources\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3946, __extension__ __PRETTY_FUNCTION__))
3946 "1-2 sources")(static_cast <bool> (MI.getNumOperands() <= 4 &&
"expected instruction with either 1 " "result and 1-3 sources or 2 results and "
"1-2 sources") ? void (0) : __assert_fail ("MI.getNumOperands() <= 4 && \"expected instruction with either 1 \" \"result and 1-3 sources or 2 results and \" \"1-2 sources\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 3946, __extension__ __PRETTY_FUNCTION__))
;
3947
3948 SmallVector<Register, 2> DstRegs;
3949 for (int I = 0; I < NumDefOps; ++I)
3950 DstRegs.push_back(MI.getOperand(I).getReg());
3951
3952 // First of all check whether we are narrowing (changing the element type)
3953 // or reducing the vector elements
3954 const LLT DstTy = MRI.getType(DstRegs[0]);
3955 const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3956
3957 SmallVector<Register, 8> ExtractedRegs[3];
3958 SmallVector<Register, 8> Parts;
3959
3960 unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3961
3962 // Break down all the sources into NarrowTy pieces we can operate on. This may
3963 // involve creating merges to a wider type, padded with undef.
3964 for (int I = 0; I != NumSrcOps; ++I) {
3965 Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
3966 LLT SrcTy = MRI.getType(SrcReg);
3967
3968 // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3969 // For fewerElements, this is a smaller vector with the same element type.
3970 LLT OpNarrowTy;
3971 if (IsNarrow) {
3972 OpNarrowTy = NarrowScalarTy;
3973
3974 // In case of narrowing, we need to cast vectors to scalars for this to
3975 // work properly
3976 // FIXME: Can we do without the bitcast here if we're narrowing?
3977 if (SrcTy.isVector()) {
3978 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3979 SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3980 }
3981 } else {
3982 OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
3983 }
3984
3985 LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
3986
3987 // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
3988 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
3989 TargetOpcode::G_ANYEXT);
3990 }
3991
3992 SmallVector<Register, 8> ResultRegs[2];
3993
3994 // Input operands for each sub-instruction.
3995 SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
3996
3997 int NumParts = ExtractedRegs[0].size();
3998 const unsigned DstSize = DstTy.getSizeInBits();
3999 const LLT DstScalarTy = LLT::scalar(DstSize);
4000
4001 // Narrowing needs to use scalar types
4002 LLT DstLCMTy, NarrowDstTy;
4003 if (IsNarrow) {
4004 DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
4005 NarrowDstTy = NarrowScalarTy;
4006 } else {
4007 DstLCMTy = getLCMType(DstTy, NarrowTy);
4008 NarrowDstTy = NarrowTy;
4009 }
4010
4011 // We widened the source registers to satisfy merge/unmerge size
4012 // constraints. We'll have some extra fully undef parts.
4013 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
4014
4015 for (int I = 0; I != NumRealParts; ++I) {
4016 // Emit this instruction on each of the split pieces.
4017 for (int J = 0; J != NumSrcOps; ++J)
4018 InputRegs[J] = ExtractedRegs[J][I];
4019
4020 MachineInstrBuilder Inst;
4021 if (NumDefOps == 1)
4022 Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
4023 else
4024 Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
4025 Flags);
4026
4027 for (int J = 0; J != NumDefOps; ++J)
4028 ResultRegs[J].push_back(Inst.getReg(J));
4029 }
4030
4031 // Fill out the widened result with undef instead of creating instructions
4032 // with undef inputs.
4033 int NumUndefParts = NumParts - NumRealParts;
4034 if (NumUndefParts != 0) {
4035 Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
4036 for (int I = 0; I != NumDefOps; ++I)
4037 ResultRegs[I].append(NumUndefParts, Undef);
4038 }
4039
4040 // Extract the possibly padded result. Use a scratch register if we need to do
4041 // a final bitcast, otherwise use the original result register.
4042 Register MergeDstReg;
4043 for (int I = 0; I != NumDefOps; ++I) {
4044 if (IsNarrow && DstTy.isVector())
4045 MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
4046 else
4047 MergeDstReg = DstRegs[I];
4048
4049 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
4050
4051 // Recast to vector if we narrowed a vector
4052 if (IsNarrow && DstTy.isVector())
4053 MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
4054 }
4055
4056 MI.eraseFromParent();
4057 return Legalized;
4058}
4059
4060LegalizerHelper::LegalizeResult
4061LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4062 LLT NarrowTy) {
4063 Register DstReg = MI.getOperand(0).getReg();
4064 Register SrcReg = MI.getOperand(1).getReg();
4065 int64_t Imm = MI.getOperand(2).getImm();
4066
4067 LLT DstTy = MRI.getType(DstReg);
4068
4069 SmallVector<Register, 8> Parts;
4070 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4071 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4072
4073 for (Register &R : Parts)
4074 R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4075
4076 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4077
4078 MI.eraseFromParent();
4079 return Legalized;
4080}
4081
// Entry point for the fewer-elements legalization: dispatches on MI's opcode
// to the helper that knows how to split that kind of instruction, forwarding
// TypeIdx and NarrowTy unchanged. Opcodes without a handler fall through to
// the default case and yield UnableToLegalize.
4082LegalizerHelper::LegalizeResult
4083LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4084 LLT NarrowTy) {
4085 using namespace TargetOpcode;
4086
4087 switch (MI.getOpcode()) {
4088 case G_IMPLICIT_DEF:
4089 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
// Opcodes that are all routed to the generic reduceOperationWidth helper.
4090 case G_TRUNC:
4091 case G_AND:
4092 case G_OR:
4093 case G_XOR:
4094 case G_ADD:
4095 case G_SUB:
4096 case G_MUL:
4097 case G_PTR_ADD:
4098 case G_SMULH:
4099 case G_UMULH:
4100 case G_FADD:
4101 case G_FMUL:
4102 case G_FSUB:
4103 case G_FNEG:
4104 case G_FABS:
4105 case G_FCANONICALIZE:
4106 case G_FDIV:
4107 case G_FREM:
4108 case G_FMA:
4109 case G_FMAD:
4110 case G_FPOW:
4111 case G_FEXP:
4112 case G_FEXP2:
4113 case G_FLOG:
4114 case G_FLOG2:
4115 case G_FLOG10:
4116 case G_FNEARBYINT:
4117 case G_FCEIL:
4118 case G_FFLOOR:
4119 case G_FRINT:
4120 case G_INTRINSIC_ROUND:
4121 case G_INTRINSIC_ROUNDEVEN:
4122 case G_INTRINSIC_TRUNC:
4123 case G_FCOS:
4124 case G_FSIN:
4125 case G_FSQRT:
4126 case G_BSWAP:
4127 case G_BITREVERSE:
4128 case G_SDIV:
4129 case G_UDIV:
4130 case G_SREM:
4131 case G_UREM:
4132 case G_SDIVREM:
4133 case G_UDIVREM:
4134 case G_SMIN:
4135 case G_SMAX:
4136 case G_UMIN:
4137 case G_UMAX:
4138 case G_ABS:
4139 case G_FMINNUM:
4140 case G_FMAXNUM:
4141 case G_FMINNUM_IEEE:
4142 case G_FMAXNUM_IEEE:
4143 case G_FMINIMUM:
4144 case G_FMAXIMUM:
4145 case G_FSHL:
4146 case G_FSHR:
4147 case G_FREEZE:
4148 case G_SADDSAT:
4149 case G_SSUBSAT:
4150 case G_UADDSAT:
4151 case G_USUBSAT:
4152 return reduceOperationWidth(MI, TypeIdx, NarrowTy);
// Multiply-with-overflow has its own helper.
4153 case G_UMULO:
4154 case G_SMULO:
4155 return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
// Shifts, bit counts and copysign go through fewerElementsVectorMultiEltType.
4156 case G_SHL:
4157 case G_LSHR:
4158 case G_ASHR:
4159 case G_SSHLSAT:
4160 case G_USHLSAT:
4161 case G_CTLZ:
4162 case G_CTLZ_ZERO_UNDEF:
4163 case G_CTTZ:
4164 case G_CTTZ_ZERO_UNDEF:
4165 case G_CTPOP:
4166 case G_FCOPYSIGN:
4167 return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
// Conversions between types are handled by fewerElementsVectorCasts.
4168 case G_ZEXT:
4169 case G_SEXT:
4170 case G_ANYEXT:
4171 case G_FPEXT:
4172 case G_FPTRUNC:
4173 case G_SITOFP:
4174 case G_UITOFP:
4175 case G_FPTOSI:
4176 case G_FPTOUI:
4177 case G_INTTOPTR:
4178 case G_PTRTOINT:
4179 case G_ADDRSPACE_CAST:
4180 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4181 case G_ICMP:
4182 case G_FCMP:
4183 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4184 case G_SELECT:
4185 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4186 case G_PHI:
4187 return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4188 case G_UNMERGE_VALUES:
4189 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
// G_BUILD_VECTOR is only expected with type index 0 (asserted, not checked
// in release builds).
4190 case G_BUILD_VECTOR:
4191 assert(TypeIdx == 0 && "not a vector type index")(static_cast <bool> (TypeIdx == 0 && "not a vector type index"
) ? void (0) : __assert_fail ("TypeIdx == 0 && \"not a vector type index\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4191, __extension__ __PRETTY_FUNCTION__))
;
4192 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
// G_CONCAT_VECTORS is only handled for type index 1; any other index is
// rejected with UnableToLegalize.
4193 case G_CONCAT_VECTORS:
4194 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4195 return UnableToLegalize;
4196 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4197 case G_EXTRACT_VECTOR_ELT:
4198 case G_INSERT_VECTOR_ELT:
4199 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4200 case G_LOAD:
4201 case G_STORE:
4202 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
4203 case G_SEXT_INREG:
4204 return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
// GISEL_VECREDUCE_CASES_NONSEQ expands to the case labels of the
// non-sequential G_VECREDUCE_* opcodes listed in the expansion below.
4205 GISEL_VECREDUCE_CASES_NONSEQcase TargetOpcode::G_VECREDUCE_FADD: case TargetOpcode::G_VECREDUCE_FMUL
: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN
: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL
: case TargetOpcode::G_VECREDUCE_AND: case TargetOpcode::G_VECREDUCE_OR
: case TargetOpcode::G_VECREDUCE_XOR: case TargetOpcode::G_VECREDUCE_SMAX
: case TargetOpcode::G_VECREDUCE_SMIN: case TargetOpcode::G_VECREDUCE_UMAX
: case TargetOpcode::G_VECREDUCE_UMIN:
4206 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4207 case G_SHUFFLE_VECTOR:
4208 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
// Anything else has no fewer-elements handler.
4209 default:
4210 return UnableToLegalize;
4211 }
4212}
4213
// Split a G_SHUFFLE_VECTOR into two half-width pieces (Lo and Hi) and
// concatenate them into the original destination.
//
// Only type index 0 is handled, and only when the result and both sources
// have the same type with a power-of-2 number of elements; every other case
// returns UnableToLegalize. The NarrowTy passed in is overwritten with the
// half-width type, so each call only ever splits in two.
//
// Each half is built either as a new shuffle of at most two of the four
// half-width inputs, as undef when no input is referenced, or element by
// element through a G_BUILD_VECTOR when more than two inputs are needed.
4214LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4215 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4216 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR)(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR
) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4216, __extension__ __PRETTY_FUNCTION__))
;
4217 if (TypeIdx != 0)
4218 return UnableToLegalize;
4219
4220 Register DstReg = MI.getOperand(0).getReg();
4221 Register Src1Reg = MI.getOperand(1).getReg();
4222 Register Src2Reg = MI.getOperand(2).getReg();
4223 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4224 LLT DstTy = MRI.getType(DstReg);
4225 LLT Src1Ty = MRI.getType(Src1Reg);
4226 LLT Src2Ty = MRI.getType(Src2Reg);
4227 // The shuffle should be canonicalized by now.
4228 if (DstTy != Src1Ty)
4229 return UnableToLegalize;
4230 if (DstTy != Src2Ty)
4231 return UnableToLegalize;
4232
4233 if (!isPowerOf2_32(DstTy.getNumElements()))
4234 return UnableToLegalize;
4235
4236 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4237 // Further legalization attempts will be needed to do split further.
4238 NarrowTy = DstTy.changeNumElements(DstTy.getNumElements() / 2);
4239 unsigned NewElts = NarrowTy.getNumElements();
4240
// Inputs holds the four half-width vectors in mask order: the two halves of
// the first source followed by the two halves of the second source.
4241 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4242 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4243 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4244 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4245 SplitSrc2Regs[1]};
4246
4247 Register Hi, Lo;
4248
4249 // If Lo or Hi uses elements from at most two of the four input vectors, then
4250 // express it as a vector shuffle of those two inputs. Otherwise extract the
4251 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4252 SmallVector<int, 16> Ops;
4253 for (unsigned High = 0; High < 2; ++High) {
4254 Register &Output = High ? Hi : Lo;
4255
4256 // Build a shuffle mask for the output, discovering on the fly which
4257 // input vectors to use as shuffle operands (recorded in InputUsed).
4258 // If building a suitable shuffle vector proves too hard, then bail
4259 // out with useBuildVector set.
4260 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4261 unsigned FirstMaskIdx = High * NewElts;
4262 bool UseBuildVector = false;
4263 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4264 // The mask element. This indexes into the input.
4265 int Idx = Mask[FirstMaskIdx + MaskOffset];
4266
4267 // The input vector this mask element indexes into.
// The cast makes a negative mask element a very large unsigned value, so
// the quotient falls outside the four inputs and is caught just below.
4268 unsigned Input = (unsigned)Idx / NewElts;
4269
4270 if (Input >= array_lengthof(Inputs)) {
4271 // The mask element does not index into any input vector.
4272 Ops.push_back(-1);
4273 continue;
4274 }
4275
4276 // Turn the index into an offset from the start of the input vector.
4277 Idx -= Input * NewElts;
4278
4279 // Find or create a shuffle vector operand to hold this input.
4280 unsigned OpNo;
4281 for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4282 if (InputUsed[OpNo] == Input) {
4283 // This input vector is already an operand.
4284 break;
4285 } else if (InputUsed[OpNo] == -1U) {
4286 // Create a new operand for this input vector.
4287 InputUsed[OpNo] = Input;
4288 break;
4289 }
4290 }
4291
4292 if (OpNo >= array_lengthof(InputUsed)) {
4293 // More than two input vectors used! Give up on trying to create a
4294 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4295 UseBuildVector = true;
4296 break;
4297 }
4298
4299 // Add the mask index for the new shuffle vector.
4300 Ops.push_back(Idx + OpNo * NewElts);
4301 }
4302
4303 if (UseBuildVector) {
4304 LLT EltTy = NarrowTy.getElementType();
4305 SmallVector<Register, 16> SVOps;
4306
4307 // Extract the input elements by hand.
4308 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4309 // The mask element. This indexes into the input.
4310 int Idx = Mask[FirstMaskIdx + MaskOffset];
4311
4312 // The input vector this mask element indexes into.
4313 unsigned Input = (unsigned)Idx / NewElts;
4314
4315 if (Input >= array_lengthof(Inputs)) {
4316 // The mask element is "undef" or indexes off the end of the input.
4317 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4318 continue;
4319 }
4320
4321 // Turn the index into an offset from the start of the input vector.
4322 Idx -= Input * NewElts;
4323
4324 // Extract the vector element by hand.
// The extract index is always materialized as a 32-bit scalar constant.
4325 SVOps.push_back(MIRBuilder
4326 .buildExtractVectorElement(
4327 EltTy, Inputs[Input],
4328 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4329 .getReg(0));
4330 }
4331
4332 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4333 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4334 } else if (InputUsed[0] == -1U) {
4335 // No input vectors were used! The result is undefined.
4336 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4337 } else {
4338 Register Op0 = Inputs[InputUsed[0]];
4339 // If only one input was used, use an undefined vector for the other.
4340 Register Op1 = InputUsed[1] == -1U
4341 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4342 : Inputs[InputUsed[1]];
4343 // At least one input vector was used. Create a new shuffle vector.
4344 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4345 }
4346
// Reset the accumulated mask so the Hi iteration starts from scratch.
4347 Ops.clear();
4348 }
4349
4350 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4351 MI.eraseFromParent();
4352 return Legalized;
4353}
4354
4355LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4356 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4357 unsigned Opc = MI.getOpcode();
4358 assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&(static_cast <bool> (Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD
&& Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
"Sequential reductions not expected") ? void (0) : __assert_fail
("Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && \"Sequential reductions not expected\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4360, __extension__ __PRETTY_FUNCTION__))
4359 Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&(static_cast <bool> (Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD
&& Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
"Sequential reductions not expected") ? void (0) : __assert_fail
("Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && \"Sequential reductions not expected\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4360, __extension__ __PRETTY_FUNCTION__))
4360 "Sequential reductions not expected")(static_cast <bool> (Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD
&& Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
"Sequential reductions not expected") ? void (0) : __assert_fail
("Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && \"Sequential reductions not expected\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4360, __extension__ __PRETTY_FUNCTION__))
;
4361
4362 if (TypeIdx != 1)
4363 return UnableToLegalize;
4364
4365 // The semantics of the normal non-sequential reductions allow us to freely
4366 // re-associate the operation.
4367 Register SrcReg = MI.getOperand(1).getReg();
4368 LLT SrcTy = MRI.getType(SrcReg);
4369 Register DstReg = MI.getOperand(0).getReg();
4370 LLT DstTy = MRI.getType(DstReg);
4371
4372 if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
4373 return UnableToLegalize;
4374
4375 SmallVector<Register> SplitSrcs;
4376 const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
4377 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4378 SmallVector<Register> PartialReductions;
4379 for (unsigned Part = 0; Part < NumParts; ++Part) {
4380 PartialReductions.push_back(
4381 MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4382 }
4383
4384 unsigned ScalarOpc;
4385 switch (Opc) {
4386 case TargetOpcode::G_VECREDUCE_FADD:
4387 ScalarOpc = TargetOpcode::G_FADD;
4388 break;
4389 case TargetOpcode::G_VECREDUCE_FMUL:
4390 ScalarOpc = TargetOpcode::G_FMUL;
4391 break;
4392 case TargetOpcode::G_VECREDUCE_FMAX:
4393 ScalarOpc = TargetOpcode::G_FMAXNUM;
4394 break;
4395 case TargetOpcode::G_VECREDUCE_FMIN:
4396 ScalarOpc = TargetOpcode::G_FMINNUM;
4397 break;
4398 case TargetOpcode::G_VECREDUCE_ADD:
4399 ScalarOpc = TargetOpcode::G_ADD;
4400 break;
4401 case TargetOpcode::G_VECREDUCE_MUL:
4402 ScalarOpc = TargetOpcode::G_MUL;
4403 break;
4404 case TargetOpcode::G_VECREDUCE_AND:
4405 ScalarOpc = TargetOpcode::G_AND;
4406 break;
4407 case TargetOpcode::G_VECREDUCE_OR:
4408 ScalarOpc = TargetOpcode::G_OR;
4409 break;
4410 case TargetOpcode::G_VECREDUCE_XOR:
4411 ScalarOpc = TargetOpcode::G_XOR;
4412 break;
4413 case TargetOpcode::G_VECREDUCE_SMAX:
4414 ScalarOpc = TargetOpcode::G_SMAX;
4415 break;
4416 case TargetOpcode::G_VECREDUCE_SMIN:
4417 ScalarOpc = TargetOpcode::G_SMIN;
4418 break;
4419 case TargetOpcode::G_VECREDUCE_UMAX:
4420 ScalarOpc = TargetOpcode::G_UMAX;
4421 break;
4422 case TargetOpcode::G_VECREDUCE_UMIN:
4423 ScalarOpc = TargetOpcode::G_UMIN;
4424 break;
4425 default:
4426 LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("legalizer")) { dbgs() << "Can't legalize: unknown reduction kind.\n"
; } } while (false)
;
4427 return UnableToLegalize;
4428 }
4429
4430 // If the types involved are powers of 2, we can generate intermediate vector
4431 // ops, before generating a final reduction operation.
4432 if (isPowerOf2_32(SrcTy.getNumElements()) &&
4433 isPowerOf2_32(NarrowTy.getNumElements())) {
4434 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4435 }
4436
4437 Register Acc = PartialReductions[0];
4438 for (unsigned Part = 1; Part < NumParts; ++Part) {
4439 if (Part == NumParts - 1) {
4440 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4441 {Acc, PartialReductions[Part]});
4442 } else {
4443 Acc = MIRBuilder
4444 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4445 .getReg(0);
4446 }
4447 }
4448 MI.eraseFromParent();
4449 return Legalized;
4450}
4451
4452LegalizerHelper::LegalizeResult
4453LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4454 LLT SrcTy, LLT NarrowTy,
4455 unsigned ScalarOpc) {
4456 SmallVector<Register> SplitSrcs;
4457 // Split the sources into NarrowTy size pieces.
4458 extractParts(SrcReg, NarrowTy,
4459 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4460 // We're going to do a tree reduction using vector operations until we have
4461 // one NarrowTy size value left.
4462 while (SplitSrcs.size() > 1) {
4463 SmallVector<Register> PartialRdxs;
4464 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4465 Register LHS = SplitSrcs[Idx];
4466 Register RHS = SplitSrcs[Idx + 1];
4467 // Create the intermediate vector op.
4468 Register Res =
4469 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4470 PartialRdxs.push_back(Res);
4471 }
4472 SplitSrcs = std::move(PartialRdxs);
4473 }
4474 // Finally generate the requested NarrowTy based reduction.
4475 Observer.changingInstr(MI);
4476 MI.getOperand(1).setReg(SplitSrcs[0]);
4477 Observer.changedInstr(MI);
4478 return Legalized;
4479}
4480
4481LegalizerHelper::LegalizeResult
4482LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4483 const LLT HalfTy, const LLT AmtTy) {
4484
4485 Register InL = MRI.createGenericVirtualRegister(HalfTy);
4486 Register InH = MRI.createGenericVirtualRegister(HalfTy);
4487 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4488
4489 if (Amt.isNullValue()) {
4490 MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4491 MI.eraseFromParent();
4492 return Legalized;
4493 }
4494
4495 LLT NVT = HalfTy;
4496 unsigned NVTBits = HalfTy.getSizeInBits();
4497 unsigned VTBits = 2 * NVTBits;
4498
4499 SrcOp Lo(Register(0)), Hi(Register(0));
4500 if (MI.getOpcode() == TargetOpcode::G_SHL) {
4501 if (Amt.ugt(VTBits)) {
4502 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4503 } else if (Amt.ugt(NVTBits)) {
4504 Lo = MIRBuilder.buildConstant(NVT, 0);
4505 Hi = MIRBuilder.buildShl(NVT, InL,
4506 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4507 } else if (Amt == NVTBits) {
4508 Lo = MIRBuilder.buildConstant(NVT, 0);
4509 Hi = InL;
4510 } else {
4511 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4512 auto OrLHS =
4513 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4514 auto OrRHS = MIRBuilder.buildLShr(
4515 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4516 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4517 }
4518 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4519 if (Amt.ugt(VTBits)) {
4520 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4521 } else if (Amt.ugt(NVTBits)) {
4522 Lo = MIRBuilder.buildLShr(NVT, InH,
4523 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4524 Hi = MIRBuilder.buildConstant(NVT, 0);
4525 } else if (Amt == NVTBits) {
4526 Lo = InH;
4527 Hi = MIRBuilder.buildConstant(NVT, 0);
4528 } else {
4529 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4530
4531 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4532 auto OrRHS = MIRBuilder.buildShl(
4533 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4534
4535 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4536 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4537 }
4538 } else {
4539 if (Amt.ugt(VTBits)) {
4540 Hi = Lo = MIRBuilder.buildAShr(
4541 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4542 } else if (Amt.ugt(NVTBits)) {
4543 Lo = MIRBuilder.buildAShr(NVT, InH,
4544 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4545 Hi = MIRBuilder.buildAShr(NVT, InH,
4546 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4547 } else if (Amt == NVTBits) {
4548 Lo = InH;
4549 Hi = MIRBuilder.buildAShr(NVT, InH,
4550 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4551 } else {
4552 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4553
4554 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4555 auto OrRHS = MIRBuilder.buildShl(
4556 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4557
4558 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4559 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4560 }
4561 }
4562
4563 MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4564 MI.eraseFromParent();
4565
4566 return Legalized;
4567}
4568
4569// TODO: Optimize if constant shift amount.
4570LegalizerHelper::LegalizeResult
4571LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4572 LLT RequestedTy) {
4573 if (TypeIdx == 1) {
4574 Observer.changingInstr(MI);
4575 narrowScalarSrc(MI, RequestedTy, 2);
4576 Observer.changedInstr(MI);
4577 return Legalized;
4578 }
4579
4580 Register DstReg = MI.getOperand(0).getReg();
4581 LLT DstTy = MRI.getType(DstReg);
4582 if (DstTy.isVector())
4583 return UnableToLegalize;
4584
4585 Register Amt = MI.getOperand(2).getReg();
4586 LLT ShiftAmtTy = MRI.getType(Amt);
4587 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4588 if (DstEltSize % 2 != 0)
4589 return UnableToLegalize;
4590
4591 // Ignore the input type. We can only go to exactly half the size of the
4592 // input. If that isn't small enough, the resulting pieces will be further
4593 // legalized.
4594 const unsigned NewBitSize = DstEltSize / 2;
4595 const LLT HalfTy = LLT::scalar(NewBitSize);
4596 const LLT CondTy = LLT::scalar(1);
4597
4598 if (const MachineInstr *KShiftAmt =
4599 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
4600 return narrowScalarShiftByConstant(
4601 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4602 }
4603
4604 // TODO: Expand with known bits.
4605
4606 // Handle the fully general expansion by an unknown amount.
4607 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4608
4609 Register InL = MRI.createGenericVirtualRegister(HalfTy);
4610 Register InH = MRI.createGenericVirtualRegister(HalfTy);
4611 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4612
4613 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4614 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4615
4616 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4617 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4618 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4619
4620 Register ResultRegs[2];
4621 switch (MI.getOpcode()) {
4622 case TargetOpcode::G_SHL: {
4623 // Short: ShAmt < NewBitSize
4624 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4625
4626 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4627 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4628 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4629
4630 // Long: ShAmt >= NewBitSize
4631 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4632 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4633
4634 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4635 auto Hi = MIRBuilder.buildSelect(
4636 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4637
4638 ResultRegs[0] = Lo.getReg(0);
4639 ResultRegs[1] = Hi.getReg(0);
4640 break;
4641 }
4642 case TargetOpcode::G_LSHR:
4643 case TargetOpcode::G_ASHR: {
4644 // Short: ShAmt < NewBitSize
4645 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4646
4647 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4648 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4649 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4650
4651 // Long: ShAmt >= NewBitSize
4652 MachineInstrBuilder HiL;
4653 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4654 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4655 } else {
4656 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4657 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4658 }
4659 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4660 {InH, AmtExcess}); // Lo from Hi part.
4661
4662 auto Lo = MIRBuilder.buildSelect(
4663 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4664
4665 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4666
4667 ResultRegs[0] = Lo.getReg(0);
4668 ResultRegs[1] = Hi.getReg(0);
4669 break;
4670 }
4671 default:
4672 llvm_unreachable("not a shift")::llvm::llvm_unreachable_internal("not a shift", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4672)
;
4673 }
4674
4675 MIRBuilder.buildMerge(DstReg, ResultRegs);
4676 MI.eraseFromParent();
4677 return Legalized;
4678}
4679
4680LegalizerHelper::LegalizeResult
4681LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4682 LLT MoreTy) {
4683 assert(TypeIdx == 0 && "Expecting only Idx 0")(static_cast <bool> (TypeIdx == 0 && "Expecting only Idx 0"
) ? void (0) : __assert_fail ("TypeIdx == 0 && \"Expecting only Idx 0\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4683, __extension__ __PRETTY_FUNCTION__))
;
4684
4685 Observer.changingInstr(MI);
4686 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4687 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4688 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4689 moreElementsVectorSrc(MI, MoreTy, I);
4690 }
4691
4692 MachineBasicBlock &MBB = *MI.getParent();
4693 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
4694 moreElementsVectorDst(MI, MoreTy, 0);
4695 Observer.changedInstr(MI);
4696 return Legalized;
4697}
4698
4699LegalizerHelper::LegalizeResult
4700LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
4701 LLT MoreTy) {
4702 unsigned Opc = MI.getOpcode();
4703 switch (Opc) {
4704 case TargetOpcode::G_IMPLICIT_DEF:
4705 case TargetOpcode::G_LOAD: {
4706 if (TypeIdx != 0)
4707 return UnableToLegalize;
4708 Observer.changingInstr(MI);
4709 moreElementsVectorDst(MI, MoreTy, 0);
4710 Observer.changedInstr(MI);
4711 return Legalized;
4712 }
4713 case TargetOpcode::G_STORE:
4714 if (TypeIdx != 0)
4715 return UnableToLegalize;
4716 Observer.changingInstr(MI);
4717 moreElementsVectorSrc(MI, MoreTy, 0);
4718 Observer.changedInstr(MI);
4719 return Legalized;
4720 case TargetOpcode::G_AND:
4721 case TargetOpcode::G_OR:
4722 case TargetOpcode::G_XOR:
4723 case TargetOpcode::G_SMIN:
4724 case TargetOpcode::G_SMAX:
4725 case TargetOpcode::G_UMIN:
4726 case TargetOpcode::G_UMAX:
4727 case TargetOpcode::G_FMINNUM:
4728 case TargetOpcode::G_FMAXNUM:
4729 case TargetOpcode::G_FMINNUM_IEEE:
4730 case TargetOpcode::G_FMAXNUM_IEEE:
4731 case TargetOpcode::G_FMINIMUM:
4732 case TargetOpcode::G_FMAXIMUM: {
4733 Observer.changingInstr(MI);
4734 moreElementsVectorSrc(MI, MoreTy, 1);
4735 moreElementsVectorSrc(MI, MoreTy, 2);
4736 moreElementsVectorDst(MI, MoreTy, 0);
4737 Observer.changedInstr(MI);
4738 return Legalized;
4739 }
4740 case TargetOpcode::G_EXTRACT:
4741 if (TypeIdx != 1)
4742 return UnableToLegalize;
4743 Observer.changingInstr(MI);
4744 moreElementsVectorSrc(MI, MoreTy, 1);
4745 Observer.changedInstr(MI);
4746 return Legalized;
4747 case TargetOpcode::G_INSERT:
4748 case TargetOpcode::G_FREEZE:
4749 if (TypeIdx != 0)
4750 return UnableToLegalize;
4751 Observer.changingInstr(MI);
4752 moreElementsVectorSrc(MI, MoreTy, 1);
4753 moreElementsVectorDst(MI, MoreTy, 0);
4754 Observer.changedInstr(MI);
4755 return Legalized;
4756 case TargetOpcode::G_SELECT:
4757 if (TypeIdx != 0)
4758 return UnableToLegalize;
4759 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4760 return UnableToLegalize;
4761
4762 Observer.changingInstr(MI);
4763 moreElementsVectorSrc(MI, MoreTy, 2);
4764 moreElementsVectorSrc(MI, MoreTy, 3);
4765 moreElementsVectorDst(MI, MoreTy, 0);
4766 Observer.changedInstr(MI);
4767 return Legalized;
4768 case TargetOpcode::G_UNMERGE_VALUES: {
4769 if (TypeIdx != 1)
4770 return UnableToLegalize;
4771
4772 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4773 int NumDst = MI.getNumOperands() - 1;
4774 moreElementsVectorSrc(MI, MoreTy, NumDst);
4775
4776 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4777 for (int I = 0; I != NumDst; ++I)
4778 MIB.addDef(MI.getOperand(I).getReg());
4779
4780 int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
4781 for (int I = NumDst; I != NewNumDst; ++I)
4782 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
4783
4784 MIB.addUse(MI.getOperand(NumDst).getReg());
4785 MI.eraseFromParent();
4786 return Legalized;
4787 }
4788 case TargetOpcode::G_PHI:
4789 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
4790 default:
4791 return UnableToLegalize;
4792 }
4793}
4794
4795void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
4796 ArrayRef<Register> Src1Regs,
4797 ArrayRef<Register> Src2Regs,
4798 LLT NarrowTy) {
4799 MachineIRBuilder &B = MIRBuilder;
4800 unsigned SrcParts = Src1Regs.size();
4801 unsigned DstParts = DstRegs.size();
4802
4803 unsigned DstIdx = 0; // Low bits of the result.
4804 Register FactorSum =
4805 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
4806 DstRegs[DstIdx] = FactorSum;
4807
4808 unsigned CarrySumPrevDstIdx;
4809 SmallVector<Register, 4> Factors;
4810
4811 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
4812 // Collect low parts of muls for DstIdx.
4813 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
4814 i <= std::min(DstIdx, SrcParts - 1); ++i) {
4815 MachineInstrBuilder Mul =
4816 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
4817 Factors.push_back(Mul.getReg(0));
4818 }
4819 // Collect high parts of muls from previous DstIdx.
4820 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
4821 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
4822 MachineInstrBuilder Umulh =
4823 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
4824 Factors.push_back(Umulh.getReg(0));
4825 }
4826 // Add CarrySum from additions calculated for previous DstIdx.
4827 if (DstIdx != 1) {
4828 Factors.push_back(CarrySumPrevDstIdx);
4829 }
4830
4831 Register CarrySum;
4832 // Add all factors and accumulate all carries into CarrySum.
4833 if (DstIdx != DstParts - 1) {
4834 MachineInstrBuilder Uaddo =
4835 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
4836 FactorSum = Uaddo.getReg(0);
4837 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
4838 for (unsigned i = 2; i < Factors.size(); ++i) {
4839 MachineInstrBuilder Uaddo =
4840 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
4841 FactorSum = Uaddo.getReg(0);
4842 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
4843 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
4844 }
4845 } else {
4846 // Since value for the next index is not calculated, neither is CarrySum.
4847 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
4848 for (unsigned i = 2; i < Factors.size(); ++i)
4849 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
4850 }
4851
4852 CarrySumPrevDstIdx = CarrySum;
4853 DstRegs[DstIdx] = FactorSum;
4854 Factors.clear();
4855 }
4856}
4857
4858LegalizerHelper::LegalizeResult
4859LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
4860 LLT NarrowTy) {
4861 if (TypeIdx != 0)
4862 return UnableToLegalize;
4863
4864 Register DstReg = MI.getOperand(0).getReg();
4865 LLT DstType = MRI.getType(DstReg);
4866 // FIXME: add support for vector types
4867 if (DstType.isVector())
4868 return UnableToLegalize;
4869
4870 unsigned Opcode = MI.getOpcode();
4871 unsigned OpO, OpE, OpF;
4872 switch (Opcode) {
4873 case TargetOpcode::G_SADDO:
4874 case TargetOpcode::G_SADDE:
4875 case TargetOpcode::G_UADDO:
4876 case TargetOpcode::G_UADDE:
4877 case TargetOpcode::G_ADD:
4878 OpO = TargetOpcode::G_UADDO;
4879 OpE = TargetOpcode::G_UADDE;
4880 OpF = TargetOpcode::G_UADDE;
4881 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
4882 OpF = TargetOpcode::G_SADDE;
4883 break;
4884 case TargetOpcode::G_SSUBO:
4885 case TargetOpcode::G_SSUBE:
4886 case TargetOpcode::G_USUBO:
4887 case TargetOpcode::G_USUBE:
4888 case TargetOpcode::G_SUB:
4889 OpO = TargetOpcode::G_USUBO;
4890 OpE = TargetOpcode::G_USUBE;
4891 OpF = TargetOpcode::G_USUBE;
4892 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
4893 OpF = TargetOpcode::G_SSUBE;
4894 break;
4895 default:
4896 llvm_unreachable("Unexpected add/sub opcode!")::llvm::llvm_unreachable_internal("Unexpected add/sub opcode!"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 4896)
;
4897 }
4898
4899 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
4900 unsigned NumDefs = MI.getNumExplicitDefs();
4901 Register Src1 = MI.getOperand(NumDefs).getReg();
4902 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
4903 Register CarryDst, CarryIn;
4904 if (NumDefs == 2)
4905 CarryDst = MI.getOperand(1).getReg();
4906 if (MI.getNumOperands() == NumDefs + 3)
4907 CarryIn = MI.getOperand(NumDefs + 2).getReg();
4908
4909 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
4910 LLT LeftoverTy, DummyTy;
4911 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
4912 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
4913 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
4914
4915 int NarrowParts = Src1Regs.size();
4916 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
4917 Src1Regs.push_back(Src1Left[I]);
4918 Src2Regs.push_back(Src2Left[I]);
4919 }
4920 DstRegs.reserve(Src1Regs.size());
4921
4922 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
4923 Register DstReg =
4924 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
4925 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
4926 // Forward the final carry-out to the destination register
4927 if (i == e - 1 && CarryDst)
4928 CarryOut = CarryDst;
4929
4930 if (!CarryIn) {
4931 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
4932 {Src1Regs[i], Src2Regs[i]});
4933 } else if (i == e - 1) {
4934 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
4935 {Src1Regs[i], Src2Regs[i], CarryIn});
4936 } else {
4937 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
4938 {Src1Regs[i], Src2Regs[i], CarryIn});
4939 }
4940
4941 DstRegs.push_back(DstReg);
4942 CarryIn = CarryOut;
4943 }
4944 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
4945 makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
4946 makeArrayRef(DstRegs).drop_front(NarrowParts));
4947
4948 MI.eraseFromParent();
4949 return Legalized;
4950}
4951
4952LegalizerHelper::LegalizeResult
4953LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4954 Register DstReg = MI.getOperand(0).getReg();
4955 Register Src1 = MI.getOperand(1).getReg();
4956 Register Src2 = MI.getOperand(2).getReg();
4957
4958 LLT Ty = MRI.getType(DstReg);
4959 if (Ty.isVector())
4960 return UnableToLegalize;
4961
4962 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4963 unsigned DstSize = Ty.getSizeInBits();
4964 unsigned NarrowSize = NarrowTy.getSizeInBits();
4965 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4966 return UnableToLegalize;
4967
4968 unsigned NumDstParts = DstSize / NarrowSize;
4969 unsigned NumSrcParts = SrcSize / NarrowSize;
4970 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4971 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4972
4973 SmallVector<Register, 2> Src1Parts, Src2Parts;
4974 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4975 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4976 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4977 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4978
4979 // Take only high half of registers if this is high mul.
4980 ArrayRef<Register> DstRegs(
4981 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4982 MIRBuilder.buildMerge(DstReg, DstRegs);
4983 MI.eraseFromParent();
4984 return Legalized;
4985}
4986
4987LegalizerHelper::LegalizeResult
4988LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
4989 LLT NarrowTy) {
4990 if (TypeIdx != 0)
4991 return UnableToLegalize;
4992
4993 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
4994
4995 Register Src = MI.getOperand(1).getReg();
4996 LLT SrcTy = MRI.getType(Src);
4997
4998 // If all finite floats fit into the narrowed integer type, we can just swap
4999 // out the result type. This is practically only useful for conversions from
5000 // half to at least 16-bits, so just handle the one case.
5001 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5002 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5003 return UnableToLegalize;
5004
5005 Observer.changingInstr(MI);
5006 narrowScalarDst(MI, NarrowTy, 0,
5007 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5008 Observer.changedInstr(MI);
5009 return Legalized;
5010}
5011
5012LegalizerHelper::LegalizeResult
5013LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5014 LLT NarrowTy) {
5015 if (TypeIdx != 1)
5016 return UnableToLegalize;
5017
5018 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5019
5020 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5021 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5022 // NarrowSize.
5023 if (SizeOp1 % NarrowSize != 0)
5024 return UnableToLegalize;
5025 int NumParts = SizeOp1 / NarrowSize;
5026
5027 SmallVector<Register, 2> SrcRegs, DstRegs;
5028 SmallVector<uint64_t, 2> Indexes;
5029 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
5030
5031 Register OpReg = MI.getOperand(0).getReg();
5032 uint64_t OpStart = MI.getOperand(2).getImm();
5033 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5034 for (int i = 0; i < NumParts; ++i) {
5035 unsigned SrcStart = i * NarrowSize;
5036
5037 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5038 // No part of the extract uses this subregister, ignore it.
5039 continue;
5040 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5041 // The entire subregister is extracted, forward the value.
5042 DstRegs.push_back(SrcRegs[i]);
5043 continue;
5044 }
5045
5046 // OpSegStart is where this destination segment would start in OpReg if it
5047 // extended infinitely in both directions.
5048 int64_t ExtractOffset;
5049 uint64_t SegSize;
5050 if (OpStart < SrcStart) {
5051 ExtractOffset = 0;
5052 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5053 } else {
5054 ExtractOffset = OpStart - SrcStart;
5055 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5056 }
5057
5058 Register SegReg = SrcRegs[i];
5059 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5060 // A genuine extract is needed.
5061 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5062 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5063 }
5064
5065 DstRegs.push_back(SegReg);
5066 }
5067
5068 Register DstReg = MI.getOperand(0).getReg();
5069 if (MRI.getType(DstReg).isVector())
5070 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5071 else if (DstRegs.size() > 1)
5072 MIRBuilder.buildMerge(DstReg, DstRegs);
5073 else
5074 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5075 MI.eraseFromParent();
5076 return Legalized;
5077}
5078
5079LegalizerHelper::LegalizeResult
5080LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5081 LLT NarrowTy) {
5082 // FIXME: Don't know how to handle secondary types yet.
5083 if (TypeIdx != 0)
5084 return UnableToLegalize;
5085
5086 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
5087 SmallVector<uint64_t, 2> Indexes;
5088 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5089 LLT LeftoverTy;
5090 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
5091 LeftoverRegs);
5092
5093 for (Register Reg : LeftoverRegs)
5094 SrcRegs.push_back(Reg);
5095
5096 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5097 Register OpReg = MI.getOperand(2).getReg();
5098 uint64_t OpStart = MI.getOperand(3).getImm();
5099 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5100 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5101 unsigned DstStart = I * NarrowSize;
5102
5103 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5104 // The entire subregister is defined by this insert, forward the new
5105 // value.
5106 DstRegs.push_back(OpReg);
5107 continue;
5108 }
5109
5110 Register SrcReg = SrcRegs[I];
5111 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5112 // The leftover reg is smaller than NarrowTy, so we need to extend it.
5113 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5114 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5115 }
5116
5117 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5118 // No part of the insert affects this subregister, forward the original.
5119 DstRegs.push_back(SrcReg);
5120 continue;
5121 }
5122
5123 // OpSegStart is where this destination segment would start in OpReg if it
5124 // extended infinitely in both directions.
5125 int64_t ExtractOffset, InsertOffset;
5126 uint64_t SegSize;
5127 if (OpStart < DstStart) {
5128 InsertOffset = 0;
5129 ExtractOffset = DstStart - OpStart;
5130 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
5131 } else {
5132 InsertOffset = OpStart - DstStart;
5133 ExtractOffset = 0;
5134 SegSize =
5135 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
5136 }
5137
5138 Register SegReg = OpReg;
5139 if (ExtractOffset != 0 || SegSize != OpSize) {
5140 // A genuine extract is needed.
5141 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5142 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
5143 }
5144
5145 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
5146 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
5147 DstRegs.push_back(DstReg);
5148 }
5149
5150 uint64_t WideSize = DstRegs.size() * NarrowSize;
5151 Register DstReg = MI.getOperand(0).getReg();
5152 if (WideSize > RegTy.getSizeInBits()) {
5153 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
5154 MIRBuilder.buildMerge(MergeReg, DstRegs);
5155 MIRBuilder.buildTrunc(DstReg, MergeReg);
5156 } else
5157 MIRBuilder.buildMerge(DstReg, DstRegs);
5158
5159 MI.eraseFromParent();
5160 return Legalized;
5161}
5162
5163LegalizerHelper::LegalizeResult
5164LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
5165 LLT NarrowTy) {
5166 Register DstReg = MI.getOperand(0).getReg();
5167 LLT DstTy = MRI.getType(DstReg);
5168
5169 assert(MI.getNumOperands() == 3 && TypeIdx == 0)(static_cast <bool> (MI.getNumOperands() == 3 &&
TypeIdx == 0) ? void (0) : __assert_fail ("MI.getNumOperands() == 3 && TypeIdx == 0"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 5169, __extension__ __PRETTY_FUNCTION__))
;
5170
5171 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5172 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
5173 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5174 LLT LeftoverTy;
5175 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
5176 Src0Regs, Src0LeftoverRegs))
5177 return UnableToLegalize;
5178
5179 LLT Unused;
5180 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
5181 Src1Regs, Src1LeftoverRegs))
5182 llvm_unreachable("inconsistent extractParts result")::llvm::llvm_unreachable_internal("inconsistent extractParts result"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 5182)
;
5183
5184 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5185 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
5186 {Src0Regs[I], Src1Regs[I]});
5187 DstRegs.push_back(Inst.getReg(0));
5188 }
5189
5190 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5191 auto Inst = MIRBuilder.buildInstr(
5192 MI.getOpcode(),
5193 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
5194 DstLeftoverRegs.push_back(Inst.getReg(0));
5195 }
5196
5197 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5198 LeftoverTy, DstLeftoverRegs);
5199
5200 MI.eraseFromParent();
5201 return Legalized;
5202}
5203
5204LegalizerHelper::LegalizeResult
5205LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
5206 LLT NarrowTy) {
5207 if (TypeIdx != 0)
5208 return UnableToLegalize;
5209
5210 Register DstReg = MI.getOperand(0).getReg();
5211 Register SrcReg = MI.getOperand(1).getReg();
5212
5213 LLT DstTy = MRI.getType(DstReg);
5214 if (DstTy.isVector())
5215 return UnableToLegalize;
5216
5217 SmallVector<Register, 8> Parts;
5218 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
5219 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
5220 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
5221
5222 MI.eraseFromParent();
5223 return Legalized;
5224}
5225
5226LegalizerHelper::LegalizeResult
5227LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
5228 LLT NarrowTy) {
5229 if (TypeIdx != 0)
5230 return UnableToLegalize;
5231
5232 Register CondReg = MI.getOperand(1).getReg();
5233 LLT CondTy = MRI.getType(CondReg);
5234 if (CondTy.isVector()) // TODO: Handle vselect
5235 return UnableToLegalize;
5236
5237 Register DstReg = MI.getOperand(0).getReg();
5238 LLT DstTy = MRI.getType(DstReg);
5239
5240 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5241 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5242 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
5243 LLT LeftoverTy;
5244 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
5245 Src1Regs, Src1LeftoverRegs))
5246 return UnableToLegalize;
5247
5248 LLT Unused;
5249 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
5250 Src2Regs, Src2LeftoverRegs))
5251 llvm_unreachable("inconsistent extractParts result")::llvm::llvm_unreachable_internal("inconsistent extractParts result"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 5251)
;
5252
5253 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5254 auto Select = MIRBuilder.buildSelect(NarrowTy,
5255 CondReg, Src1Regs[I], Src2Regs[I]);
5256 DstRegs.push_back(Select.getReg(0));
5257 }
5258
5259 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5260 auto Select = MIRBuilder.buildSelect(
5261 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
5262 DstLeftoverRegs.push_back(Select.getReg(0));
5263 }
5264
5265 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5266 LeftoverTy, DstLeftoverRegs);
5267
5268 MI.eraseFromParent();
5269 return Legalized;
5270}
5271
5272LegalizerHelper::LegalizeResult
5273LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
5274 LLT NarrowTy) {
5275 if (TypeIdx != 1)
5276 return UnableToLegalize;
5277
5278 Register DstReg = MI.getOperand(0).getReg();
5279 Register SrcReg = MI.getOperand(1).getReg();
5280 LLT DstTy = MRI.getType(DstReg);
5281 LLT SrcTy = MRI.getType(SrcReg);
5282 unsigned NarrowSize = NarrowTy.getSizeInBits();
5283
5284 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5285 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
5286
5287 MachineIRBuilder &B = MIRBuilder;
5288 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5289 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
5290 auto C_0 = B.buildConstant(NarrowTy, 0);
5291 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5292 UnmergeSrc.getReg(1), C_0);
5293 auto LoCTLZ = IsUndef ?
5294 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
5295 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
5296 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5297 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
5298 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
5299 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
5300
5301 MI.eraseFromParent();
5302 return Legalized;
5303 }
5304
5305 return UnableToLegalize;
5306}
5307
5308LegalizerHelper::LegalizeResult
5309LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
5310 LLT NarrowTy) {
5311 if (TypeIdx != 1)
5312 return UnableToLegalize;
5313
5314 Register DstReg = MI.getOperand(0).getReg();
5315 Register SrcReg = MI.getOperand(1).getReg();
5316 LLT DstTy = MRI.getType(DstReg);
5317 LLT SrcTy = MRI.getType(SrcReg);
5318 unsigned NarrowSize = NarrowTy.getSizeInBits();
5319
5320 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5321 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
5322
5323 MachineIRBuilder &B = MIRBuilder;
5324 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5325 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
5326 auto C_0 = B.buildConstant(NarrowTy, 0);
5327 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5328 UnmergeSrc.getReg(0), C_0);
5329 auto HiCTTZ = IsUndef ?
5330 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
5331 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
5332 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5333 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
5334 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
5335 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
5336
5337 MI.eraseFromParent();
5338 return Legalized;
5339 }
5340
5341 return UnableToLegalize;
5342}
5343
5344LegalizerHelper::LegalizeResult
5345LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
5346 LLT NarrowTy) {
5347 if (TypeIdx != 1)
5348 return UnableToLegalize;
5349
5350 Register DstReg = MI.getOperand(0).getReg();
5351 LLT DstTy = MRI.getType(DstReg);
5352 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5353 unsigned NarrowSize = NarrowTy.getSizeInBits();
5354
5355 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5356 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
5357
5358 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
5359 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
5360 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
5361
5362 MI.eraseFromParent();
5363 return Legalized;
5364 }
5365
5366 return UnableToLegalize;
5367}
5368
5369LegalizerHelper::LegalizeResult
5370LegalizerHelper::lowerBitCount(MachineInstr &MI) {
5371 unsigned Opc = MI.getOpcode();
5372 const auto &TII = MIRBuilder.getTII();
5373 auto isSupported = [this](const LegalityQuery &Q) {
5374 auto QAction = LI.getAction(Q).Action;
5375 return QAction == Legal || QAction == Libcall || QAction == Custom;
5376 };
5377 switch (Opc) {
5378 default:
5379 return UnableToLegalize;
5380 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
5381 // This trivially expands to CTLZ.
5382 Observer.changingInstr(MI);
5383 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
5384 Observer.changedInstr(MI);
5385 return Legalized;
5386 }
5387 case TargetOpcode::G_CTLZ: {
5388 Register DstReg = MI.getOperand(0).getReg();
5389 Register SrcReg = MI.getOperand(1).getReg();
5390 LLT DstTy = MRI.getType(DstReg);
5391 LLT SrcTy = MRI.getType(SrcReg);
5392 unsigned Len = SrcTy.getSizeInBits();
5393
5394 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
5395 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
5396 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
5397 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
5398 auto ICmp = MIRBuilder.buildICmp(
5399 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
5400 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
5401 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
5402 MI.eraseFromParent();
5403 return Legalized;
5404 }
5405 // for now, we do this:
5406 // NewLen = NextPowerOf2(Len);
5407 // x = x | (x >> 1);
5408 // x = x | (x >> 2);
5409 // ...
5410 // x = x | (x >>16);
5411 // x = x | (x >>32); // for 64-bit input
5412 // Upto NewLen/2
5413 // return Len - popcount(x);
5414 //
5415 // Ref: "Hacker's Delight" by Henry Warren
5416 Register Op = SrcReg;
5417 unsigned NewLen = PowerOf2Ceil(Len);
5418 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
5419 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
5420 auto MIBOp = MIRBuilder.buildOr(
5421 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
5422 Op = MIBOp.getReg(0);
5423 }
5424 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
5425 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
5426 MIBPop);
5427 MI.eraseFromParent();
5428 return Legalized;
5429 }
5430 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
5431 // This trivially expands to CTTZ.
5432 Observer.changingInstr(MI);
5433 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
5434 Observer.changedInstr(MI);
5435 return Legalized;
5436 }
5437 case TargetOpcode::G_CTTZ: {
5438 Register DstReg = MI.getOperand(0).getReg();
5439 Register SrcReg = MI.getOperand(1).getReg();
5440 LLT DstTy = MRI.getType(DstReg);
5441 LLT SrcTy = MRI.getType(SrcReg);
5442
5443 unsigned Len = SrcTy.getSizeInBits();
5444 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
5445 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
5446 // zero.
5447 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
5448 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
5449 auto ICmp = MIRBuilder.buildICmp(
5450 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
5451 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
5452 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
5453 MI.eraseFromParent();
5454 return Legalized;
5455 }
5456 // for now, we use: { return popcount(~x & (x - 1)); }
5457 // unless the target has ctlz but not ctpop, in which case we use:
5458 // { return 32 - nlz(~x & (x-1)); }
5459 // Ref: "Hacker's Delight" by Henry Warren
5460 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
5461 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
5462 auto MIBTmp = MIRBuilder.buildAnd(
5463 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
5464 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
5465 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
5466 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
5467 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
5468 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
5469 MI.eraseFromParent();
5470 return Legalized;
5471 }
5472 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
5473 MI.getOperand(1).setReg(MIBTmp.getReg(0));
5474 return Legalized;
5475 }
5476 case TargetOpcode::G_CTPOP: {
5477 Register SrcReg = MI.getOperand(1).getReg();
5478 LLT Ty = MRI.getType(SrcReg);
5479 unsigned Size = Ty.getSizeInBits();
5480 MachineIRBuilder &B = MIRBuilder;
5481
5482 // Count set bits in blocks of 2 bits. Default approach would be
5483 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
5484 // We use following formula instead:
5485 // B2Count = val - { (val >> 1) & 0x55555555 }
5486 // since it gives same result in blocks of 2 with one instruction less.
5487 auto C_1 = B.buildConstant(Ty, 1);
5488 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
5489 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
5490 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
5491 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
5492 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
5493
5494 // In order to get count in blocks of 4 add values from adjacent block of 2.
5495 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
5496 auto C_2 = B.buildConstant(Ty, 2);
5497 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
5498 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
5499 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
5500 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
5501 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
5502 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
5503
5504 // For count in blocks of 8 bits we don't have to mask high 4 bits before
5505 // addition since count value sits in range {0,...,8} and 4 bits are enough
5506 // to hold such binary values. After addition high 4 bits still hold count
5507 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
5508 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
5509 auto C_4 = B.buildConstant(Ty, 4);
5510 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
5511 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
5512 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
5513 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
5514 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
5515
5516 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm")(static_cast <bool> (Size<=128 && "Scalar size is too large for CTPOP lower algorithm"
) ? void (0) : __assert_fail ("Size<=128 && \"Scalar size is too large for CTPOP lower algorithm\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 5516, __extension__ __PRETTY_FUNCTION__))
;
5517 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
5518 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
5519 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
5520 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
5521
5522 // Shift count result from 8 high bits to low bits.
5523 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
5524 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
5525
5526 MI.eraseFromParent();
5527 return Legalized;
5528 }
5529 }
5530}
5531
5532// Check that (every element of) Reg is undef or not an exact multiple of BW.
5533static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
5534 Register Reg, unsigned BW) {
5535 return matchUnaryPredicate(
5536 MRI, Reg,
5537 [=](const Constant *C) {
5538 // Null constant here means an undef.
5539 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
5540 return !CI || CI->getValue().urem(BW) != 0;
5541 },
5542 /*AllowUndefs*/ true);
5543}
5544
5545LegalizerHelper::LegalizeResult
5546LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
5547 Register Dst = MI.getOperand(0).getReg();
5548 Register X = MI.getOperand(1).getReg();
5549 Register Y = MI.getOperand(2).getReg();
5550 Register Z = MI.getOperand(3).getReg();
5551 LLT Ty = MRI.getType(Dst);
5552 LLT ShTy = MRI.getType(Z);
5553
5554 unsigned BW = Ty.getScalarSizeInBits();
5555
5556 if (!isPowerOf2_32(BW))
5557 return UnableToLegalize;
5558
5559 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5560 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5561
5562 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
5563 // fshl X, Y, Z -> fshr X, Y, -Z
5564 // fshr X, Y, Z -> fshl X, Y, -Z
5565 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
5566 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
5567 } else {
5568 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
5569 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
5570 auto One = MIRBuilder.buildConstant(ShTy, 1);
5571 if (IsFSHL) {
5572 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5573 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
5574 } else {
5575 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5576 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
5577 }
5578
5579 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
5580 }
5581
5582 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
5583 MI.eraseFromParent();
5584 return Legalized;
5585}
5586
5587LegalizerHelper::LegalizeResult
5588LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
5589 Register Dst = MI.getOperand(0).getReg();
5590 Register X = MI.getOperand(1).getReg();
5591 Register Y = MI.getOperand(2).getReg();
5592 Register Z = MI.getOperand(3).getReg();
5593 LLT Ty = MRI.getType(Dst);
5594 LLT ShTy = MRI.getType(Z);
5595
5596 const unsigned BW = Ty.getScalarSizeInBits();
5597 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5598
5599 Register ShX, ShY;
5600 Register ShAmt, InvShAmt;
5601
5602 // FIXME: Emit optimized urem by constant instead of letting it expand later.
5603 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
5604 // fshl: X << C | Y >> (BW - C)
5605 // fshr: X << (BW - C) | Y >> C
5606 // where C = Z % BW is not zero
5607 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
5608 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
5609 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
5610 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
5611 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
5612 } else {
5613 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
5614 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
5615 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
5616 if (isPowerOf2_32(BW)) {
5617 // Z % BW -> Z & (BW - 1)
5618 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
5619 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
5620 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
5621 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
5622 } else {
5623 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
5624 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
5625 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
5626 }
5627
5628 auto One = MIRBuilder.buildConstant(ShTy, 1);
5629 if (IsFSHL) {
5630 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
5631 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
5632 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
5633 } else {
5634 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
5635 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
5636 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
5637 }
5638 }
5639
5640 MIRBuilder.buildOr(Dst, ShX, ShY);
5641 MI.eraseFromParent();
5642 return Legalized;
5643}
5644
5645LegalizerHelper::LegalizeResult
5646LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
5647 // These operations approximately do the following (while avoiding undefined
5648 // shifts by BW):
5649 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5650 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5651 Register Dst = MI.getOperand(0).getReg();
5652 LLT Ty = MRI.getType(Dst);
5653 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
5654
5655 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5656 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5657
5658 // TODO: Use smarter heuristic that accounts for vector legalization.
5659 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
5660 return lowerFunnelShiftAsShifts(MI);
5661
5662 // This only works for powers of 2, fallback to shifts if it fails.
5663 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
5664 if (Result == UnableToLegalize)
5665 return lowerFunnelShiftAsShifts(MI);
5666 return Result;
5667}
5668
5669LegalizerHelper::LegalizeResult
5670LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
5671 Register Dst = MI.getOperand(0).getReg();
5672 Register Src = MI.getOperand(1).getReg();
5673 Register Amt = MI.getOperand(2).getReg();
5674 LLT AmtTy = MRI.getType(Amt);
5675 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5676 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5677 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5678 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5679 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
5680 MI.eraseFromParent();
5681 return Legalized;
5682}
5683
5684LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
5685 Register Dst = MI.getOperand(0).getReg();
5686 Register Src = MI.getOperand(1).getReg();
5687 Register Amt = MI.getOperand(2).getReg();
5688 LLT DstTy = MRI.getType(Dst);
5689 LLT SrcTy = MRI.getType(Dst);
5690 LLT AmtTy = MRI.getType(Amt);
5691
5692 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
5693 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5694
5695 MIRBuilder.setInstrAndDebugLoc(MI);
5696
5697 // If a rotate in the other direction is supported, use it.
5698 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5699 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
5700 isPowerOf2_32(EltSizeInBits))
5701 return lowerRotateWithReverseRotate(MI);
5702
5703 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5704 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
5705 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
5706 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
5707 Register ShVal;
5708 Register RevShiftVal;
5709 if (isPowerOf2_32(EltSizeInBits)) {
5710 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
5711 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
5712 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5713 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
5714 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5715 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
5716 RevShiftVal =
5717 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
5718 } else {
5719 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
5720 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
5721 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
5722 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
5723 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5724 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
5725 auto One = MIRBuilder.buildConstant(AmtTy, 1);
5726 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
5727 RevShiftVal =
5728 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
5729 }
5730 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
5731 MI.eraseFromParent();
5732 return Legalized;
5733}
5734
5735// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
5736// representation.
5737LegalizerHelper::LegalizeResult
5738LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
5739 Register Dst = MI.getOperand(0).getReg();
5740 Register Src = MI.getOperand(1).getReg();
5741 const LLT S64 = LLT::scalar(64);
5742 const LLT S32 = LLT::scalar(32);
5743 const LLT S1 = LLT::scalar(1);
5744
5745 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32)(static_cast <bool> (MRI.getType(Src) == S64 &&
MRI.getType(Dst) == S32) ? void (0) : __assert_fail ("MRI.getType(Src) == S64 && MRI.getType(Dst) == S32"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 5745, __extension__ __PRETTY_FUNCTION__))
;
5746
5747 // unsigned cul2f(ulong u) {
5748 // uint lz = clz(u);
5749 // uint e = (u != 0) ? 127U + 63U - lz : 0;
5750 // u = (u << lz) & 0x7fffffffffffffffUL;
5751 // ulong t = u & 0xffffffffffUL;
5752 // uint v = (e << 23) | (uint)(u >> 40);
5753 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
5754 // return as_float(v + r);
5755 // }
5756
5757 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
5758 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
5759
5760 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
5761
5762 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
5763 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
5764
5765 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
5766 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
5767
5768 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
5769 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
5770
5771 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
5772
5773 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
5774 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
5775
5776 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
5777 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
5778 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
5779
5780 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
5781 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
5782 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
5783 auto One = MIRBuilder.buildConstant(S32, 1);
5784
5785 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
5786 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
5787 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
5788 MIRBuilder.buildAdd(Dst, V, R);
5789
5790 MI.eraseFromParent();
5791 return Legalized;
5792}
5793
5794LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
5795 Register Dst = MI.getOperand(0).getReg();
5796 Register Src = MI.getOperand(1).getReg();
5797 LLT DstTy = MRI.getType(Dst);
5798 LLT SrcTy = MRI.getType(Src);
5799
5800 if (SrcTy == LLT::scalar(1)) {
5801 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
5802 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5803 MIRBuilder.buildSelect(Dst, Src, True, False);
5804 MI.eraseFromParent();
5805 return Legalized;
5806 }
5807
5808 if (SrcTy != LLT::scalar(64))
5809 return UnableToLegalize;
5810
5811 if (DstTy == LLT::scalar(32)) {
5812 // TODO: SelectionDAG has several alternative expansions to port which may
5813 // be more reasonble depending on the available instructions. If a target
5814 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
5815 // intermediate type, this is probably worse.
5816 return lowerU64ToF32BitOps(MI);
5817 }
5818
5819 return UnableToLegalize;
5820}
5821
5822LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
5823 Register Dst = MI.getOperand(0).getReg();
5824 Register Src = MI.getOperand(1).getReg();
5825 LLT DstTy = MRI.getType(Dst);
5826 LLT SrcTy = MRI.getType(Src);
5827
5828 const LLT S64 = LLT::scalar(64);
5829 const LLT S32 = LLT::scalar(32);
5830 const LLT S1 = LLT::scalar(1);
5831
5832 if (SrcTy == S1) {
5833 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
5834 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5835 MIRBuilder.buildSelect(Dst, Src, True, False);
5836 MI.eraseFromParent();
5837 return Legalized;
5838 }
5839
5840 if (SrcTy != S64)
5841 return UnableToLegalize;
5842
5843 if (DstTy == S32) {
5844 // signed cl2f(long l) {
5845 // long s = l >> 63;
5846 // float r = cul2f((l + s) ^ s);
5847 // return s ? -r : r;
5848 // }
5849 Register L = Src;
5850 auto SignBit = MIRBuilder.buildConstant(S64, 63);
5851 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
5852
5853 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
5854 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
5855 auto R = MIRBuilder.buildUITOFP(S32, Xor);
5856
5857 auto RNeg = MIRBuilder.buildFNeg(S32, R);
5858 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
5859 MIRBuilder.buildConstant(S64, 0));
5860 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
5861 MI.eraseFromParent();
5862 return Legalized;
5863 }
5864
5865 return UnableToLegalize;
5866}
5867
5868LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
5869 Register Dst = MI.getOperand(0).getReg();
5870 Register Src = MI.getOperand(1).getReg();
5871 LLT DstTy = MRI.getType(Dst);
5872 LLT SrcTy = MRI.getType(Src);
5873 const LLT S64 = LLT::scalar(64);
5874 const LLT S32 = LLT::scalar(32);
5875
5876 if (SrcTy != S64 && SrcTy != S32)
5877 return UnableToLegalize;
5878 if (DstTy != S32 && DstTy != S64)
5879 return UnableToLegalize;
5880
5881 // FPTOSI gives same result as FPTOUI for positive signed integers.
5882 // FPTOUI needs to deal with fp values that convert to unsigned integers
5883 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
5884
5885 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
5886 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
5887 : APFloat::IEEEdouble(),
5888 APInt::getNullValue(SrcTy.getSizeInBits()));
5889 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
5890
5891 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
5892
5893 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
5894 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
5895 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
5896 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
5897 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
5898 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
5899 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
5900
5901 const LLT S1 = LLT::scalar(1);
5902
5903 MachineInstrBuilder FCMP =
5904 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
5905 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
5906
5907 MI.eraseFromParent();
5908 return Legalized;
5909}
5910
5911LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
5912 Register Dst = MI.getOperand(0).getReg();
5913 Register Src = MI.getOperand(1).getReg();
5914 LLT DstTy = MRI.getType(Dst);
5915 LLT SrcTy = MRI.getType(Src);
5916 const LLT S64 = LLT::scalar(64);
5917 const LLT S32 = LLT::scalar(32);
5918
5919 // FIXME: Only f32 to i64 conversions are supported.
5920 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
5921 return UnableToLegalize;
5922
5923 // Expand f32 -> i64 conversion
5924 // This algorithm comes from compiler-rt's implementation of fixsfdi:
5925 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
5926
5927 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
5928
5929 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
5930 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
5931
5932 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
5933 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
5934
5935 auto SignMask = MIRBuilder.buildConstant(SrcTy,
5936 APInt::getSignMask(SrcEltBits));
5937 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
5938 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
5939 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
5940 Sign = MIRBuilder.buildSExt(DstTy, Sign);
5941
5942 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
5943 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
5944 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
5945
5946 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
5947 R = MIRBuilder.buildZExt(DstTy, R);
5948
5949 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
5950 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
5951 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
5952 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
5953
5954 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
5955 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
5956
5957 const LLT S1 = LLT::scalar(1);
5958 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
5959 S1, Exponent, ExponentLoBit);
5960
5961 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
5962
5963 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
5964 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
5965
5966 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
5967
5968 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
5969 S1, Exponent, ZeroSrcTy);
5970
5971 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
5972 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
5973
5974 MI.eraseFromParent();
5975 return Legalized;
5976}
5977
5978// f64 -> f16 conversion using round-to-nearest-even rounding mode.
5979LegalizerHelper::LegalizeResult
5980LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
5981 Register Dst = MI.getOperand(0).getReg();
5982 Register Src = MI.getOperand(1).getReg();
5983
5984 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
5985 return UnableToLegalize;
5986
5987 const unsigned ExpMask = 0x7ff;
5988 const unsigned ExpBiasf64 = 1023;
5989 const unsigned ExpBiasf16 = 15;
5990 const LLT S32 = LLT::scalar(32);
5991 const LLT S1 = LLT::scalar(1);
5992
5993 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
5994 Register U = Unmerge.getReg(0);
5995 Register UH = Unmerge.getReg(1);
5996
5997 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
5998 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
5999
6000 // Subtract the fp64 exponent bias (1023) to get the real exponent and
6001 // add the f16 bias (15) to get the biased exponent for the f16 format.
6002 E = MIRBuilder.buildAdd(
6003 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
6004
6005 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
6006 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
6007
6008 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
6009 MIRBuilder.buildConstant(S32, 0x1ff));
6010 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
6011
6012 auto Zero = MIRBuilder.buildConstant(S32, 0);
6013 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
6014 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
6015 M = MIRBuilder.buildOr(S32, M, Lo40Set);
6016
6017 // (M != 0 ? 0x0200 : 0) | 0x7c00;
6018 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
6019 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
6020 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
6021
6022 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
6023 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
6024
6025 // N = M | (E << 12);
6026 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
6027 auto N = MIRBuilder.buildOr(S32, M, EShl12);
6028
6029 // B = clamp(1-E, 0, 13);
6030 auto One = MIRBuilder.buildConstant(S32, 1);
6031 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
6032 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
6033 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
6034
6035 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
6036 MIRBuilder.buildConstant(S32, 0x1000));
6037
6038 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
6039 auto D0 = MIRBuilder.buildShl(S32, D, B);
6040
6041 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
6042 D0, SigSetHigh);
6043 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
6044 D = MIRBuilder.buildOr(S32, D, D1);
6045
6046 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
6047 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
6048
6049 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
6050 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
6051
6052 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
6053 MIRBuilder.buildConstant(S32, 3));
6054 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
6055
6056 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
6057 MIRBuilder.buildConstant(S32, 5));
6058 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
6059
6060 V1 = MIRBuilder.buildOr(S32, V0, V1);
6061 V = MIRBuilder.buildAdd(S32, V, V1);
6062
6063 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
6064 E, MIRBuilder.buildConstant(S32, 30));
6065 V = MIRBuilder.buildSelect(S32, CmpEGt30,
6066 MIRBuilder.buildConstant(S32, 0x7c00), V);
6067
6068 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
6069 E, MIRBuilder.buildConstant(S32, 1039));
6070 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
6071
6072 // Extract the sign bit.
6073 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
6074 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
6075
6076 // Insert the sign bit
6077 V = MIRBuilder.buildOr(S32, Sign, V);
6078
6079 MIRBuilder.buildTrunc(Dst, V);
6080 MI.eraseFromParent();
6081 return Legalized;
6082}
6083
6084LegalizerHelper::LegalizeResult
6085LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
6086 Register Dst = MI.getOperand(0).getReg();
6087 Register Src = MI.getOperand(1).getReg();
6088
6089 LLT DstTy = MRI.getType(Dst);
6090 LLT SrcTy = MRI.getType(Src);
6091 const LLT S64 = LLT::scalar(64);
6092 const LLT S16 = LLT::scalar(16);
6093
6094 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
6095 return lowerFPTRUNC_F64_TO_F16(MI);
6096
6097 return UnableToLegalize;
6098}
6099
6100// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
6101// multiplication tree.
6102LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
6103 Register Dst = MI.getOperand(0).getReg();
6104 Register Src0 = MI.getOperand(1).getReg();
6105 Register Src1 = MI.getOperand(2).getReg();
6106 LLT Ty = MRI.getType(Dst);
6107
6108 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
6109 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
6110 MI.eraseFromParent();
6111 return Legalized;
6112}
6113
6114static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
6115 switch (Opc) {
6116 case TargetOpcode::G_SMIN:
6117 return CmpInst::ICMP_SLT;
6118 case TargetOpcode::G_SMAX:
6119 return CmpInst::ICMP_SGT;
6120 case TargetOpcode::G_UMIN:
6121 return CmpInst::ICMP_ULT;
6122 case TargetOpcode::G_UMAX:
6123 return CmpInst::ICMP_UGT;
6124 default:
6125 llvm_unreachable("not in integer min/max")::llvm::llvm_unreachable_internal("not in integer min/max", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp"
, 6125)
;
6126 }
6127}
6128
6129LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
6130 Register Dst = MI.getOperand(0).getReg();
6131 Register Src0 = MI.getOperand(1).getReg();
6132 Register Src1 = MI.getOperand(2).getReg();
6133
6134 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
6135 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
6136
6137 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
6138 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
6139
6140 MI.eraseFromParent();
6141 return Legalized;
6142}
6143
6144LegalizerHelper::LegalizeResult
6145LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
6146 Register Dst = MI.getOperand(0).getReg();
6147 Register Src0 = MI.getOperand(1).getReg();
6148 Register Src1 = MI.getOperand(2).getReg();
6149
6150 const LLT Src0Ty = MRI.getType(Src0);
6151 const LLT Src1Ty = MRI.getType(Src1);
6152
6153 const int Src0Size = Src0Ty.getScalarSizeInBits();
6154 const int Src1Size = Src1Ty.getScalarSizeInBits();
6155
6156 auto SignBitMask = MIRBuilder.buildConstant(
6157 Src0Ty, APInt::getSignMask(Src0Size));
6158
6159 auto NotSignBitMask = MIRBuilder.buildConstant(
6160 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
6161
6162 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
6163 Register And1;
6164 if (Src0Ty == Src1Ty) {
6165 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
6166 } else if (Src0Size > Src1Size) {
6167 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
6168 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
6169 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
6170 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
6171 } else {
6172 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
6173 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
6174 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
6175 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
6176 }
6177
6178 // Be careful about setting nsz/nnan/ninf on every instruction, since the
6179 // constants are a nan and -0.0, but the final result should preserve
6180 // everything.
6181 unsigned Flags = MI.getFlags();
6182 MIRBuilder.buildOr(Dst, And0, And1, Flags);
6183
6184 MI.eraseFromParent();
6185 return Legalized;
6186}
6187
6188LegalizerHelper::LegalizeResult
6189LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
6190 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
6191 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
6192
6193 Register Dst = MI.getOperand(0).getReg();
6194 Register Src0 = MI.getOperand(1).getReg();
6195 Register Src1 = MI.getOperand(2).getReg();
6196 LLT Ty = MRI.getType(Dst);
6197
6198 if (!MI.getFlag(MachineInstr::FmNoNans)) {
6199 // Insert canonicalizes if it's possible we need to quiet to get correct
6200 // sNaN behavior.
6201
6202 // Note this must be done here, and not as an optimization combine in the
6203 // absence of a dedicate quiet-snan instruction as we're using an
6204 // omni-purpose G_FCANONICALIZE.
6205 if (!isKnownNeverSNaN(Src0, MRI))
6206 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
6207
6208 if (!isKnownNeverSNaN(Src1, MRI))
6209 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
6210 }
6211
6212 // If there are no nans, it's safe to simply replace this with the non-IEEE
6213 // version.
6214 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
6215 MI.eraseFromParent();
6216 return Legalized;
6217}
6218
6219LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
6220 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
6221 Register DstReg = MI.getOperand(0).getReg();
6222 LLT Ty = MRI.getType(DstReg);
6223 unsigned Flags = MI.getFlags();
6224
6225 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
6226 Flags);
6227 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
6228 MI.eraseFromParent();
6229 return Legalized;
6230}
6231
6232LegalizerHelper::LegalizeResult
6233LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
6234 Register DstReg = MI.getOperand(0).getReg();
6235 Register X = MI.getOperand(1).getReg();
6236 const unsigned Flags = MI.getFlags();
6237 const LLT Ty = MRI.getType(DstReg);
6238 const LLT CondTy = Ty.changeElementSize(1);
6239
6240 // round(x) =>
6241 // t = trunc(x);
6242 // d = fabs(x - t);
6243 // o = copysign(1.0f, x);
6244 // return t + (d >= 0.5 ? o : 0.0);
6245
6246 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
6247
6248 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
6249 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
6250 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6251 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
6252 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
6253 auto SignOne = MIRBuilder.bu