RISCVLegalizerInfo.cpp
1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
16#include "RISCVSubtarget.h"
29#include "llvm/IR/Type.h"
30
31using namespace llvm;
32using namespace LegalityPredicates;
33using namespace LegalizeMutations;
34
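// Returns a predicate that accepts a vector type from IntOrFPVecTys only when
// the subtarget has the V extension, 64-bit elements are backed by
// hasVInstructionsI64, and single-element (nxv1) types require ELEN == 64.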
35static LegalityPredicate
36typeIsLegalIntOrFPVec(unsigned TypeIdx,
37 std::initializer_list<LLT> IntOrFPVecTys,
38 const RISCVSubtarget &ST) {
39 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
40 return ST.hasVInstructions() &&
41 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
42 ST.hasVInstructionsI64()) &&
43 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
44 ST.getELen() == 64);
45 };
46
47 return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
48}
49
50static LegalityPredicate
51typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
52 const RISCVSubtarget &ST) {
53 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
54 return ST.hasVInstructions() &&
55 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
56 ST.getELen() == 64);
57 };
58 return all(typeInSet(TypeIdx, BoolVecTys), P);
59}
60
61static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
62 std::initializer_list<LLT> PtrVecTys,
63 const RISCVSubtarget &ST) {
64 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
65 return ST.hasVInstructions() &&
66 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
67 ST.getELen() == 64) &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
69 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
70 };
71 return all(typeInSet(TypeIdx, PtrVecTys), P);
72}
73
74RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
75 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
76 const LLT sDoubleXLen = LLT::scalar(2 * XLen);
77 const LLT p0 = LLT::pointer(0, XLen);
78 const LLT s1 = LLT::scalar(1);
79 const LLT s8 = LLT::scalar(8);
80 const LLT s16 = LLT::scalar(16);
81 const LLT s32 = LLT::scalar(32);
82 const LLT s64 = LLT::scalar(64);
83 const LLT s128 = LLT::scalar(128);
84
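// Scalable vector types used by the RVV legalization rules below; nxvNsM
// denotes <vscale x N x sM> and nxvNp0 denotes <vscale x N x p0>.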
85 const LLT nxv1s1 = LLT::scalable_vector(1, s1);
86 const LLT nxv2s1 = LLT::scalable_vector(2, s1);
87 const LLT nxv4s1 = LLT::scalable_vector(4, s1);
88 const LLT nxv8s1 = LLT::scalable_vector(8, s1);
89 const LLT nxv16s1 = LLT::scalable_vector(16, s1);
90 const LLT nxv32s1 = LLT::scalable_vector(32, s1);
91 const LLT nxv64s1 = LLT::scalable_vector(64, s1);
92
93 const LLT nxv1s8 = LLT::scalable_vector(1, s8);
94 const LLT nxv2s8 = LLT::scalable_vector(2, s8);
95 const LLT nxv4s8 = LLT::scalable_vector(4, s8);
96 const LLT nxv8s8 = LLT::scalable_vector(8, s8);
97 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
98 const LLT nxv32s8 = LLT::scalable_vector(32, s8);
99 const LLT nxv64s8 = LLT::scalable_vector(64, s8);
100
101 const LLT nxv1s16 = LLT::scalable_vector(1, s16);
102 const LLT nxv2s16 = LLT::scalable_vector(2, s16);
103 const LLT nxv4s16 = LLT::scalable_vector(4, s16);
104 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
105 const LLT nxv16s16 = LLT::scalable_vector(16, s16);
106 const LLT nxv32s16 = LLT::scalable_vector(32, s16);
107
108 const LLT nxv1s32 = LLT::scalable_vector(1, s32);
109 const LLT nxv2s32 = LLT::scalable_vector(2, s32);
110 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
111 const LLT nxv8s32 = LLT::scalable_vector(8, s32);
112 const LLT nxv16s32 = LLT::scalable_vector(16, s32);
113
114 const LLT nxv1s64 = LLT::scalable_vector(1, s64);
115 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
116 const LLT nxv4s64 = LLT::scalable_vector(4, s64);
117 const LLT nxv8s64 = LLT::scalable_vector(8, s64);
118
119 const LLT nxv1p0 = LLT::scalable_vector(1, p0);
120 const LLT nxv2p0 = LLT::scalable_vector(2, p0);
121 const LLT nxv4p0 = LLT::scalable_vector(4, p0);
122 const LLT nxv8p0 = LLT::scalable_vector(8, p0);
123 const LLT nxv16p0 = LLT::scalable_vector(16, p0);
124
125 using namespace TargetOpcode;
126
127 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
128
129 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
130 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
131 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
132 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
133
134 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
135
136 getActionDefinitionsBuilder({G_ADD, G_SUB})
137 .legalFor({sXLen})
138 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
139 .customFor(ST.is64Bit(), {s32})
140 .widenScalarToNextPow2(0)
141 .clampScalar(0, sXLen, sXLen);
142
143 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
144 .legalFor({sXLen})
145 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
146 .widenScalarToNextPow2(0)
147 .clampScalar(0, sXLen, sXLen);
148
149 getActionDefinitionsBuilder(
150 {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
151
152 getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
153
154 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
155 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
156 .lower();
157
158 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
159 .legalFor({{sXLen, sXLen}})
160 .customFor(ST.is64Bit(), {{s32, s32}})
161 .widenScalarToNextPow2(0)
162 .clampScalar(1, sXLen, sXLen)
163 .clampScalar(0, sXLen, sXLen);
164
165 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
166 .legalFor({{s32, s16}})
167 .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
168 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
169 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
170 .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
171 .maxScalar(0, sXLen);
172
173 getActionDefinitionsBuilder(G_SEXT_INREG)
174 .customFor({sXLen})
175 .clampScalar(0, sXLen, sXLen)
176 .lower();
177
178 // Merge/Unmerge
179 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
180 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
181 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
182 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
183 if (XLen == 32 && ST.hasStdExtD()) {
184 MergeUnmergeActions.legalIf(
185 all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
186 }
187 MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
188 .widenScalarToNextPow2(BigTyIdx, XLen)
189 .clampScalar(LitTyIdx, sXLen, sXLen)
190 .clampScalar(BigTyIdx, sXLen, sXLen);
191 }
192
193 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
194
195 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
196 .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
197 .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
198 {{s32, s32}})
199 .lower();
200
201 getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
202
203 getActionDefinitionsBuilder(G_BITCAST).legalIf(
204 all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
205 typeIsLegalBoolVec(0, BoolVecTys, ST)),
206 LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
207 typeIsLegalBoolVec(1, BoolVecTys, ST))));
208
209 auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
210 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
211 BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
212 else
213 BSWAPActions.maxScalar(0, sXLen).lower();
214
215 auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
216 auto &CountZerosUndefActions =
217 getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
218 if (ST.hasStdExtZbb()) {
219 CountZerosActions.legalFor({{sXLen, sXLen}})
220 .customFor({{s32, s32}})
221 .clampScalar(0, s32, sXLen)
222 .widenScalarToNextPow2(0)
223 .scalarSameSizeAs(1, 0);
224 } else {
225 CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
226 CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
227 }
228 CountZerosUndefActions.lower();
229
230 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
231 if (ST.hasStdExtZbb()) {
232 CTPOPActions.legalFor({{sXLen, sXLen}})
233 .clampScalar(0, sXLen, sXLen)
234 .scalarSameSizeAs(1, 0);
235 } else {
236 CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
237 }
238
239 getActionDefinitionsBuilder(G_CONSTANT)
240 .legalFor({p0})
241 .legalFor(!ST.is64Bit(), {s32})
242 .customFor(ST.is64Bit(), {s64})
243 .widenScalarToNextPow2(0)
244 .clampScalar(0, sXLen, sXLen);
245
246 // TODO: transform illegal vector types into legal vector type
247 getActionDefinitionsBuilder(G_FREEZE)
248 .legalFor({s16, s32, p0})
249 .legalFor(ST.is64Bit(), {s64})
250 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
251 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
252 .widenScalarToNextPow2(0)
253 .clampScalar(0, s16, sXLen);
254
255 // TODO: transform illegal vector types into legal vector type
256 // TODO: Merge with G_FREEZE?
257 getActionDefinitionsBuilder(
258 {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
259 .legalFor({s32, sXLen, p0})
260 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
261 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
262 .widenScalarToNextPow2(0)
263 .clampScalar(0, s32, sXLen);
264
265 getActionDefinitionsBuilder(G_ICMP)
266 .legalFor({{sXLen, sXLen}, {sXLen, p0}})
267 .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
268 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
269 .widenScalarOrEltToNextPow2OrMinSize(1, 8)
270 .clampScalar(1, sXLen, sXLen)
271 .clampScalar(0, sXLen, sXLen);
272
273 getActionDefinitionsBuilder(G_SELECT)
274 .legalFor({{s32, sXLen}, {p0, sXLen}})
275 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
276 typeIsLegalBoolVec(1, BoolVecTys, ST)))
277 .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
278 .widenScalarToNextPow2(0)
279 .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
280 .clampScalar(1, sXLen, sXLen);
281
282 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
283 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
284 auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
285
286 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
287 // is supported, we only require byte alignment. Otherwise, we need the memory
288 // op to be natively aligned.
289 auto getScalarMemAlign = [&ST](unsigned Size) {
290 return ST.enableUnalignedScalarMem() ? 8 : Size;
291 };
292
293 LoadActions.legalForTypesWithMemDesc(
294 {{s16, p0, s8, getScalarMemAlign(8)},
295 {s32, p0, s8, getScalarMemAlign(8)},
296 {s16, p0, s16, getScalarMemAlign(16)},
297 {s32, p0, s16, getScalarMemAlign(16)},
298 {s32, p0, s32, getScalarMemAlign(32)},
299 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
300 StoreActions.legalForTypesWithMemDesc(
301 {{s16, p0, s8, getScalarMemAlign(8)},
302 {s32, p0, s8, getScalarMemAlign(8)},
303 {s16, p0, s16, getScalarMemAlign(16)},
304 {s32, p0, s16, getScalarMemAlign(16)},
305 {s32, p0, s32, getScalarMemAlign(32)},
306 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
307 ExtLoadActions.legalForTypesWithMemDesc(
308 {{sXLen, p0, s8, getScalarMemAlign(8)},
309 {sXLen, p0, s16, getScalarMemAlign(16)}});
310 if (XLen == 64) {
311 LoadActions.legalForTypesWithMemDesc(
312 {{s64, p0, s8, getScalarMemAlign(8)},
313 {s64, p0, s16, getScalarMemAlign(16)},
314 {s64, p0, s32, getScalarMemAlign(32)},
315 {s64, p0, s64, getScalarMemAlign(64)}});
316 StoreActions.legalForTypesWithMemDesc(
317 {{s64, p0, s8, getScalarMemAlign(8)},
318 {s64, p0, s16, getScalarMemAlign(16)},
319 {s64, p0, s32, getScalarMemAlign(32)},
320 {s64, p0, s64, getScalarMemAlign(64)}});
321 ExtLoadActions.legalForTypesWithMemDesc(
322 {{s64, p0, s32, getScalarMemAlign(32)}});
323 } else if (ST.hasStdExtD()) {
324 LoadActions.legalForTypesWithMemDesc(
325 {{s64, p0, s64, getScalarMemAlign(64)}});
326 StoreActions.legalForTypesWithMemDesc(
327 {{s64, p0, s64, getScalarMemAlign(64)}});
328 }
329
330 // Vector loads/stores.
331 if (ST.hasVInstructions()) {
332 LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
333 {nxv4s8, p0, nxv4s8, 8},
334 {nxv8s8, p0, nxv8s8, 8},
335 {nxv16s8, p0, nxv16s8, 8},
336 {nxv32s8, p0, nxv32s8, 8},
337 {nxv64s8, p0, nxv64s8, 8},
338 {nxv2s16, p0, nxv2s16, 16},
339 {nxv4s16, p0, nxv4s16, 16},
340 {nxv8s16, p0, nxv8s16, 16},
341 {nxv16s16, p0, nxv16s16, 16},
342 {nxv32s16, p0, nxv32s16, 16},
343 {nxv2s32, p0, nxv2s32, 32},
344 {nxv4s32, p0, nxv4s32, 32},
345 {nxv8s32, p0, nxv8s32, 32},
346 {nxv16s32, p0, nxv16s32, 32}});
347 StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
348 {nxv4s8, p0, nxv4s8, 8},
349 {nxv8s8, p0, nxv8s8, 8},
350 {nxv16s8, p0, nxv16s8, 8},
351 {nxv32s8, p0, nxv32s8, 8},
352 {nxv64s8, p0, nxv64s8, 8},
353 {nxv2s16, p0, nxv2s16, 16},
354 {nxv4s16, p0, nxv4s16, 16},
355 {nxv8s16, p0, nxv8s16, 16},
356 {nxv16s16, p0, nxv16s16, 16},
357 {nxv32s16, p0, nxv32s16, 16},
358 {nxv2s32, p0, nxv2s32, 32},
359 {nxv4s32, p0, nxv4s32, 32},
360 {nxv8s32, p0, nxv8s32, 32},
361 {nxv16s32, p0, nxv16s32, 32}});
362
363 if (ST.getELen() == 64) {
364 LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
365 {nxv1s16, p0, nxv1s16, 16},
366 {nxv1s32, p0, nxv1s32, 32}});
367 StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
368 {nxv1s16, p0, nxv1s16, 16},
369 {nxv1s32, p0, nxv1s32, 32}});
370 }
371
372 if (ST.hasVInstructionsI64()) {
373 LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
374 {nxv2s64, p0, nxv2s64, 64},
375 {nxv4s64, p0, nxv4s64, 64},
376 {nxv8s64, p0, nxv8s64, 64}});
377 StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
378 {nxv2s64, p0, nxv2s64, 64},
379 {nxv4s64, p0, nxv4s64, 64},
380 {nxv8s64, p0, nxv8s64, 64}});
381 }
382
383 // we will take the custom lowering logic if we have scalable vector types
384 // with non-standard alignments
385 LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
386 StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
387
388 // Pointers require that XLen sized elements are legal.
389 if (XLen <= ST.getELen()) {
390 LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
391 StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
392 }
393 }
394
395 LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
396 .lowerIfMemSizeNotByteSizePow2()
397 .clampScalar(0, s16, sXLen)
398 .lower();
399 StoreActions
400 .clampScalar(0, s16, sXLen)
401 .lowerIfMemSizeNotByteSizePow2()
402 .lower();
403
404 ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
405
406 getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
407
408 getActionDefinitionsBuilder(G_PTRTOINT)
409 .legalFor({{sXLen, p0}})
410 .clampScalar(0, sXLen, sXLen);
411
412 getActionDefinitionsBuilder(G_INTTOPTR)
413 .legalFor({{p0, sXLen}})
414 .clampScalar(1, sXLen, sXLen);
415
416 getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
417
418 getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
419
420 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
421
422 getActionDefinitionsBuilder(G_PHI)
423 .legalFor({p0, s32, sXLen})
424 .widenScalarToNextPow2(0)
425 .clampScalar(0, s32, sXLen);
426
427 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
428 .legalFor({p0});
429
430 if (ST.hasStdExtZmmul()) {
431 getActionDefinitionsBuilder(G_MUL)
432 .legalFor({sXLen})
433 .widenScalarToNextPow2(0)
434 .clampScalar(0, sXLen, sXLen);
435
436 // clang-format off
437 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
438 .legalFor({sXLen})
439 .lower();
440 // clang-format on
441
442 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
443 } else {
444 getActionDefinitionsBuilder(G_MUL)
445 .libcallFor({sXLen, sDoubleXLen})
446 .widenScalarToNextPow2(0)
447 .clampScalar(0, sXLen, sDoubleXLen);
448
449 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
450
451 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
452 .minScalar(0, sXLen)
453 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
454 // the low bits for the mul result and high bits to do the overflow
455 // check.
456 .widenScalarIf(typeIs(0, sXLen),
457 LegalizeMutations::changeTo(0, sDoubleXLen))
458 .lower();
459 }
460
461 if (ST.hasStdExtM()) {
462 getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
463 .legalFor({sXLen})
464 .customFor({s32})
465 .libcallFor({sDoubleXLen})
466 .clampScalar(0, s32, sDoubleXLen)
467 .widenScalarToNextPow2(0);
468 getActionDefinitionsBuilder(G_SREM)
469 .legalFor({sXLen})
470 .libcallFor({sDoubleXLen})
471 .clampScalar(0, sXLen, sDoubleXLen)
472 .widenScalarToNextPow2(0);
473 } else {
474 getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
475 .libcallFor({sXLen, sDoubleXLen})
476 .clampScalar(0, sXLen, sDoubleXLen)
477 .widenScalarToNextPow2(0);
478 }
479
480 // TODO: Use libcall for sDoubleXLen.
481 getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
482
483 getActionDefinitionsBuilder(G_ABS)
484 .customFor(ST.hasStdExtZbb(), {sXLen})
485 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
486 .lower();
487
488 getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
489 .legalFor(ST.hasStdExtZbb(), {sXLen})
490 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
491 .lower();
492
493 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
494
495 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
496
497 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
498
499 getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
500 .lower();
501
502 // FP Operations
503
504 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
505 getActionDefinitionsBuilder(
506 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
507 .legalFor(ST.hasStdExtF(), {s32})
508 .legalFor(ST.hasStdExtD(), {s64})
509 .legalFor(ST.hasStdExtZfh(), {s16})
510 .libcallFor({s32, s64})
511 .libcallFor(ST.is64Bit(), {s128});
512
513 getActionDefinitionsBuilder({G_FNEG, G_FABS})
514 .legalFor(ST.hasStdExtF(), {s32})
515 .legalFor(ST.hasStdExtD(), {s64})
516 .legalFor(ST.hasStdExtZfh(), {s16})
517 .lowerFor({s32, s64, s128});
518
519 getActionDefinitionsBuilder(G_FREM)
520 .libcallFor({s32, s64})
521 .libcallFor(ST.is64Bit(), {s128})
522 .minScalar(0, s32)
523 .scalarize(0);
524
525 getActionDefinitionsBuilder(G_FCOPYSIGN)
526 .legalFor(ST.hasStdExtF(), {{s32, s32}})
527 .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
528 .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
529 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
530 .lower();
531
532 // FIXME: Use Zfhmin.
533 getActionDefinitionsBuilder(G_FPTRUNC)
534 .legalFor(ST.hasStdExtD(), {{s32, s64}})
535 .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
536 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
537 .libcallFor({{s32, s64}})
538 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
539 getActionDefinitionsBuilder(G_FPEXT)
540 .legalFor(ST.hasStdExtD(), {{s64, s32}})
541 .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
542 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
543 .libcallFor({{s64, s32}})
544 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
545
546 getActionDefinitionsBuilder(G_FCMP)
547 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
548 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
549 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
550 .clampScalar(0, sXLen, sXLen)
551 .libcallFor({{sXLen, s32}, {sXLen, s64}})
552 .libcallFor(ST.is64Bit(), {{sXLen, s128}});
553
554 // TODO: Support vector version of G_IS_FPCLASS.
555 getActionDefinitionsBuilder(G_IS_FPCLASS)
556 .customFor(ST.hasStdExtF(), {{s1, s32}})
557 .customFor(ST.hasStdExtD(), {{s1, s64}})
558 .customFor(ST.hasStdExtZfh(), {{s1, s16}})
559 .lowerFor({{s1, s32}, {s1, s64}});
560
561 getActionDefinitionsBuilder(G_FCONSTANT)
562 .legalFor(ST.hasStdExtF(), {s32})
563 .legalFor(ST.hasStdExtD(), {s64})
564 .legalFor(ST.hasStdExtZfh(), {s16})
565 .lowerFor({s32, s64, s128});
566
567 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
568 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
569 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
570 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
571 .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
572 .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
573 .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
574 .widenScalarToNextPow2(0)
575 .minScalar(0, s32)
576 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
577 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
578 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
579
580 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
581 .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
582 .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
583 .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
584 .widenScalarToNextPow2(1)
585 // Promote to XLen if the operation is legal.
586 .widenScalarIf(
587 [=, &ST](const LegalityQuery &Query) {
588 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
589 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
590 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
591 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
592 (ST.hasStdExtZfh() &&
593 Query.Types[0].getSizeInBits() == 16));
594 },
595 LegalizeMutations::changeTo(1, sXLen))
596 // Otherwise only promote to s32 since we have si libcalls.
597 .minScalar(1, s32)
598 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
599 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
600 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
601
602 // FIXME: We can do custom inline expansion like SelectionDAG.
603 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
604 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
605 G_INTRINSIC_ROUNDEVEN})
606 .legalFor(ST.hasStdExtZfa(), {s32})
607 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
608 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
609 .libcallFor({s32, s64})
610 .libcallFor(ST.is64Bit(), {s128});
611
612 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
613 .legalFor(ST.hasStdExtZfa(), {s32})
614 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
615 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
616
617 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
618 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
619 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
620 G_FTANH})
621 .libcallFor({s32, s64})
622 .libcallFor(ST.is64Bit(), {s128});
623 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
624 .libcallFor({{s32, s32}, {s64, s32}})
625 .libcallFor(ST.is64Bit(), {s128, s32});
626
627 getActionDefinitionsBuilder(G_VASTART).customFor({p0});
628
629 // va_list must be a pointer, but most sized types are pretty easy to handle
630 // as the destination.
631 getActionDefinitionsBuilder(G_VAARG)
632 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
633 // other than sXLen.
634 .clampScalar(0, sXLen, sXLen)
635 .lowerForCartesianProduct({sXLen, p0}, {p0});
636
637 getActionDefinitionsBuilder(G_VSCALE)
638 .clampScalar(0, sXLen, sXLen)
639 .customFor({sXLen});
640
641 auto &SplatActions =
642 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
643 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
644 typeIs(1, sXLen)))
645 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
646 // Handle case of s64 element vectors on RV32. If the subtarget does not have
647 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
648 // does have f64, then we don't know whether the type is an f64 or an i64,
649 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
650 // depending on how the instructions it consumes are legalized. They are not
651 // legalized yet since legalization is in reverse postorder, so we cannot
652 // make the decision at this moment.
653 if (XLen == 32) {
654 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
655 SplatActions.legalIf(all(
656 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
657 else if (ST.hasVInstructionsI64())
658 SplatActions.customIf(all(
659 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
660 }
661
662 SplatActions.clampScalar(1, sXLen, sXLen);
663
664 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
665 LLT DstTy = Query.Types[0];
666 LLT SrcTy = Query.Types[1];
667 return DstTy.getElementType() == LLT::scalar(1) &&
668 DstTy.getElementCount().getKnownMinValue() >= 8 &&
669 SrcTy.getElementCount().getKnownMinValue() >= 8;
670 };
671 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
672 // We don't have the ability to slide mask vectors down indexed by their
673 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
674 // to equivalent i8 vectors.
675 .bitcastIf(
676 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
677 typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
678 [=](const LegalityQuery &Query) {
679 LLT CastTy = LLT::vector(
680 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
681 return std::pair(0, CastTy);
682 })
683 .customIf(LegalityPredicates::any(
684 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
685 typeIsLegalBoolVec(1, BoolVecTys, ST)),
686 all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
687 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
688
689 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
690 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
691 typeIsLegalBoolVec(1, BoolVecTys, ST)))
692 .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
693 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
694
695 getLegacyLegalizerInfo().computeTables();
696 verify(*ST.getInstrInfo());
697}
698
699bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
700 MachineInstr &MI) const {
701 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
702 switch (IntrinsicID) {
703 default:
704 return false;
705 case Intrinsic::vacopy: {
706 // vacopy arguments must be legal because of the intrinsic signature.
707 // No need to check here.
708
709 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
710 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
711 MachineFunction &MF = *MI.getMF();
712 const DataLayout &DL = MIRBuilder.getDataLayout();
713 LLVMContext &Ctx = MF.getFunction().getContext();
714
715 Register DstLst = MI.getOperand(1).getReg();
716 LLT PtrTy = MRI.getType(DstLst);
717
718 // Load the source va_list
719 Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
720 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
721 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
722 auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
723
724 // Store the result in the destination va_list
725 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
726 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
727 MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
728
729 MI.eraseFromParent();
730 return true;
731 }
732 }
733}
734
735bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
736 MachineIRBuilder &MIRBuilder) const {
737 // Stores the address of the VarArgsFrameIndex slot into the memory location
738 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
739 MachineFunction *MF = MI.getParent()->getParent();
740 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
741 int FI = FuncInfo->getVarArgsFrameIndex();
742 LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
743 auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
744 assert(MI.hasOneMemOperand());
745 MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
746 *MI.memoperands()[0]);
747 MI.eraseFromParent();
748 return true;
749}
750
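// Lower G_BRJT by scaling the index by the jump-table entry size, loading the
// entry, converting it to an address, and branching to it indirectly.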
751bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
752 MachineIRBuilder &MIRBuilder) const {
753 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
754 auto &MF = *MI.getParent()->getParent();
755 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
756 unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
757
758 Register PtrReg = MI.getOperand(0).getReg();
759 LLT PtrTy = MRI.getType(PtrReg);
760 Register IndexReg = MI.getOperand(2).getReg();
761 LLT IndexTy = MRI.getType(IndexReg);
762
763 if (!isPowerOf2_32(EntrySize))
764 return false;
765
766 auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
767 IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
768
769 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
770
771 MachineMemOperand *MMO = MF.getMachineMemOperand(
772 MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
773 EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
774
775 Register TargetReg;
776 switch (MJTI->getEntryKind()) {
777 default:
778 return false;
779 case MachineJumpTableInfo::EK_LabelDifference32: {
780 // For PIC, the sequence is:
781 // BRIND(load(Jumptable + index) + RelocBase)
782 // RelocBase can be JumpTable, GOT or some sort of global base.
783 unsigned LoadOpc =
784 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
785 auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
786 TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
787 break;
788 }
789 case MachineJumpTableInfo::EK_Custom32: {
790 auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
791 Addr, *MMO);
792 TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
793 break;
794 }
795 case MachineJumpTableInfo::EK_BlockAddress:
796 TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
797 break;
798 }
799
800 MIRBuilder.buildBrIndirect(TargetReg);
801
802 MI.eraseFromParent();
803 return true;
804}
805
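// Returns true if materializing the immediate would take more instructions
// than the subtarget's getMaxBuildIntsCost() budget, so a constant-pool load
// is preferable.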
806bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
807 bool ShouldOptForSize) const {
808 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
809 int64_t Imm = APImm.getSExtValue();
810 // All simm32 constants should be handled by isel.
811 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
812 // this check redundant, but small immediates are common so this check
813 // should have better compile time.
814 if (isInt<32>(Imm))
815 return false;
816
817 // We only need to cost the immediate, if constant pool lowering is enabled.
818 if (!STI.useConstantPoolForLargeInts())
819 return false;
820
821 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
822 if (Seq.size() <= STI.getMaxBuildIntsCost())
823 return false;
824
825 // Optimizations below are disabled for opt size. If we're optimizing for
826 // size, use a constant pool.
827 if (ShouldOptForSize)
828 return true;
829 //
830 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
831 // that if it will avoid a constant pool.
832 // It will require an extra temporary register though.
833 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
834 // low and high 32 bits are the same and bit 31 and 63 are set.
835 unsigned ShiftAmt, AddOpc;
836 RISCVMatInt::InstSeq SeqLo =
837 RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
838 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
839}
840
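// Lower G_VSCALE into arithmetic on VLENB (read via G_READ_VLENB); with
// RVVBitsPerBlock == 64, vscale is VLENB / 8.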
841bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
842 MachineIRBuilder &MIB) const {
843 const LLT XLenTy(STI.getXLenVT());
844 Register Dst = MI.getOperand(0).getReg();
845
846 // We define our scalable vector types for lmul=1 to use a 64 bit known
847 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
848 // vscale as VLENB / 8.
849 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
850 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
851 // Support for VLEN==32 is incomplete.
852 return false;
853
854 // We assume VLENB is a multiple of 8. We manually choose the best shift
855 // here because SimplifyDemandedBits isn't always able to simplify it.
856 uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
857 if (isPowerOf2_64(Val)) {
858 uint64_t Log2 = Log2_64(Val);
859 if (Log2 < 3) {
860 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
861 MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
862 } else if (Log2 > 3) {
863 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
864 MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
865 } else {
866 MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
867 }
868 } else if ((Val % 8) == 0) {
869 // If the multiplier is a multiple of 8, scale it down to avoid needing
870 // to shift the VLENB value.
871 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
872 MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
873 } else {
874 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
875 auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
876 MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
877 }
878 MI.eraseFromParent();
879 return true;
880}
881
882// Custom-lower extensions from mask vectors by using a vselect either with 1
883// for zero/any-extension or -1 for sign-extension:
884// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
885// Note that any-extension is lowered identically to zero-extension.
886bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
887 MachineIRBuilder &MIB) const {
888
889 unsigned Opc = MI.getOpcode();
890 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
891 Opc == TargetOpcode::G_ANYEXT);
892
893 MachineRegisterInfo &MRI = *MIB.getMRI();
894 Register Dst = MI.getOperand(0).getReg();
895 Register Src = MI.getOperand(1).getReg();
896
897 LLT DstTy = MRI.getType(Dst);
898 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
899 LLT DstEltTy = DstTy.getElementType();
900 auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
901 auto SplatTrue =
902 MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
903 MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
904
905 MI.eraseFromParent();
906 return true;
907}
908
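// Custom handling for vector loads/stores whose alignment is below the element
// size: bitcast the access to an equivalent i8 element vector, which only
// needs byte alignment.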
909bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
910 LegalizerHelper &Helper,
911 MachineIRBuilder &MIB) const {
912 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
913 "Machine instructions must be Load/Store.");
914 MachineRegisterInfo &MRI = *MIB.getMRI();
915 MachineFunction *MF = MI.getMF();
916 const DataLayout &DL = MIB.getDataLayout();
917 LLVMContext &Ctx = MF->getFunction().getContext();
918
919 Register DstReg = MI.getOperand(0).getReg();
920 LLT DataTy = MRI.getType(DstReg);
921 if (!DataTy.isVector())
922 return false;
923
924 if (!MI.hasOneMemOperand())
925 return false;
926
927 MachineMemOperand *MMO = *MI.memoperands_begin();
928
929 const auto *TLI = STI.getTargetLowering();
930 EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
931
932 if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
933 return true;
934
935 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
936 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
937 "Unexpected unaligned RVV load type");
938
939 // Calculate the new vector type with i8 elements
940 unsigned NumElements =
941 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
942 LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
943
944 Helper.bitcast(MI, 0, NewDataTy);
945
946 return true;
947}
948
949/// Return the type of the mask type suitable for masking the provided
950/// vector type. This is simply an i1 element type vector of the same
951/// (possibly scalable) length.
952static LLT getMaskTypeFor(LLT VecTy) {
953 assert(VecTy.isVector());
954 ElementCount EC = VecTy.getElementCount();
955 return LLT::vector(EC, LLT::scalar(1));
956}
957
958/// Creates an all ones mask suitable for masking a vector of type VecTy with
959/// vector length VL.
960static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
961 MachineIRBuilder &MIB,
962 MachineRegisterInfo &MRI) {
963 LLT MaskTy = getMaskTypeFor(VecTy);
964 return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
965}
966
967/// Gets the two common "VL" operands: an all-ones mask and the vector length.
968/// VecTy is a scalable vector type.
969static std::pair<MachineInstrBuilder, MachineInstrBuilder>
970buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
971 assert(VecTy.isScalableVector() && "Expecting scalable container type");
972 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
973 LLT XLenTy(STI.getXLenVT());
974 auto VL = MIB.buildConstant(XLenTy, -1);
975 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
976 return {Mask, VL};
977}
978
979static MachineInstrBuilder
980buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
981 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
982 MachineRegisterInfo &MRI) {
983 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
984 // splat Lo even if it might be sign extended. I don't think we have
985 // introduced a case where we build an s64 where the upper bits are undef
986 // yet.
987
988 // Fall back to a stack store and stride x0 vector load.
989 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
990 // preprocessDAG in SDAG.
991 return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
992 {Passthru, Lo, Hi, VL});
993}
994
995static MachineInstrBuilder
996buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
997 const SrcOp &Scalar, const SrcOp &VL,
998 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
999 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1000 auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
1001 return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
1002 Unmerge.getReg(1), VL, MIB, MRI);
1003}
1004
1005// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1006// legal equivalently-sized i8 type, so we can use that as a go-between.
1007// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1008// VMCLR_VL.
1009bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1010 MachineIRBuilder &MIB) const {
1011 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1012
1013 MachineRegisterInfo &MRI = *MIB.getMRI();
1014
1015 Register Dst = MI.getOperand(0).getReg();
1016 Register SplatVal = MI.getOperand(1).getReg();
1017
1018 LLT VecTy = MRI.getType(Dst);
1019 LLT XLenTy(STI.getXLenVT());
1020
1021 // Handle case of s64 element vectors on rv32
1022 if (XLenTy.getSizeInBits() == 32 &&
1023 VecTy.getElementType().getSizeInBits() == 64) {
1024 auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1025 buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1026 MRI);
1027 MI.eraseFromParent();
1028 return true;
1029 }
1030
1031 // All-zeros or all-ones splats are handled specially.
1032 MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1033 if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1034 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1035 MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1036 MI.eraseFromParent();
1037 return true;
1038 }
1039 if (isNullOrNullSplat(SplatValMI, MRI)) {
1040 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1041 MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1042 MI.eraseFromParent();
1043 return true;
1044 }
1045
1046 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1047 // ones) by promoting it to an s8 splat.
1048 LLT InterEltTy = LLT::scalar(8);
1049 LLT InterTy = VecTy.changeElementType(InterEltTy);
1050 auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1051 auto And =
1052 MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1053 auto LHS = MIB.buildSplatVector(InterTy, And);
1054 auto ZeroSplat =
1055 MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1056 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1057 MI.eraseFromParent();
1058 return true;
1059}
1060
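/// Return the scalable vector type with the same element type that fills
/// exactly one vector register (LMUL=1).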
1061static LLT getLMUL1Ty(LLT VecTy) {
1062 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1063 "Unexpected vector LLT");
1064 return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
1065 VecTy.getElementType().getSizeInBits(),
1066 VecTy.getElementType());
1067}
1068
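// Custom-lower G_EXTRACT_SUBVECTOR: when the subvector does not start on a
// vector register boundary, slide the enclosing LMUL=1 register group down by
// the remaining index and extract the result at element 0.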
1069bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1070 MachineIRBuilder &MIB) const {
1071 GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1072
1073 MachineRegisterInfo &MRI = *MIB.getMRI();
1074
1075 Register Dst = ES.getReg(0);
1076 Register Src = ES.getSrcVec();
1077 uint64_t Idx = ES.getIndexImm();
1078
1079 // With an index of 0 this is a cast-like subvector, which can be performed
1080 // with subregister operations.
1081 if (Idx == 0)
1082 return true;
1083
1084 LLT LitTy = MRI.getType(Dst);
1085 LLT BigTy = MRI.getType(Src);
1086
1087 if (LitTy.getElementType() == LLT::scalar(1)) {
1088 // We can't slide this mask vector up indexed by its i1 elements.
1089 // This poses a problem when we wish to insert a scalable vector which
1090 // can't be re-expressed as a larger type. Just choose the slow path and
1091 // extend to a larger type, then truncate back down.
1092 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1093 LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1094 auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1095 auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1096 auto SplatZero = MIB.buildSplatVector(
1097 ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
1098 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1099 MI.eraseFromParent();
1100 return true;
1101 }
1102
1103 // extract_subvector scales the index by vscale if the subvector is scalable,
1104 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1105 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1106 MVT LitTyMVT = getMVTForLLT(LitTy);
1107 auto Decompose =
1108 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1109 getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
1110 unsigned RemIdx = Decompose.second;
1111
1112 // If the Idx has been completely eliminated then this is a subvector extract
1113 // which naturally aligns to a vector register. These can easily be handled
1114 // using subregister manipulation.
1115 if (RemIdx == 0)
1116 return true;
1117
1118 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1119 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1120 // divide exactly.
1121 assert(
1122 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT))
1123 .second);
1124
1125 // If the vector type is an LMUL-group type, extract a subvector equal to the
1126 // nearest full vector register type.
1127 LLT InterLitTy = BigTy;
1128 Register Vec = Src;
1129 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1130 getLMUL1Ty(BigTy).getSizeInBits())) {
1131 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1132 // we should have successfully decomposed the extract into a subregister.
1133 assert(Decompose.first != RISCV::NoSubRegister);
1134 InterLitTy = getLMUL1Ty(BigTy);
1135 // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
1136 // specified on the source Register (the equivalent) since generic virtual
1137 // register does not allow subregister index.
1138 Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1139 }
1140
1141 // Slide this vector register down by the desired number of elements in order
1142 // to place the desired subvector starting at element 0.
1143 const LLT XLenTy(STI.getXLenVT());
1144 auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1145 auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
1146 uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
1147 auto Slidedown = MIB.buildInstr(
1148 RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1149 {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1150
1151 // Now the vector is in the right position, extract our final subvector. This
1152 // should resolve to a COPY.
1153 MIB.buildExtractSubvector(Dst, Slidedown, 0);
1154
1155 MI.eraseFromParent();
1156 return true;
1157}
1158
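// Custom-lower G_INSERT_SUBVECTOR: insert the subvector at offset 0 of an
// LMUL=1 container, then use vslideup (or a tail-undisturbed vmv.v.v for
// offset 0) to place it at the required position in the destination.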
1159bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1160 LegalizerHelper &Helper,
1161 MachineIRBuilder &MIB) const {
1162 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1163
1164 MachineRegisterInfo &MRI = *MIB.getMRI();
1165
1166 Register Dst = IS.getReg(0);
1167 Register BigVec = IS.getBigVec();
1168 Register LitVec = IS.getSubVec();
1169 uint64_t Idx = IS.getIndexImm();
1170
1171 LLT BigTy = MRI.getType(BigVec);
1172 LLT LitTy = MRI.getType(LitVec);
1173
1174 if (Idx == 0 ||
1175 MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1176 return true;
1177
1178 // We don't have the ability to slide mask vectors up indexed by their i1
1179 // elements; the smallest we can do is i8. Often we are able to bitcast to
1180 // equivalent i8 vectors. Otherwise, we must zeroextend to equivalent i8
1181 // vectors and truncate down after the insert.
1182 if (LitTy.getElementType() == LLT::scalar(1)) {
1183 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1184 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1185 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1186 return Helper.bitcast(
1187 IS, 0,
1188 LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
1189
1190 // We can't slide this mask vector up indexed by its i1 elements.
1191 // This poses a problem when we wish to insert a scalable vector which
1192 // can't be re-expressed as a larger type. Just choose the slow path and
1193 // extend to a larger type, then truncate back down.
1194 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1195 return Helper.widenScalar(IS, 0, ExtBigTy);
1196 }
1197
1198 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1199 unsigned SubRegIdx, RemIdx;
1200 std::tie(SubRegIdx, RemIdx) =
1201 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1202 getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1203
1204 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
1205 assert(isPowerOf2_64(
1206 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1207 bool ExactlyVecRegSized =
1208 STI.expandVScale(LitTy.getSizeInBits())
1209 .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1210
1211 // If the Idx has been completely eliminated and this subvector's size is a
1212 // vector register or a multiple thereof, or the surrounding elements are
1213 // undef, then this is a subvector insert which naturally aligns to a vector
1214 // register. These can easily be handled using subregister manipulation.
1215 if (RemIdx == 0 && ExactlyVecRegSized)
1216 return true;
1217
1218 // If the subvector is smaller than a vector register, then the insertion
1219 // must preserve the undisturbed elements of the register. We do this by
1220 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1221 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1222 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1223 // LMUL=1 type back into the larger vector (resolving to another subregister
1224 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1225 // to avoid allocating a large register group to hold our subvector.
1226
1227 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1228 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1229 // (in our case undisturbed). This means we can set up a subvector insertion
1230 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1231 // size of the subvector.
1232 const LLT XLenTy(STI.getXLenVT());
1233 LLT InterLitTy = BigTy;
1234 Register AlignedExtract = BigVec;
1235 unsigned AlignedIdx = Idx - RemIdx;
1236 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1237 getLMUL1Ty(BigTy).getSizeInBits())) {
1238 InterLitTy = getLMUL1Ty(BigTy);
1239 // Extract a subvector equal to the nearest full vector register type. This
1240 // should resolve to a G_EXTRACT on a subreg.
1241 AlignedExtract =
1242 MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1243 }
1244
1245 auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1246 LitVec, 0);
1247
1248 auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1249 auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1250
1251 // If we're inserting into the lowest elements, use a tail undisturbed
1252 // vmv.v.v.
1253 MachineInstrBuilder Inserted;
1254 bool NeedInsertSubvec =
1255 TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
1256 Register InsertedDst =
1257 NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1258 if (RemIdx == 0) {
1259 Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1260 {AlignedExtract, Insert, VL});
1261 } else {
1262 auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1263 // Construct the vector length corresponding to RemIdx + length(LitTy).
1264 VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1265 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1266 ElementCount EndIndex =
1267 ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
1268 uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1269 if (STI.expandVScale(EndIndex) ==
1270 STI.expandVScale(InterLitTy.getElementCount()))
1271 Policy = RISCVII::TAIL_AGNOSTIC;
1272
1273 Inserted =
1274 MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1275 {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1276 }
1277
1278 // If required, insert this subvector back into the correct vector register.
1279 // This should resolve to an INSERT_SUBREG instruction.
1280 if (NeedInsertSubvec)
1281 MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1282
1283 MI.eraseFromParent();
1284 return true;
1285}
1286
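// Map a generic opcode to the corresponding RISC-V *W custom generic opcode,
// used when an operation is widened to 64 bits on RV64 but must behave as a
// 32-bit operation with a sign-extended result.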
1287static unsigned getRISCVWOpcode(unsigned Opcode) {
1288 switch (Opcode) {
1289 default:
1290 llvm_unreachable("Unexpected opcode");
1291 case TargetOpcode::G_ASHR:
1292 return RISCV::G_SRAW;
1293 case TargetOpcode::G_LSHR:
1294 return RISCV::G_SRLW;
1295 case TargetOpcode::G_SHL:
1296 return RISCV::G_SLLW;
1297 case TargetOpcode::G_SDIV:
1298 return RISCV::G_DIVW;
1299 case TargetOpcode::G_UDIV:
1300 return RISCV::G_DIVUW;
1301 case TargetOpcode::G_UREM:
1302 return RISCV::G_REMUW;
1303 case TargetOpcode::G_ROTL:
1304 return RISCV::G_ROLW;
1305 case TargetOpcode::G_ROTR:
1306 return RISCV::G_RORW;
1307 case TargetOpcode::G_CTLZ:
1308 return RISCV::G_CLZW;
1309 case TargetOpcode::G_CTTZ:
1310 return RISCV::G_CTZW;
1311 case TargetOpcode::G_FPTOSI:
1312 return RISCV::G_FCVT_W_RV64;
1313 case TargetOpcode::G_FPTOUI:
1314 return RISCV::G_FCVT_WU_RV64;
1315 }
1316}
1317
1318bool RISCVLegalizerInfo::legalizeCustom(
1319 LegalizerHelper &Helper, MachineInstr &MI,
1320 LostDebugLocObserver &LocObserver) const {
1321 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1322 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1323 MachineFunction &MF = *MI.getParent()->getParent();
1324 switch (MI.getOpcode()) {
1325 default:
1326 // No idea what to do.
1327 return false;
1328 case TargetOpcode::G_ABS:
1329 return Helper.lowerAbsToMaxNeg(MI);
1330 // TODO: G_FCONSTANT
1331 case TargetOpcode::G_CONSTANT: {
1332 const Function &F = MF.getFunction();
1333 // TODO: if PSI and BFI are present, add " ||
1334 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1335 bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
1336 const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
1337 if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1338 return true;
1339 return Helper.lowerConstant(MI);
1340 }
1341 case TargetOpcode::G_SUB:
1342 case TargetOpcode::G_ADD: {
1343 Helper.Observer.changingInstr(MI);
1344 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1345 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1346
1347 Register DstALU = MRI.createGenericVirtualRegister(sXLen);
1348
1349 MachineOperand &MO = MI.getOperand(0);
1350 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1351 auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
1352
1353 MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
1354 MO.setReg(DstALU);
1355
1356 Helper.Observer.changedInstr(MI);
1357 return true;
1358 }
1359 case TargetOpcode::G_SEXT_INREG: {
1360 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1361 int64_t SizeInBits = MI.getOperand(2).getImm();
1362 // Source size of 32 is sext.w.
1363 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1364 return true;
1365
1366 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1367 return true;
1368
1369 return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
1370 LegalizerHelper::Legalized;
1371 }
1372 case TargetOpcode::G_ASHR:
1373 case TargetOpcode::G_LSHR:
1374 case TargetOpcode::G_SHL: {
1375 if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1376 // We don't need a custom node for shift by constant. Just widen the
1377 // source and the shift amount.
1378 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1379 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1380 ExtOpc = TargetOpcode::G_SEXT;
1381 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1382 ExtOpc = TargetOpcode::G_ZEXT;
1383
1384 Helper.Observer.changingInstr(MI);
1385 Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1386 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1387 Helper.widenScalarDst(MI, sXLen);
1388 Helper.Observer.changedInstr(MI);
1389 return true;
1390 }
1391
1392 Helper.Observer.changingInstr(MI);
1393 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1394 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1395 Helper.widenScalarDst(MI, sXLen);
1396 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1397 Helper.Observer.changedInstr(MI);
1398 return true;
1399 }
1400 case TargetOpcode::G_SDIV:
1401 case TargetOpcode::G_UDIV:
1402 case TargetOpcode::G_UREM:
1403 case TargetOpcode::G_ROTL:
1404 case TargetOpcode::G_ROTR: {
1405 Helper.Observer.changingInstr(MI);
1406 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1407 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1408 Helper.widenScalarDst(MI, sXLen);
1409 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1410 Helper.Observer.changedInstr(MI);
1411 return true;
1412 }
1413 case TargetOpcode::G_CTLZ:
1414 case TargetOpcode::G_CTTZ: {
1415 Helper.Observer.changingInstr(MI);
1416 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1417 Helper.widenScalarDst(MI, sXLen);
1418 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1419 Helper.Observer.changedInstr(MI);
1420 return true;
1421 }
1422 case TargetOpcode::G_FPTOSI:
1423 case TargetOpcode::G_FPTOUI: {
1424 Helper.Observer.changingInstr(MI);
1425 Helper.widenScalarDst(MI, sXLen);
1426 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1427 MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
1428 Helper.Observer.changedInstr(MI);
1429 return true;
1430 }
1431 case TargetOpcode::G_IS_FPCLASS: {
1432 Register GISFPCLASS = MI.getOperand(0).getReg();
1433 Register Src = MI.getOperand(1).getReg();
1434 const MachineOperand &ImmOp = MI.getOperand(2);
1435 MachineIRBuilder MIB(MI);
1436
1437 // Turn LLVM IR's floating point classes to that in RISC-V,
1438 // by simply rotating the 10-bit immediate right by two bits.
1439 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1440 auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1441 auto ConstZero = MIB.buildConstant(sXLen, 0);
1442
1443 auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1444 auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1445 MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1446
1447 MI.eraseFromParent();
1448 return true;
1449 }
1450 case TargetOpcode::G_BRJT:
1451 return legalizeBRJT(MI, MIRBuilder);
1452 case TargetOpcode::G_VASTART:
1453 return legalizeVAStart(MI, MIRBuilder);
1454 case TargetOpcode::G_VSCALE:
1455 return legalizeVScale(MI, MIRBuilder);
1456 case TargetOpcode::G_ZEXT:
1457 case TargetOpcode::G_SEXT:
1458 case TargetOpcode::G_ANYEXT:
1459 return legalizeExt(MI, MIRBuilder);
1460 case TargetOpcode::G_SPLAT_VECTOR:
1461 return legalizeSplatVector(MI, MIRBuilder);
1462 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1463 return legalizeExtractSubvector(MI, MIRBuilder);
1464 case TargetOpcode::G_INSERT_SUBVECTOR:
1465 return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1466 case TargetOpcode::G_LOAD:
1467 case TargetOpcode::G_STORE:
1468 return legalizeLoadStore(MI, Helper, MIRBuilder);
1469 }
1470
1471 llvm_unreachable("expected switch to return");
1472}
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildBrIndirect(Register Tgt)
Build and insert G_BRINDIRECT Tgt.
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getEntrySize(const DataLayout &TD) const
getEntrySize - Return the size of each entry in the jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
unsigned getEntryAlignment(const DataLayout &TD) const
getEntryAlignment - Return the alignment of each entry in the jump table.
JTEntryKind getEntryKind() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
RISCVLegalizerInfo(const RISCVSubtarget &ST)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
unsigned getMaxBuildIntsCost() const
bool useConstantPoolForLargeInts() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static RISCVII::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Register getReg() const
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:225
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static constexpr unsigned RVVBitsPerBlock
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1987
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1565
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
@ And
Bitwise or logical AND of integers.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
std::function< bool(const LegalityQuery &)> LegalityPredicate
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.