LLVM 20.0.0git
RISCVLegalizerInfo.cpp
Go to the documentation of this file.
1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
16#include "RISCVSubtarget.h"
28#include "llvm/IR/Type.h"
29
30using namespace llvm;
31using namespace LegalityPredicates;
32using namespace LegalizeMutations;
33
// Builds a legality predicate for the integer/FP scalable vector type found at
// type index \p TypeIdx of a query.
//
// The returned predicate is all(typeInSet(TypeIdx, IntOrFPVecTys), P), i.e. the
// type must be one of \p IntOrFPVecTys and must also pass the subtarget check
// P defined below. \p ST is captured by reference inside P.
//
// NOTE(review): the line carrying this function's return type / storage class
// (original line 34) is missing from this listing; the sibling
// typeIsLegalPtrVec is declared `static LegalityPredicate` — confirm.
35typeIsLegalIntOrFPVec(unsigned TypeIdx,
36 std::initializer_list<LLT> IntOrFPVecTys,
37 const RISCVSubtarget &ST) {
// P holds only when all three conditions are met:
//   * the subtarget has vector instructions;
//   * a 64-bit element type is used only if ST.hasVInstructionsI64();
//   * a known-minimum element count of 1 is used only if ST.getELen() == 64.
38 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
39 return ST.hasVInstructions() &&
40 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
41 ST.hasVInstructionsI64()) &&
42 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
43 ST.getELen() == 64);
44 };
45
46 return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
47}
48
// Builds a legality predicate for the boolean (mask) scalable vector type found
// at type index \p TypeIdx of a query.
//
// The returned predicate is all(typeInSet(TypeIdx, BoolVecTys), P), i.e. the
// type must be one of \p BoolVecTys and must also pass the subtarget check P
// defined below. \p ST is captured by reference inside P.
//
// NOTE(review): the line carrying this function's return type / storage class
// (original line 49) is missing from this listing; the sibling
// typeIsLegalPtrVec is declared `static LegalityPredicate` — confirm.
50typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
51 const RISCVSubtarget &ST) {
// P holds when the subtarget has vector instructions and a known-minimum
// element count of 1 is used only if ST.getELen() == 64.
52 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
53 return ST.hasVInstructions() &&
54 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
55 ST.getELen() == 64);
56 };
57 return all(typeInSet(TypeIdx, BoolVecTys), P);
58}
59
60static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
61 std::initializer_list<LLT> PtrVecTys,
62 const RISCVSubtarget &ST) {
63 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
64 return ST.hasVInstructions() &&
65 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
66 ST.getELen() == 64) &&
67 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
68 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
69 };
70 return all(typeInSet(TypeIdx, PtrVecTys), P);
71}
72
74 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
75 const LLT sDoubleXLen = LLT::scalar(2 * XLen);
76 const LLT p0 = LLT::pointer(0, XLen);
77 const LLT s1 = LLT::scalar(1);
78 const LLT s8 = LLT::scalar(8);
79 const LLT s16 = LLT::scalar(16);
80 const LLT s32 = LLT::scalar(32);
81 const LLT s64 = LLT::scalar(64);
82 const LLT s128 = LLT::scalar(128);
83
84 const LLT nxv1s1 = LLT::scalable_vector(1, s1);
85 const LLT nxv2s1 = LLT::scalable_vector(2, s1);
86 const LLT nxv4s1 = LLT::scalable_vector(4, s1);
87 const LLT nxv8s1 = LLT::scalable_vector(8, s1);
88 const LLT nxv16s1 = LLT::scalable_vector(16, s1);
89 const LLT nxv32s1 = LLT::scalable_vector(32, s1);
90 const LLT nxv64s1 = LLT::scalable_vector(64, s1);
91
92 const LLT nxv1s8 = LLT::scalable_vector(1, s8);
93 const LLT nxv2s8 = LLT::scalable_vector(2, s8);
94 const LLT nxv4s8 = LLT::scalable_vector(4, s8);
95 const LLT nxv8s8 = LLT::scalable_vector(8, s8);
96 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
97 const LLT nxv32s8 = LLT::scalable_vector(32, s8);
98 const LLT nxv64s8 = LLT::scalable_vector(64, s8);
99
100 const LLT nxv1s16 = LLT::scalable_vector(1, s16);
101 const LLT nxv2s16 = LLT::scalable_vector(2, s16);
102 const LLT nxv4s16 = LLT::scalable_vector(4, s16);
103 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
104 const LLT nxv16s16 = LLT::scalable_vector(16, s16);
105 const LLT nxv32s16 = LLT::scalable_vector(32, s16);
106
107 const LLT nxv1s32 = LLT::scalable_vector(1, s32);
108 const LLT nxv2s32 = LLT::scalable_vector(2, s32);
109 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
110 const LLT nxv8s32 = LLT::scalable_vector(8, s32);
111 const LLT nxv16s32 = LLT::scalable_vector(16, s32);
112
113 const LLT nxv1s64 = LLT::scalable_vector(1, s64);
114 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
115 const LLT nxv4s64 = LLT::scalable_vector(4, s64);
116 const LLT nxv8s64 = LLT::scalable_vector(8, s64);
117
118 const LLT nxv1p0 = LLT::scalable_vector(1, p0);
119 const LLT nxv2p0 = LLT::scalable_vector(2, p0);
120 const LLT nxv4p0 = LLT::scalable_vector(4, p0);
121 const LLT nxv8p0 = LLT::scalable_vector(8, p0);
122 const LLT nxv16p0 = LLT::scalable_vector(16, p0);
123
124 using namespace TargetOpcode;
125
126 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
127
128 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
129 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
130 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
131 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
132
133 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
134
135 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
136 .legalFor({sXLen})
137 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
139 .clampScalar(0, sXLen, sXLen);
140
142 {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
143
144 getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
145
146 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
147 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
148 .lower();
149
150 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
151 .legalFor({{sXLen, sXLen}})
152 .customFor(ST.is64Bit(), {{s32, s32}})
153 .widenScalarToNextPow2(0)
154 .clampScalar(1, sXLen, sXLen)
155 .clampScalar(0, sXLen, sXLen);
156
157 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
158 .legalFor({{s32, s16}})
159 .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
160 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
161 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
162 .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
163 .maxScalar(0, sXLen);
164
165 getActionDefinitionsBuilder(G_SEXT_INREG)
166 .customFor({sXLen})
167 .clampScalar(0, sXLen, sXLen)
168 .lower();
169
170 // Merge/Unmerge
171 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
172 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
173 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
174 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
175 if (XLen == 32 && ST.hasStdExtD()) {
176 MergeUnmergeActions.legalIf(
177 all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
178 }
179 MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
180 .widenScalarToNextPow2(BigTyIdx, XLen)
181 .clampScalar(LitTyIdx, sXLen, sXLen)
182 .clampScalar(BigTyIdx, sXLen, sXLen);
183 }
184
185 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
186
187 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
188 .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
189 .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
190 {{s32, s32}})
191 .lower();
192
193 getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
194
195 getActionDefinitionsBuilder(G_BITCAST).legalIf(
197 typeIsLegalBoolVec(0, BoolVecTys, ST)),
199 typeIsLegalBoolVec(1, BoolVecTys, ST))));
200
201 auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
202 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
203 BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
204 else
205 BSWAPActions.maxScalar(0, sXLen).lower();
206
207 auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
208 auto &CountZerosUndefActions =
209 getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
210 if (ST.hasStdExtZbb()) {
211 CountZerosActions.legalFor({{sXLen, sXLen}})
212 .customFor({{s32, s32}})
213 .clampScalar(0, s32, sXLen)
214 .widenScalarToNextPow2(0)
215 .scalarSameSizeAs(1, 0);
216 } else {
217 CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
218 CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
219 }
220 CountZerosUndefActions.lower();
221
222 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
223 if (ST.hasStdExtZbb()) {
224 CTPOPActions.legalFor({{sXLen, sXLen}})
225 .clampScalar(0, sXLen, sXLen)
226 .scalarSameSizeAs(1, 0);
227 } else {
228 CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
229 }
230
231 getActionDefinitionsBuilder(G_CONSTANT)
232 .legalFor({p0})
233 .legalFor(!ST.is64Bit(), {s32})
234 .customFor(ST.is64Bit(), {s64})
235 .widenScalarToNextPow2(0)
236 .clampScalar(0, sXLen, sXLen);
237
238 // TODO: Transform illegal vector types into legal vector types.
239 getActionDefinitionsBuilder(G_FREEZE)
240 .legalFor({s16, s32, p0})
241 .legalFor(ST.is64Bit(), {s64})
242 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
243 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
244 .widenScalarToNextPow2(0)
245 .clampScalar(0, s16, sXLen);
246
247 // TODO: Transform illegal vector types into legal vector types.
248 // TODO: Merge with G_FREEZE?
249 getActionDefinitionsBuilder(
250 {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
251 .legalFor({s32, sXLen, p0})
252 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
253 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
254 .widenScalarToNextPow2(0)
255 .clampScalar(0, s32, sXLen);
256
257 getActionDefinitionsBuilder(G_ICMP)
258 .legalFor({{sXLen, sXLen}, {sXLen, p0}})
259 .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
260 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
261 .widenScalarOrEltToNextPow2OrMinSize(1, 8)
262 .clampScalar(1, sXLen, sXLen)
263 .clampScalar(0, sXLen, sXLen);
264
265 getActionDefinitionsBuilder(G_SELECT)
266 .legalFor({{s32, sXLen}, {p0, sXLen}})
267 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
268 typeIsLegalBoolVec(1, BoolVecTys, ST)))
269 .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
270 .widenScalarToNextPow2(0)
271 .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
272 .clampScalar(1, sXLen, sXLen);
273
274 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
275 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
276 auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
277
278 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
279 // is supported, we only require byte alignment. Otherwise, we need the memory
280 // op to be natively aligned.
281 auto getScalarMemAlign = [&ST](unsigned Size) {
282 return ST.enableUnalignedScalarMem() ? 8 : Size;
283 };
284
285 LoadActions.legalForTypesWithMemDesc(
286 {{s16, p0, s8, getScalarMemAlign(8)},
287 {s32, p0, s8, getScalarMemAlign(8)},
288 {s16, p0, s16, getScalarMemAlign(16)},
289 {s32, p0, s16, getScalarMemAlign(16)},
290 {s32, p0, s32, getScalarMemAlign(32)},
291 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
292 StoreActions.legalForTypesWithMemDesc(
293 {{s16, p0, s8, getScalarMemAlign(8)},
294 {s32, p0, s8, getScalarMemAlign(8)},
295 {s16, p0, s16, getScalarMemAlign(16)},
296 {s32, p0, s16, getScalarMemAlign(16)},
297 {s32, p0, s32, getScalarMemAlign(32)},
298 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
299 ExtLoadActions.legalForTypesWithMemDesc(
300 {{sXLen, p0, s8, getScalarMemAlign(8)},
301 {sXLen, p0, s16, getScalarMemAlign(16)}});
302 if (XLen == 64) {
303 LoadActions.legalForTypesWithMemDesc(
304 {{s64, p0, s8, getScalarMemAlign(8)},
305 {s64, p0, s16, getScalarMemAlign(16)},
306 {s64, p0, s32, getScalarMemAlign(32)},
307 {s64, p0, s64, getScalarMemAlign(64)}});
308 StoreActions.legalForTypesWithMemDesc(
309 {{s64, p0, s8, getScalarMemAlign(8)},
310 {s64, p0, s16, getScalarMemAlign(16)},
311 {s64, p0, s32, getScalarMemAlign(32)},
312 {s64, p0, s64, getScalarMemAlign(64)}});
313 ExtLoadActions.legalForTypesWithMemDesc(
314 {{s64, p0, s32, getScalarMemAlign(32)}});
315 } else if (ST.hasStdExtD()) {
316 LoadActions.legalForTypesWithMemDesc(
317 {{s64, p0, s64, getScalarMemAlign(64)}});
318 StoreActions.legalForTypesWithMemDesc(
319 {{s64, p0, s64, getScalarMemAlign(64)}});
320 }
321
322 // Vector loads/stores.
323 if (ST.hasVInstructions()) {
324 LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
325 {nxv4s8, p0, nxv4s8, 8},
326 {nxv8s8, p0, nxv8s8, 8},
327 {nxv16s8, p0, nxv16s8, 8},
328 {nxv32s8, p0, nxv32s8, 8},
329 {nxv64s8, p0, nxv64s8, 8},
330 {nxv2s16, p0, nxv2s16, 16},
331 {nxv4s16, p0, nxv4s16, 16},
332 {nxv8s16, p0, nxv8s16, 16},
333 {nxv16s16, p0, nxv16s16, 16},
334 {nxv32s16, p0, nxv32s16, 16},
335 {nxv2s32, p0, nxv2s32, 32},
336 {nxv4s32, p0, nxv4s32, 32},
337 {nxv8s32, p0, nxv8s32, 32},
338 {nxv16s32, p0, nxv16s32, 32}});
339 StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
340 {nxv4s8, p0, nxv4s8, 8},
341 {nxv8s8, p0, nxv8s8, 8},
342 {nxv16s8, p0, nxv16s8, 8},
343 {nxv32s8, p0, nxv32s8, 8},
344 {nxv64s8, p0, nxv64s8, 8},
345 {nxv2s16, p0, nxv2s16, 16},
346 {nxv4s16, p0, nxv4s16, 16},
347 {nxv8s16, p0, nxv8s16, 16},
348 {nxv16s16, p0, nxv16s16, 16},
349 {nxv32s16, p0, nxv32s16, 16},
350 {nxv2s32, p0, nxv2s32, 32},
351 {nxv4s32, p0, nxv4s32, 32},
352 {nxv8s32, p0, nxv8s32, 32},
353 {nxv16s32, p0, nxv16s32, 32}});
354
355 if (ST.getELen() == 64) {
356 LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
357 {nxv1s16, p0, nxv1s16, 16},
358 {nxv1s32, p0, nxv1s32, 32}});
359 StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
360 {nxv1s16, p0, nxv1s16, 16},
361 {nxv1s32, p0, nxv1s32, 32}});
362 }
363
364 if (ST.hasVInstructionsI64()) {
365 LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
366 {nxv2s64, p0, nxv2s64, 64},
367 {nxv4s64, p0, nxv4s64, 64},
368 {nxv8s64, p0, nxv8s64, 64}});
369 StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
370 {nxv2s64, p0, nxv2s64, 64},
371 {nxv4s64, p0, nxv4s64, 64},
372 {nxv8s64, p0, nxv8s64, 64}});
373 }
374
375 // Scalable vector types with non-standard alignments take the custom
376 // lowering path.
377 LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
378 StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
379
380 // Pointers require that XLen sized elements are legal.
381 if (XLen <= ST.getELen()) {
382 LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
383 StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
384 }
385 }
386
387 LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
388 .lowerIfMemSizeNotByteSizePow2()
389 .clampScalar(0, s16, sXLen)
390 .lower();
391 StoreActions
392 .clampScalar(0, s16, sXLen)
393 .lowerIfMemSizeNotByteSizePow2()
394 .lower();
395
396 ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
397
398 getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
399
400 getActionDefinitionsBuilder(G_PTRTOINT)
401 .legalFor({{sXLen, p0}})
402 .clampScalar(0, sXLen, sXLen);
403
404 getActionDefinitionsBuilder(G_INTTOPTR)
405 .legalFor({{p0, sXLen}})
406 .clampScalar(1, sXLen, sXLen);
407
408 getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
409
410 getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
411
412 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
413
414 getActionDefinitionsBuilder(G_PHI)
415 .legalFor({p0, s32, sXLen})
416 .widenScalarToNextPow2(0)
417 .clampScalar(0, s32, sXLen);
418
419 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
420 .legalFor({p0});
421
422 if (ST.hasStdExtZmmul()) {
423 getActionDefinitionsBuilder(G_MUL)
424 .legalFor({sXLen})
425 .widenScalarToNextPow2(0)
426 .clampScalar(0, sXLen, sXLen);
427
428 // clang-format off
429 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
430 .legalFor({sXLen})
431 .lower();
432 // clang-format on
433
434 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
435 } else {
436 getActionDefinitionsBuilder(G_MUL)
437 .libcallFor({sXLen, sDoubleXLen})
438 .widenScalarToNextPow2(0)
439 .clampScalar(0, sXLen, sDoubleXLen);
440
441 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
442
443 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
444 .minScalar(0, sXLen)
445 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
446 // the low bits for the mul result and high bits to do the overflow
447 // check.
448 .widenScalarIf(typeIs(0, sXLen),
449 LegalizeMutations::changeTo(0, sDoubleXLen))
450 .lower();
451 }
452
453 if (ST.hasStdExtM()) {
454 getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
455 .legalFor({sXLen})
456 .customFor({s32})
457 .libcallFor({sDoubleXLen})
458 .clampScalar(0, s32, sDoubleXLen)
459 .widenScalarToNextPow2(0);
460 getActionDefinitionsBuilder(G_SREM)
461 .legalFor({sXLen})
462 .libcallFor({sDoubleXLen})
463 .clampScalar(0, sXLen, sDoubleXLen)
464 .widenScalarToNextPow2(0);
465 } else {
466 getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
467 .libcallFor({sXLen, sDoubleXLen})
468 .clampScalar(0, sXLen, sDoubleXLen)
469 .widenScalarToNextPow2(0);
470 }
471
472 // TODO: Use libcall for sDoubleXLen.
473 getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
474
475 getActionDefinitionsBuilder(G_ABS)
476 .customFor(ST.hasStdExtZbb(), {sXLen})
477 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
478 .lower();
479
480 getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
481 .legalFor(ST.hasStdExtZbb(), {sXLen})
482 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
483 .lower();
484
485 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
486
487 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
488
489 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
490
491 getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
492 .lower();
493
494 // FP Operations
495
496 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
497 getActionDefinitionsBuilder(
498 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
499 .legalFor(ST.hasStdExtF(), {s32})
500 .legalFor(ST.hasStdExtD(), {s64})
501 .legalFor(ST.hasStdExtZfh(), {s16})
502 .libcallFor({s32, s64})
503 .libcallFor(ST.is64Bit(), {s128});
504
505 getActionDefinitionsBuilder({G_FNEG, G_FABS})
506 .legalFor(ST.hasStdExtF(), {s32})
507 .legalFor(ST.hasStdExtD(), {s64})
508 .legalFor(ST.hasStdExtZfh(), {s16})
509 .lowerFor({s32, s64, s128});
510
511 getActionDefinitionsBuilder(G_FREM)
512 .libcallFor({s32, s64})
513 .libcallFor(ST.is64Bit(), {s128})
514 .minScalar(0, s32)
515 .scalarize(0);
516
517 getActionDefinitionsBuilder(G_FCOPYSIGN)
518 .legalFor(ST.hasStdExtF(), {{s32, s32}})
519 .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
520 .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
521 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
522 .lower();
523
524 // FIXME: Use Zfhmin.
525 getActionDefinitionsBuilder(G_FPTRUNC)
526 .legalFor(ST.hasStdExtD(), {{s32, s64}})
527 .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
528 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
529 .libcallFor({{s32, s64}})
530 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
531 getActionDefinitionsBuilder(G_FPEXT)
532 .legalFor(ST.hasStdExtD(), {{s64, s32}})
533 .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
534 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
535 .libcallFor({{s64, s32}})
536 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
537
538 getActionDefinitionsBuilder(G_FCMP)
539 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
540 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
541 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
542 .clampScalar(0, sXLen, sXLen)
543 .libcallFor({{sXLen, s32}, {sXLen, s64}})
544 .libcallFor(ST.is64Bit(), {{sXLen, s128}});
545
546 // TODO: Support vector version of G_IS_FPCLASS.
547 getActionDefinitionsBuilder(G_IS_FPCLASS)
548 .customFor(ST.hasStdExtF(), {{s1, s32}})
549 .customFor(ST.hasStdExtD(), {{s1, s64}})
550 .customFor(ST.hasStdExtZfh(), {{s1, s16}})
551 .lowerFor({{s1, s32}, {s1, s64}});
552
553 getActionDefinitionsBuilder(G_FCONSTANT)
554 .legalFor(ST.hasStdExtF(), {s32})
555 .legalFor(ST.hasStdExtD(), {s64})
556 .legalFor(ST.hasStdExtZfh(), {s16})
557 .lowerFor({s32, s64, s128});
558
559 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
560 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
561 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
562 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
563 .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
564 .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
565 .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
566 .widenScalarToNextPow2(0)
567 .minScalar(0, s32)
568 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
569 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
570 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
571
572 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
573 .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
574 .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
575 .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
576 .widenScalarToNextPow2(1)
577 // Promote to XLen if the operation is legal.
578 .widenScalarIf(
579 [=, &ST](const LegalityQuery &Query) {
580 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
581 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
582 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
583 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
584 (ST.hasStdExtZfh() &&
585 Query.Types[0].getSizeInBits() == 16));
586 },
588 // Otherwise only promote to s32 since we have si libcalls.
589 .minScalar(1, s32)
590 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
591 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
592 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
593
594 // FIXME: We can do custom inline expansion like SelectionDAG.
595 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
596 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
597 G_INTRINSIC_ROUNDEVEN})
598 .legalFor(ST.hasStdExtZfa(), {s32})
599 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
600 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
601 .libcallFor({s32, s64})
602 .libcallFor(ST.is64Bit(), {s128});
603
604 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
605 .legalFor(ST.hasStdExtZfa(), {s32})
606 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
607 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
608
609 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
610 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
611 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
612 G_FTANH})
613 .libcallFor({s32, s64})
614 .libcallFor(ST.is64Bit(), {s128});
615 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
616 .libcallFor({{s32, s32}, {s64, s32}})
617 .libcallFor(ST.is64Bit(), {s128, s32});
618
619 getActionDefinitionsBuilder(G_VASTART).customFor({p0});
620
621 // va_list must be a pointer, but most sized types are pretty easy to handle
622 // as the destination.
623 getActionDefinitionsBuilder(G_VAARG)
624 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
625 // other than sXLen.
626 .clampScalar(0, sXLen, sXLen)
627 .lowerForCartesianProduct({sXLen, p0}, {p0});
628
629 getActionDefinitionsBuilder(G_VSCALE)
630 .clampScalar(0, sXLen, sXLen)
631 .customFor({sXLen});
632
633 auto &SplatActions =
634 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
635 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
636 typeIs(1, sXLen)))
637 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
638 // Handle case of s64 element vectors on RV32. If the subtarget does not have
639 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
640 // does have f64, then we don't know whether the type is an f64 or an i64,
641 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
642 // depending on how the instructions it consumes are legalized. They are not
643 // legalized yet since legalization is in reverse postorder, so we cannot
644 // make the decision at this moment.
645 if (XLen == 32) {
646 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
647 SplatActions.legalIf(all(
648 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
649 else if (ST.hasVInstructionsI64())
650 SplatActions.customIf(all(
651 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
652 }
653
654 SplatActions.clampScalar(1, sXLen, sXLen);
655
656 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
657 LLT DstTy = Query.Types[0];
658 LLT SrcTy = Query.Types[1];
659 return DstTy.getElementType() == LLT::scalar(1) &&
660 DstTy.getElementCount().getKnownMinValue() >= 8 &&
661 SrcTy.getElementCount().getKnownMinValue() >= 8;
662 };
663 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
664 // We don't have the ability to slide mask vectors down indexed by their
665 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
666 // to equivalent i8 vectors.
667 .bitcastIf(
668 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
669 typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
670 [=](const LegalityQuery &Query) {
671 LLT CastTy = LLT::vector(
672 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
673 return std::pair(0, CastTy);
674 })
675 .customIf(LegalityPredicates::any(
676 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
677 typeIsLegalBoolVec(1, BoolVecTys, ST)),
678 all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
679 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
680
681 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
682 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
683 typeIsLegalBoolVec(1, BoolVecTys, ST)))
684 .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
685 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
686
687 getLegacyLegalizerInfo().computeTables();
688 verify(*ST.getInstrInfo());
689}
690
692 MachineInstr &MI) const {
693 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
694 switch (IntrinsicID) {
695 default:
696 return false;
697 case Intrinsic::vacopy: {
698 // vacopy arguments must be legal because of the intrinsic signature.
699 // No need to check here.
700
701 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
702 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
703 MachineFunction &MF = *MI.getMF();
704 const DataLayout &DL = MIRBuilder.getDataLayout();
705 LLVMContext &Ctx = MF.getFunction().getContext();
706
707 Register DstLst = MI.getOperand(1).getReg();
708 LLT PtrTy = MRI.getType(DstLst);
709
710 // Load the source va_list
711 Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
713 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
714 auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
715
716 // Store the result in the destination va_list
719 MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
720
721 MI.eraseFromParent();
722 return true;
723 }
724 }
725}
726
/// Custom legalization of G_VASTART.
///
/// Builds a frame-index address for the function's varargs frame index and
/// stores it through the pointer held in operand 0 of \p MI, reusing MI's
/// single memory operand for the store. \p MI is erased afterwards.
///
/// \param MI          The G_VASTART instruction (asserted).
/// \param MIRBuilder  Builder used to emit the replacement instructions.
/// \returns true unconditionally.
727bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
728 MachineIRBuilder &MIRBuilder) const {
729 // Store the address of the VarArgsFrameIndex slot into the memory location
// addressed by operand 0.
730 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
731 MachineFunction *MF = MI.getParent()->getParent();
// NOTE(review): the declaration of FuncInfo (original line 732) is missing
// from this listing; presumably it is obtained from MF — confirm.
733 int FI = FuncInfo->getVarArgsFrameIndex();
// The frame-index value is given the same type as the destination pointer.
734 LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
735 auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
736 assert(MI.hasOneMemOperand());
737 MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
738 *MI.memoperands()[0]);
739 MI.eraseFromParent();
740 return true;
741}
742
/// Custom legalization of a jump-table branch (the constructor registers
/// G_BRJT as custom for {p0, sXLen}).
///
/// Scales the index by the jump-table entry size, adds it to the pointer in
/// operand 0, loads the entry, turns it into a branch target according to the
/// jump table's entry kind, emits an indirect branch to that target and erases
/// \p MI.
///
/// \param MI          The instruction being legalized; operand 0 is read as
///                    the pointer register and operand 2 as the index register.
/// \param MIRBuilder  Builder used to emit the replacement instructions.
/// \returns false if the entry size is not a power of two or the entry kind is
///          not one of the handled cases; true otherwise.
743bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
744 MachineIRBuilder &MIRBuilder) const {
745 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
746 auto &MF = *MI.getParent()->getParent();
747 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
748 unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
749
750 Register PtrReg = MI.getOperand(0).getReg();
751 LLT PtrTy = MRI.getType(PtrReg);
752 Register IndexReg = MI.getOperand(2).getReg();
753 LLT IndexTy = MRI.getType(IndexReg);
754
// The byte offset is computed with a shift, so the entry size must be a
// power of two.
755 if (!isPowerOf2_32(EntrySize))
756 return false;
757
// IndexReg now holds Index << log2(EntrySize), i.e. the byte offset of the
// selected entry.
758 auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
759 IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
760
761 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
762
// NOTE(review): the start of the next statement (original lines 763-764) is
// missing from this listing; presumably it creates the memory operand MMO
// used by the loads below — confirm.
765 EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
766
// NOTE(review): the case labels of this switch (original lines 771, 781 and
// 787) are missing from this listing, so which entry kind selects which body
// cannot be read from here.
767 Register TargetReg;
768 switch (MJTI->getEntryKind()) {
769 default:
770 return false;
772 // For PIC, the sequence is:
773 // BRIND(load(Jumptable + index) + RelocBase)
774 // RelocBase can be JumpTable, GOT or some sort of global base.
775 unsigned LoadOpc =
776 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
777 auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
778 TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
779 break;
780 }
// This body sign-extend-loads the entry as an integer of the index type and
// converts it to a pointer.
782 auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
783 Addr, *MMO);
784 TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
785 break;
786 }
// This body loads the entry directly as a pointer.
788 TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
789 break;
790 }
791
792 MIRBuilder.buildBrIndirect(TargetReg);
793
794 MI.eraseFromParent();
795 return true;
796}
797
/// Decides whether the integer immediate \p APImm should be placed in the
/// constant pool.
///
/// \param APImm             The immediate; its bit width must be 32 or 64
///                          (asserted). It is evaluated as a sign-extended
///                          64-bit value.
/// \param ShouldOptForSize  When true, the two-register special case at the end
///                          is not attempted and the function returns true once
///                          the earlier checks have not returned false.
/// \returns false for immediates that fit in a signed 32-bit value or whose
///          build sequence is within STI.getMaxBuildIntsCost(); otherwise true
///          unless the two-register sequence plus two instructions fits within
///          that cost.
798bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
799 bool ShouldOptForSize) const {
800 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
801 int64_t Imm = APImm.getSExtValue();
802 // All simm32 constants should be handled by isel.
803 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
804 // this check redundant, but small immediates are common so this check
805 // should have better compile time.
806 if (isInt<32>(Imm))
807 return false;
808
809 // We only need to cost the immediate if constant pool lowering is enabled.
// NOTE(review): the condition guarding the next return (original line 810) is
// missing from this listing.
811 return false;
812
// NOTE(review): the declaration of Seq (original line 813) is missing from
// this listing; presumably it is the instruction sequence that materializes
// Imm — confirm.
814 if (Seq.size() <= STI.getMaxBuildIntsCost())
815 return false;
816
817 // Optimizations below are disabled for opt size. If we're optimizing for
818 // size, use a constant pool.
819 if (ShouldOptForSize)
820 return true;
821 //
822 // Special case: see if we can build the constant as (ADD (SLLI X, C), X); do
823 // that if it will avoid a constant pool.
824 // It will require an extra temporary register though.
825 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
826 // low and high 32 bits are the same and bit 31 and 63 are set.
827 unsigned ShiftAmt, AddOpc;
// NOTE(review): the declaration of SeqLo (original line 828) is missing from
// this listing; it is initialized from the call on the next line.
829 RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
// Use the constant pool unless a two-register sequence exists and, with the
// two extra instructions, stays within the cost limit.
830 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
831}
832
/// Custom legalization of a vscale computation (the constructor registers
/// G_VSCALE as custom for sXLen).
///
/// Replaces \p MI with a read of VLENB (RISCV::G_READ_VLENB) combined with a
/// shift and/or multiply so that the result written to operand 0 equals
/// (VLENB / 8) * Val, where Val is the constant immediate in operand 1.
///
/// \param MI   The instruction being legalized; it is erased on success.
/// \param MIB  Builder used to emit the replacement instructions.
/// \returns true after rewriting; false on the early-exit path below.
833bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
834 MachineIRBuilder &MIB) const {
835 const LLT XLenTy(STI.getXLenVT());
836 Register Dst = MI.getOperand(0).getReg();
837
838 // We define our scalable vector types for lmul=1 to use a 64 bit known
839 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
840 // vscale as VLENB / 8.
841 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
// NOTE(review): the condition guarding the following return (original line
// 842) is missing from this listing.
843 // Support for VLEN==32 is incomplete.
844 return false;
845
846 // We assume VLENB is a multiple of 8. We manually choose the best shift
847 // here because SimplifyDemandedBits isn't always able to simplify it.
848 uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
849 if (isPowerOf2_64(Val)) {
// Power-of-two multiplier: fold the divide-by-8 and the multiply into a
// single shift (or no shift at all when Val == 8).
850 uint64_t Log2 = Log2_64(Val);
851 if (Log2 < 3) {
852 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
853 MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
854 } else if (Log2 > 3) {
855 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
856 MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
857 } else {
858 MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
859 }
860 } else if ((Val % 8) == 0) {
861 // If the multiplier is a multiple of 8, scale it down to avoid needing
862 // to shift the VLENB value.
863 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
864 MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
865 } else {
// General case: compute VLENB >> 3 first, then multiply by Val.
866 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
867 auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
868 MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
869 }
870 MI.eraseFromParent();
871 return true;
872}
873
874// Custom-lower extensions from mask vectors by using a vselect either with 1
875// for zero/any-extension or -1 for sign-extension:
876// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
877// Note that any-extension is lowered identically to zero-extension.
878bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
879 MachineIRBuilder &MIB) const {
880
881 unsigned Opc = MI.getOpcode();
882 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
883 Opc == TargetOpcode::G_ANYEXT);
884
886 Register Dst = MI.getOperand(0).getReg();
887 Register Src = MI.getOperand(1).getReg();
888
889 LLT DstTy = MRI.getType(Dst);
890 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
891 LLT DstEltTy = DstTy.getElementType();
892 auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
893 auto SplatTrue =
894 MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
895 MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
896
897 MI.eraseFromParent();
898 return true;
899}
900
901bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
902 LegalizerHelper &Helper,
903 MachineIRBuilder &MIB) const {
904 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
905 "Machine instructions must be Load/Store.");
907 MachineFunction *MF = MI.getMF();
908 const DataLayout &DL = MIB.getDataLayout();
909 LLVMContext &Ctx = MF->getFunction().getContext();
910
911 Register DstReg = MI.getOperand(0).getReg();
912 LLT DataTy = MRI.getType(DstReg);
913 if (!DataTy.isVector())
914 return false;
915
916 if (!MI.hasOneMemOperand())
917 return false;
918
919 MachineMemOperand *MMO = *MI.memoperands_begin();
920
921 const auto *TLI = STI.getTargetLowering();
922 EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
923
924 if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
925 return true;
926
927 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
928 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
929 "Unexpected unaligned RVV load type");
930
931 // Calculate the new vector type with i8 elements
932 unsigned NumElements =
933 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
934 LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
935
936 Helper.bitcast(MI, 0, NewDataTy);
937
938 return true;
939}
940
941/// Return the type of the mask type suitable for masking the provided
942/// vector type. This is simply an i1 element type vector of the same
943/// (possibly scalable) length.
944static LLT getMaskTypeFor(LLT VecTy) {
945 assert(VecTy.isVector());
946 ElementCount EC = VecTy.getElementCount();
947 return LLT::vector(EC, LLT::scalar(1));
948}
949
950/// Creates an all ones mask suitable for masking a vector of type VecTy with
951/// vector length VL.
953 MachineIRBuilder &MIB,
955 LLT MaskTy = getMaskTypeFor(VecTy);
956 return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
957}
958
959/// Gets the two common "VL" operands: an all-ones mask and the vector length.
960/// VecTy is a scalable vector type.
961static std::pair<MachineInstrBuilder, MachineInstrBuilder>
963 assert(VecTy.isScalableVector() && "Expecting scalable container type");
964 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
965 LLT XLenTy(STI.getXLenVT());
966 auto VL = MIB.buildConstant(XLenTy, -1);
967 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
968 return {Mask, VL};
969}
970
972buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
973 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
975 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
976 // splat Lo even if it might be sign extended. I don't think we have
977 // introduced a case where we're build a s64 where the upper bits are undef
978 // yet.
979
980 // Fall back to a stack store and stride x0 vector load.
981 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
982 // preprocessDAG in SDAG.
983 return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
984 {Passthru, Lo, Hi, VL});
985}
986
988buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
989 const SrcOp &Scalar, const SrcOp &VL,
991 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
992 auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
993 return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
994 Unmerge.getReg(1), VL, MIB, MRI);
995}
996
997// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
998// legal equivalently-sized i8 type, so we can use that as a go-between.
999// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1000// VMCLR_VL.
1001bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1002 MachineIRBuilder &MIB) const {
1003 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1004
1005 MachineRegisterInfo &MRI = *MIB.getMRI();
1006
1007 Register Dst = MI.getOperand(0).getReg();
1008 Register SplatVal = MI.getOperand(1).getReg();
1009
1010 LLT VecTy = MRI.getType(Dst);
1011 LLT XLenTy(STI.getXLenVT());
1012
1013 // Handle case of s64 element vectors on rv32
1014 if (XLenTy.getSizeInBits() == 32 &&
1015 VecTy.getElementType().getSizeInBits() == 64) {
1016 auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1017 buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1018 MRI);
1019 MI.eraseFromParent();
1020 return true;
1021 }
1022
1023 // All-zeros or all-ones splats are handled specially.
1024 MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1025 if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1026 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1027 MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1028 MI.eraseFromParent();
1029 return true;
1030 }
1031 if (isNullOrNullSplat(SplatValMI, MRI)) {
1032 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1033 MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1034 MI.eraseFromParent();
1035 return true;
1036 }
1037
1038 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1039 // ones) by promoting it to an s8 splat.
1040 LLT InterEltTy = LLT::scalar(8);
1041 LLT InterTy = VecTy.changeElementType(InterEltTy);
1042 auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1043 auto And =
1044 MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1045 auto LHS = MIB.buildSplatVector(InterTy, And);
1046 auto ZeroSplat =
1047 MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1048 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1049 MI.eraseFromParent();
1050 return true;
1051}
1052
1053static LLT getLMUL1Ty(LLT VecTy) {
1054 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1055 "Unexpected vector LLT");
1057 VecTy.getElementType().getSizeInBits(),
1058 VecTy.getElementType());
1059}
1060
// Custom legalization of G_EXTRACT_SUBVECTOR.
//
// Returns true without modifying MI when the index is 0 or when the decomposed
// remainder index (RemIdx) is 0. When the result has s1 elements, the source
// is zero-extended to s8 elements, the subvector is extracted from that, and
// the result is formed by comparing NE against a zero splat. Otherwise the
// source (or an intermediate subvector of it) is slid down by
// vscale * RemIdx with G_VSLIDEDOWN_VL and the result is extracted at index 0.
// MI is erased on the paths that emit replacement instructions.
//
// NOTE(review): the embedded source numbering in this listing skips lines
// 1100, 1114-1115, 1121 and 1138; the corresponding statements are incomplete
// below and must be restored from the upstream file before this compiles.
1061bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1062 MachineIRBuilder &MIB) const {
1063 GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1064
1065 MachineRegisterInfo &MRI = *MIB.getMRI();
1066
1067 Register Dst = ES.getReg(0);
1068 Register Src = ES.getSrcVec();
1069 uint64_t Idx = ES.getIndexImm();
1070
1071 // With an index of 0 this is a cast-like subvector, which can be performed
1072 // with subregister operations.
1073 if (Idx == 0)
1074 return true;
1075
1076 LLT LitTy = MRI.getType(Dst);
1077 LLT BigTy = MRI.getType(Src);
1078
1079 if (LitTy.getElementType() == LLT::scalar(1)) {
1080 // We can't slide this mask vector up indexed by its i1 elements.
1081 // This poses a problem when we wish to insert a scalable vector which
1082 // can't be re-expressed as a larger type. Just choose the slow path and
1083 // extend to a larger type, then truncate back down.
1084 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1085 LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1086 auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1087 auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1088 auto SplatZero = MIB.buildSplatVector(
1089 ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
// The "truncate back down" step is done as a compare-not-equal with zero.
1090 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1091 MI.eraseFromParent();
1092 return true;
1093 }
1094
1095 // extract_subvector scales the index by vscale if the subvector is scalable,
1096 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1097 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1098 MVT LitTyMVT = getMVTForLLT(LitTy);
// NOTE(review): the callee of this initializer (embedded line 1100) is absent
// from this listing; only its argument list remains. The result is used as a
// pair: .first is compared against RISCV::NoSubRegister and .second is the
// remaining index.
1099 auto Decompose =
1101 getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
1102 unsigned RemIdx = Decompose.second;
1103
1104 // If the Idx has been completely eliminated then this is a subvector extract
1105 // which naturally aligns to a vector register. These can easily be handled
1106 // using subregister manipulation.
1107 if (RemIdx == 0)
1108 return true;
1109
1110 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1111 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1112 // divide exactly.
// NOTE(review): the condition of this assert (embedded lines 1114-1115) is
// absent from this listing.
1113 assert(
1116
1117 // If the vector type is an LMUL-group type, extract a subvector equal to the
1118 // nearest full vector register type.
1119 LLT InterLitTy = BigTy;
1120 Register Vec = Src;
// NOTE(review): the opening of the conditional (embedded line 1121) is absent;
// the line below is the tail of its condition, comparing against the size of
// getLMUL1Ty(BigTy).
1122 getLMUL1Ty(BigTy).getSizeInBits())) {
1123 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1124 // we should have successfully decomposed the extract into a subregister.
1125 assert(Decompose.first != RISCV::NoSubRegister);
1126 InterLitTy = getLMUL1Ty(BigTy);
1127 // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
1128 // specified on the source Register (the equivalent) since generic virtual
1129 // register does not allow subregister index.
1130 Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1131 }
1132
1133 // Slide this vector register down by the desired number of elements in order
1134 // to place the desired subvector starting at element 0.
1135 const LLT XLenTy(STI.getXLenVT());
1136 auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1137 auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
// NOTE(review): the declaration of `Policy` (embedded line 1138) is absent
// from this listing although it is used as the last operand below.
1139 auto Slidedown = MIB.buildInstr(
1140 RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1141 {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1142
1143 // Now the vector is in the right position, extract our final subvector. This
1144 // should resolve to a COPY.
1145 MIB.buildExtractSubvector(Dst, Slidedown, 0);
1146
1147 MI.eraseFromParent();
1148 return true;
1149}
1150
// Custom legalization of G_INSERT_SUBVECTOR.
//
// Returns true without modifying MI when the index is 0, when the big vector
// is defined by G_IMPLICIT_DEF, or when the decomposed remainder index is 0
// and the subvector size is a multiple of the vector register size. Subvectors
// with s1 elements are delegated to Helper.bitcast / Helper.widenScalar.
// Otherwise the subvector is placed with G_VMV_V_V_VL (RemIdx == 0) or
// G_VSLIDEUP_VL, optionally going through an LMUL=1 intermediate type that is
// re-inserted into the big vector, and MI is erased.
//
// NOTE(review): the embedded source numbering in this listing skips lines
// 1180, 1193, 1196-1197, 1228, 1245 and 1259-1260; the corresponding
// statements are incomplete below and must be restored from the upstream file
// before this compiles.
1151bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1152 LegalizerHelper &Helper,
1153 MachineIRBuilder &MIB) const {
1154 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1155
1156 MachineRegisterInfo &MRI = *MIB.getMRI();
1157
1158 Register Dst = IS.getReg(0);
1159 Register BigVec = IS.getBigVec();
1160 Register LitVec = IS.getSubVec();
1161 uint64_t Idx = IS.getIndexImm();
1162
1163 LLT BigTy = MRI.getType(BigVec);
1164 LLT LitTy = MRI.getType(LitVec);
1165
// Nothing to do for an insert at index 0 or into an undefined big vector.
1166 if (Idx == 0 ||
1167 MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1168 return true;
1169
1170 // We don't have the ability to slide mask vectors up indexed by their i1
1171 // elements; the smallest we can do is i8. Often we are able to bitcast to
1172 // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1173 // vectors and truncate down after the insert.
1174 if (LitTy.getElementType() == LLT::scalar(1)) {
1175 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1176 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
// NOTE(review): the type argument of this bitcast (embedded line 1180) is
// absent from this listing. Also, the helper's LegalizeResult is returned
// from a bool function here and for widenScalar below - confirm that a
// failure result converts to false.
1177 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1178 return Helper.bitcast(
1179 IS, 0,
1181
1182 // We can't slide this mask vector up indexed by its i1 elements.
1183 // This poses a problem when we wish to insert a scalable vector which
1184 // can't be re-expressed as a larger type. Just choose the slow path and
1185 // extend to a larger type, then truncate back down.
1186 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1187 return Helper.widenScalar(IS, 0, ExtBigTy);
1188 }
1189
1190 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1191 unsigned SubRegIdx, RemIdx;
// NOTE(review): the callee producing this pair (embedded line 1193) is absent
// from this listing; only its argument list remains.
1192 std::tie(SubRegIdx, RemIdx) =
1194 getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1195
// NOTE(review): embedded lines 1196-1197 are absent. The line below is the
// tail of a statement whose start is missing, and `VecRegSize`, used a few
// lines further down, has no visible declaration.
1198 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1199 bool ExactlyVecRegSized =
1200 STI.expandVScale(LitTy.getSizeInBits())
1201 .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1202
1203 // If the Idx has been completely eliminated and this subvector's size is a
1204 // vector register or a multiple thereof, or the surrounding elements are
1205 // undef, then this is a subvector insert which naturally aligns to a vector
1206 // register. These can easily be handled using subregister manipulation.
1207 if (RemIdx == 0 && ExactlyVecRegSized)
1208 return true;
1209
1210 // If the subvector is smaller than a vector register, then the insertion
1211 // must preserve the undisturbed elements of the register. We do this by
1212 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1213 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1214 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1215 // LMUL=1 type back into the larger vector (resolving to another subregister
1216 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1217 // to avoid allocating a large register group to hold our subvector.
1218
1219 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1220 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1221 // (in our case undisturbed). This means we can set up a subvector insertion
1222 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1223 // size of the subvector.
1224 const LLT XLenTy(STI.getXLenVT());
1225 LLT InterLitTy = BigTy;
1226 Register AlignedExtract = BigVec;
// Index of the start of the vector register that contains the insert point.
1227 unsigned AlignedIdx = Idx - RemIdx;
// NOTE(review): the opening of the conditional (embedded line 1228) is absent;
// the line below is the tail of its condition.
1229 getLMUL1Ty(BigTy).getSizeInBits())) {
1230 InterLitTy = getLMUL1Ty(BigTy);
1231 // Extract a subvector equal to the nearest full vector register type. This
1232 // should resolve to a G_EXTRACT on a subreg.
1233 AlignedExtract =
1234 MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1235 }
1236
// Widen the subvector to the intermediate type by inserting it at index 0 of
// an undefined vector.
1237 auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1238 LitVec, 0);
1239
1240 auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1241 auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1242
1243 // If we're inserting into the lowest elements, use a tail undisturbed
1244 // vmv.v.v.
// NOTE(review): the declaration of `Inserted` (embedded line 1245) is absent
// from this listing although it is assigned in both branches below.
1246 bool NeedInsertSubvec =
1247 TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
// Write straight into Dst unless the result must still be re-inserted into
// the larger vector afterwards.
1248 Register InsertedDst =
1249 NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1250 if (RemIdx == 0) {
1251 Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1252 {AlignedExtract, Insert, VL});
1253 } else {
1254 auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1255 // Construct the vector length corresponding to RemIdx + length(LitTy).
1256 VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1257 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
// NOTE(review): the initializer of `EndIndex` and the declaration of `Policy`
// (embedded lines 1259-1260) are absent from this listing.
1258 ElementCount EndIndex =
1261 if (STI.expandVScale(EndIndex) ==
1262 STI.expandVScale(InterLitTy.getElementCount()))
1263 Policy = RISCVII::TAIL_AGNOSTIC;
1264
1265 Inserted =
1266 MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1267 {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1268 }
1269
1270 // If required, insert this subvector back into the correct vector register.
1271 // This should resolve to an INSERT_SUBREG instruction.
1272 if (NeedInsertSubvec)
1273 MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1274
1275 MI.eraseFromParent();
1276 return true;
1277}
1278
1279static unsigned getRISCVWOpcode(unsigned Opcode) {
1280 switch (Opcode) {
1281 default:
1282 llvm_unreachable("Unexpected opcode");
1283 case TargetOpcode::G_ASHR:
1284 return RISCV::G_SRAW;
1285 case TargetOpcode::G_LSHR:
1286 return RISCV::G_SRLW;
1287 case TargetOpcode::G_SHL:
1288 return RISCV::G_SLLW;
1289 case TargetOpcode::G_SDIV:
1290 return RISCV::G_DIVW;
1291 case TargetOpcode::G_UDIV:
1292 return RISCV::G_DIVUW;
1293 case TargetOpcode::G_UREM:
1294 return RISCV::G_REMUW;
1295 case TargetOpcode::G_ROTL:
1296 return RISCV::G_ROLW;
1297 case TargetOpcode::G_ROTR:
1298 return RISCV::G_RORW;
1299 case TargetOpcode::G_CTLZ:
1300 return RISCV::G_CLZW;
1301 case TargetOpcode::G_CTTZ:
1302 return RISCV::G_CTZW;
1303 case TargetOpcode::G_FPTOSI:
1304 return RISCV::G_FCVT_W_RV64;
1305 case TargetOpcode::G_FPTOUI:
1306 return RISCV::G_FCVT_WU_RV64;
1307 }
1308}
1309
// Custom legalization entry point: dispatches on MI.getOpcode() and either
// rewrites MI in place, replaces it with new instructions, or forwards to one
// of the legalize* helpers / LegalizerHelper routines. Returns false for
// opcodes that have no case in the switch.
//
// NOTE(review): the first lines of this definition (embedded lines 1310-1311:
// return type, function name and leading parameters, which must introduce
// `Helper` and `MI`) are absent from this listing; the line below is the tail
// of the parameter list. Embedded lines 1344 and 1401 are absent as well.
1312 LostDebugLocObserver &LocObserver) const {
1313 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1314 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1315 MachineFunction &MF = *MI.getParent()->getParent();
1316 switch (MI.getOpcode()) {
1317 default:
1318 // No idea what to do.
1319 return false;
1320 case TargetOpcode::G_ABS:
// NOTE(review): the helper returns a LegalizeResult that is converted to bool
// by this return (same for lowerConstant below) - confirm that a failure
// result converts to false.
1321 return Helper.lowerAbsToMaxNeg(MI);
1322 // TODO: G_FCONSTANT
1323 case TargetOpcode::G_CONSTANT: {
1324 const Function &F = MF.getFunction();
1325 // TODO: if PSI and BFI are present, add " ||
1326 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1327 bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
1328 const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
// Leave the constant untouched unless it should live in the constant pool.
1329 if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1330 return true;
1331 return Helper.lowerConstant(MI);
1332 }
1333 case TargetOpcode::G_SEXT_INREG: {
1334 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1335 int64_t SizeInBits = MI.getOperand(2).getImm();
1336 // Source size of 32 is sext.w.
1337 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1338 return true;
1339
// With Zbb, 8- and 16-bit sign extensions are left as they are.
1340 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1341 return true;
1342
// NOTE(review): the right-hand side of this comparison (embedded line 1344)
// is absent from this listing.
1343 return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
1345 }
1346 case TargetOpcode::G_ASHR:
1347 case TargetOpcode::G_LSHR:
1348 case TargetOpcode::G_SHL: {
1349 if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1350 // We don't need a custom node for shift by constant. Just widen the
1351 // source and the shift amount.
// The extension used for the shifted value depends on the shift kind: sign
// extend for G_ASHR, zero extend for G_LSHR, any extend for G_SHL.
1352 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1353 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1354 ExtOpc = TargetOpcode::G_SEXT;
1355 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1356 ExtOpc = TargetOpcode::G_ZEXT;
1357
1358 Helper.Observer.changingInstr(MI);
1359 Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1360 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1361 Helper.widenScalarDst(MI, sXLen);
1362 Helper.Observer.changedInstr(MI);
1363 return true;
1364 }
1365
// Variable shift amount: widen both sources and the destination to sXLen,
// then switch MI to the opcode returned by getRISCVWOpcode.
1366 Helper.Observer.changingInstr(MI);
1367 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1368 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1369 Helper.widenScalarDst(MI, sXLen);
1370 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1371 Helper.Observer.changedInstr(MI);
1372 return true;
1373 }
1374 case TargetOpcode::G_SDIV:
1375 case TargetOpcode::G_UDIV:
1376 case TargetOpcode::G_UREM:
1377 case TargetOpcode::G_ROTL:
1378 case TargetOpcode::G_ROTR: {
// Two-source operations: any-extend both sources, widen the destination and
// switch MI to the opcode returned by getRISCVWOpcode.
1379 Helper.Observer.changingInstr(MI);
1380 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1381 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1382 Helper.widenScalarDst(MI, sXLen);
1383 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1384 Helper.Observer.changedInstr(MI);
1385 return true;
1386 }
1387 case TargetOpcode::G_CTLZ:
1388 case TargetOpcode::G_CTTZ: {
// Single-source variant of the rewrite above.
1389 Helper.Observer.changingInstr(MI);
1390 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1391 Helper.widenScalarDst(MI, sXLen);
1392 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1393 Helper.Observer.changedInstr(MI);
1394 return true;
1395 }
1396 case TargetOpcode::G_FPTOSI:
1397 case TargetOpcode::G_FPTOUI: {
// Only the destination is widened here; the source is left as it is.
1398 Helper.Observer.changingInstr(MI);
1399 Helper.widenScalarDst(MI, sXLen);
1400 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
// NOTE(review): a statement between setDesc and changedInstr (embedded line
// 1401) is absent from this listing.
1402 Helper.Observer.changedInstr(MI);
1403 return true;
1404 }
1405 case TargetOpcode::G_IS_FPCLASS: {
1406 Register GISFPCLASS = MI.getOperand(0).getReg();
1407 Register Src = MI.getOperand(1).getReg();
1408 const MachineOperand &ImmOp = MI.getOperand(2);
1409 MachineIRBuilder MIB(MI);
1410
1411 // Turn LLVM IR's floating point classes to that in RISC-V,
1412 // by simply rotating the 10-bit immediate right by two bits.
1413 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1414 auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1415 auto ConstZero = MIB.buildConstant(sXLen, 0);
1416
// Result = (G_FCLASS(Src) & mask) != 0.
1417 auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1418 auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1419 MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1420
1421 MI.eraseFromParent();
1422 return true;
1423 }
1424 case TargetOpcode::G_BRJT:
1425 return legalizeBRJT(MI, MIRBuilder);
1426 case TargetOpcode::G_VASTART:
1427 return legalizeVAStart(MI, MIRBuilder);
1428 case TargetOpcode::G_VSCALE:
1429 return legalizeVScale(MI, MIRBuilder);
1430 case TargetOpcode::G_ZEXT:
1431 case TargetOpcode::G_SEXT:
1432 case TargetOpcode::G_ANYEXT:
1433 return legalizeExt(MI, MIRBuilder);
1434 case TargetOpcode::G_SPLAT_VECTOR:
1435 return legalizeSplatVector(MI, MIRBuilder);
1436 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1437 return legalizeExtractSubvector(MI, MIRBuilder);
1438 case TargetOpcode::G_INSERT_SUBVECTOR:
1439 return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1440 case TargetOpcode::G_LOAD:
1441 case TargetOpcode::G_STORE:
1442 return legalizeLoadStore(MI, Helper, MIRBuilder);
1443 }
1444
// Every case above returns, so control should never get here.
1445 llvm_unreachable("expected switch to return");
1446}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define P(N)
ppc ctr loops verify
static LLT getLMUL1Ty(LLT VecTy)
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static std::pair< MachineInstrBuilder, MachineInstrBuilder > buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
Gets the two common "VL" operands: an all-ones mask and the vector length.
static LegalityPredicate typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list< LLT > BoolVecTys, const RISCVSubtarget &ST)
static MachineInstrBuilder buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru, const SrcOp &Scalar, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
static LegalityPredicate typeIsLegalIntOrFPVec(unsigned TypeIdx, std::initializer_list< LLT > IntOrFPVecTys, const RISCVSubtarget &ST)
static MachineInstrBuilder buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo, Register Hi, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx, std::initializer_list< LLT > PtrVecTys, const RISCVSubtarget &ST)
static unsigned getRISCVWOpcode(unsigned Opcode)
This file declares the targeting of the Machinelegalizer class for RISC-V.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition: APInt.cpp:1128
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Represents an extract subvector.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:181
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:264
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
Definition: LowLevelType.h:113
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:211
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:190
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:277
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:183
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildBrIndirect(Register Tgt)
Build and insert G_BRINDIRECT Tgt.
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getEntrySize(const DataLayout &TD) const
getEntrySize - Return the size of each entry in the jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJumpTableEntry hook.
@ EK_BlockAddress
EK_BlockAddress - Each entry is the plain address of a block, e.g.: .word LBB123.
unsigned getEntryAlignment(const DataLayout &TD) const
getEntryAlignment - Return the alignment of each entry in the jump table.
JTEntryKind getEntryKind() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
RISCVLegalizerInfo(const RISCVSubtarget &ST)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-specific information for each MachineFunction.
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted to a fixed quantity; otherwise returns X unmodified.
unsigned getMaxBuildIntsCost() const
bool useConstantPoolForLargeInts() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static RISCVII::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Register getReg() const
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:225
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 is true.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static constexpr unsigned RVVBitsPerBlock
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1972
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with no undefs if AllowUndefs is false).
Definition: Utils.cpp:1550
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with no undefs if AllowUndefs is false).
Definition: Utils.cpp:1532
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
@ And
Bitwise or logical AND of integers.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
Definition: Utils.cpp:418
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
std::function< bool(const LegalityQuery &)> LegalityPredicate
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
ArrayRef< LLT > Types
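This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.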
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.