LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70 const LLT v8bf16 = LLT::fixed_vector(8, bf16);
71
72 const LLT f16 = LLT::float16();
73 const LLT v4f16 = LLT::fixed_vector(4, f16);
74 const LLT v8f16 = LLT::fixed_vector(8, f16);
75
76 const LLT f32 = LLT::float32();
77 const LLT v2f32 = LLT::fixed_vector(2, f32);
78 const LLT v4f32 = LLT::fixed_vector(4, f32);
79
80 const LLT f64 = LLT::float64();
81 const LLT v2f64 = LLT::fixed_vector(2, f64);
82
83 const LLT f128 = LLT::float128();
84
85 const LLT i8 = LLT::integer(8);
86 const LLT v8i8 = LLT::fixed_vector(8, i8);
87 const LLT v16i8 = LLT::fixed_vector(16, i8);
88
89 const LLT i16 = LLT::integer(16);
90 const LLT v8i16 = LLT::fixed_vector(8, i16);
91 const LLT v4i16 = LLT::fixed_vector(4, i16);
92
93 const LLT i32 = LLT::integer(32);
94 const LLT v2i32 = LLT::fixed_vector(2, i32);
95 const LLT v4i32 = LLT::fixed_vector(4, i32);
96
97 const LLT i64 = LLT::integer(64);
98 const LLT v2i64 = LLT::fixed_vector(2, i64);
99
100 const LLT i128 = LLT::integer(128);
101
102 const LLT nxv16i8 = LLT::scalable_vector(16, i8);
103 const LLT nxv8i16 = LLT::scalable_vector(8, i16);
104 const LLT nxv4i32 = LLT::scalable_vector(4, i32);
105 const LLT nxv2i64 = LLT::scalable_vector(2, i64);
106
107 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
108 v16s8, v8s16, v4s32,
109 v2s64, v2p0,
110 /* End 128bit types */
111 /* Begin 64bit types */
112 v8s8, v4s16, v2s32};
113 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
114 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
115 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
116
117 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
118
119 // FIXME: support subtargets which have neon/fp-armv8 disabled.
120 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
122 return;
123 }
124
125 // Some instructions only support s16 if the subtarget has full 16-bit FP
126 // support.
127 const bool HasFP16 = ST.hasFullFP16();
128 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
129
130 const bool HasCSSC = ST.hasCSSC();
131 const bool HasRCPC3 = ST.hasRCPC3();
132 const bool HasSVE = ST.hasSVE();
133
135 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
136 .legalFor({p0, s8, s16, s32, s64})
137 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
138 v2s64, v2p0})
139 .widenScalarToNextPow2(0)
140 .clampScalar(0, s8, s64)
143 .clampNumElements(0, v8s8, v16s8)
144 .clampNumElements(0, v4s16, v8s16)
145 .clampNumElements(0, v2s32, v4s32)
146 .clampMaxNumElements(0, s64, 2)
147 .clampMaxNumElements(0, p0, 2)
149
151 .legalFor({p0, s16, s32, s64})
152 .legalFor(PackedVectorAllTypeList)
156 .clampScalar(0, s16, s64)
157 .clampNumElements(0, v8s8, v16s8)
158 .clampNumElements(0, v4s16, v8s16)
159 .clampNumElements(0, v2s32, v4s32)
160 .clampMaxNumElements(0, s64, 2)
161 .clampMaxNumElements(0, p0, 2);
162
164 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
165 smallerThan(1, 0)))
166 .widenScalarToNextPow2(0)
167 .clampScalar(0, s32, s64)
169 .minScalar(1, s8)
170 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
171 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
172
174 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
175 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
176 .widenScalarToNextPow2(1)
177 .clampScalar(1, s32, s128)
179 .minScalar(0, s16)
180 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
181 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
182 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
183
184 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
185 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
186 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
187 .widenScalarToNextPow2(0)
188 .clampScalar(0, s32, s64)
189 .clampMaxNumElements(0, s8, 16)
190 .clampMaxNumElements(0, s16, 8)
191 .clampNumElements(0, v2s32, v4s32)
192 .clampNumElements(0, v2s64, v2s64)
194 [=](const LegalityQuery &Query) {
195 return Query.Types[0].getNumElements() <= 2;
196 },
197 0, s32)
198 .minScalarOrEltIf(
199 [=](const LegalityQuery &Query) {
200 return Query.Types[0].getNumElements() <= 4;
201 },
202 0, s16)
203 .minScalarOrEltIf(
204 [=](const LegalityQuery &Query) {
205 return Query.Types[0].getNumElements() <= 16;
206 },
207 0, s8)
208 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
210
212 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
213 .widenScalarToNextPow2(0)
214 .clampScalar(0, s32, s64)
215 .clampMaxNumElements(0, s8, 16)
216 .clampMaxNumElements(0, s16, 8)
217 .clampNumElements(0, v2s32, v4s32)
218 .clampNumElements(0, v2s64, v2s64)
220 [=](const LegalityQuery &Query) {
221 return Query.Types[0].getNumElements() <= 2;
222 },
223 0, s32)
224 .minScalarOrEltIf(
225 [=](const LegalityQuery &Query) {
226 return Query.Types[0].getNumElements() <= 4;
227 },
228 0, s16)
229 .minScalarOrEltIf(
230 [=](const LegalityQuery &Query) {
231 return Query.Types[0].getNumElements() <= 16;
232 },
233 0, s8)
234 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
236
237 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
238 .customIf([=](const LegalityQuery &Query) {
239 const auto &SrcTy = Query.Types[0];
240 const auto &AmtTy = Query.Types[1];
241 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
242 AmtTy.getSizeInBits() == 32;
243 })
244 .legalFor({
245 {i32, i32},
246 {i32, i64},
247 {i64, i64},
248 {v8i8, v8i8},
249 {v16i8, v16i8},
250 {v4i16, v4i16},
251 {v8i16, v8i16},
252 {v2i32, v2i32},
253 {v4i32, v4i32},
254 {v2i64, v2i64},
255 })
256 .widenScalarToNextPow2(0)
257 .clampScalar(1, s32, s64)
258 .clampScalar(0, s32, s64)
259 .clampNumElements(0, v8s8, v16s8)
260 .clampNumElements(0, v4s16, v8s16)
261 .clampNumElements(0, v2s32, v4s32)
262 .clampNumElements(0, v2s64, v2s64)
264 .minScalarSameAs(1, 0)
268
270 .legalFor({{p0, i64}, {v2p0, v2i64}})
271 .clampScalarOrElt(1, s64, s64)
272 .clampNumElements(0, v2p0, v2p0);
273
274 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
275
276 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
277 .legalFor({i32, i64})
278 .libcallFor({i128})
279 .clampScalar(0, s32, s64)
281 .scalarize(0);
282
283 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
284 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
285 .libcallFor({i128})
287 .minScalarOrElt(0, s32)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
290 .scalarize(0);
291
292 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
293 .widenScalarToNextPow2(0, /*Min = */ 32)
294 .clampScalar(0, s32, s64)
295 .lower();
296
297 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
298 .legalFor({i64, v16i8, v8i16, v4i32})
299 .lower();
300
301 getActionDefinitionsBuilder({G_SMULFIX, G_UMULFIX}).lower();
302
303 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
304 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
305 .legalFor(HasCSSC, {i32, i64})
306 .minScalar(HasCSSC, 0, s32)
307 .clampNumElements(0, v8s8, v16s8)
308 .clampNumElements(0, v4s16, v8s16)
309 .clampNumElements(0, v2s32, v4s32)
310 .lower();
311
312 // FIXME: Legal vector types are only legal with NEON.
314 .legalFor(HasCSSC, {i32, i64})
315 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
316 .customIf([=](const LegalityQuery &Q) {
317 // TODO: Fix suboptimal codegen for 128+ bit types.
318 LLT SrcTy = Q.Types[0];
319 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
320 })
321 .widenScalarIf(
322 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
323 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
324 .widenScalarIf(
325 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
326 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
327 .clampNumElements(0, v8s8, v16s8)
328 .clampNumElements(0, v4s16, v8s16)
329 .clampNumElements(0, v2s32, v4s32)
330 .clampNumElements(0, v2s64, v2s64)
332 .lower();
333
335 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
336 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
337 .lower();
338
340 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
341 .legalFor({{i32, i32}, {i64, i32}})
342 .clampScalar(0, s32, s64)
343 .clampScalar(1, s32, s64)
345
346 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
347 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
348 .lower();
349
351 .legalFor({{i32, i64}, {i64, i64}})
352 .customIf([=](const LegalityQuery &Q) {
353 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
354 })
355 .lower();
357
358 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
359 .customFor({{s32, s32}, {s64, s64}});
360
361 auto always = [=](const LegalityQuery &Q) { return true; };
363 .legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
364 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
365 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
366 .customFor({{s128, s128},
367 {v4s16, v4s16},
368 {v8s16, v8s16},
369 {v2s32, v2s32},
370 {v4s32, v4s32},
371 {v2s64, v2s64}})
372 .clampScalar(0, s32, s128)
375 .minScalarEltSameAsIf(always, 1, 0)
376 .maxScalarEltSameAsIf(always, 1, 0)
377 .clampNumElements(0, v8s8, v16s8)
378 .clampNumElements(0, v4s16, v8s16)
379 .clampNumElements(0, v2s32, v4s32)
380 .clampNumElements(0, v2s64, v2s64)
383
384 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
385 .legalFor({{i32, i32},
386 {i64, i64},
387 {v8i8, v8i8},
388 {v16i8, v16i8},
389 {v4i16, v4i16},
390 {v8i16, v8i16},
391 {v2i32, v2i32},
392 {v4i32, v4i32}})
393 .widenScalarToNextPow2(1, /*Min=*/32)
394 .clampScalar(1, s32, s64)
396 .clampNumElements(0, v8s8, v16s8)
397 .clampNumElements(0, v4s16, v8s16)
398 .clampNumElements(0, v2s32, v4s32)
401 .scalarSameSizeAs(0, 1);
402
403 getActionDefinitionsBuilder(G_CTLZ_ZERO_POISON).lower();
404
406 .lowerIf(isVector(0))
407 .widenScalarToNextPow2(1, /*Min=*/32)
408 .clampScalar(1, s32, s64)
409 .scalarSameSizeAs(0, 1)
410 .legalFor(HasCSSC, {s32, s64})
411 .customFor(!HasCSSC, {s32, s64});
412
413 getActionDefinitionsBuilder(G_CTTZ_ZERO_POISON).lower();
414
415 getActionDefinitionsBuilder(G_BITREVERSE)
416 .legalFor({i32, i64, v8i8, v16i8})
417 .widenScalarToNextPow2(0, /*Min = */ 32)
419 .clampScalar(0, s32, s64)
420 .clampNumElements(0, v8s8, v16s8)
421 .clampNumElements(0, v4s16, v8s16)
422 .clampNumElements(0, v2s32, v4s32)
423 .clampNumElements(0, v2s64, v2s64)
426 .lower();
427
428 getActionDefinitionsBuilder(G_CLMUL).legalFor({v8i8, v16i8});
429
431 .legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
433 .clampScalar(0, s32, s64)
434 .clampNumElements(0, v4s16, v8s16)
435 .clampNumElements(0, v2s32, v4s32)
436 .clampNumElements(0, v2s64, v2s64)
438
439 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
440 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
441 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
442 .clampNumElements(0, v8s8, v16s8)
443 .clampNumElements(0, v4s16, v8s16)
444 .clampNumElements(0, v2s32, v4s32)
445 .clampMaxNumElements(0, s64, 2)
448 .lower();
449
451 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
452 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
453 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
454 .legalFor({f32, f64, v2f32, v4f32, v2f64})
455 .legalFor(HasFP16, {f16, v4f16, v8f16})
456 .libcallFor({f128})
457 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
459 [=](const LegalityQuery &Q) {
460 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
461 Q.Types[0].getScalarType().isBFloat16();
462 },
463 changeElementTo(0, f32))
464 .clampNumElements(0, v4s16, v8s16)
465 .clampNumElements(0, v2s32, v4s32)
466 .clampNumElements(0, v2s64, v2s64)
468
469 getActionDefinitionsBuilder({G_FABS, G_FNEG})
470 .legalFor({f32, f64, v2f32, v4f32, v2f64})
471 .legalFor(HasFP16, {f16, bf16, v4f16, v4bf16, v8f16, v8bf16})
472 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
474 .clampNumElements(0, v4s16, v8s16)
475 .clampNumElements(0, v2s32, v4s32)
476 .clampNumElements(0, v2s64, v2s64)
478 .lowerFor({f16, bf16, v4f16, v4bf16, v8f16, v8bf16});
479
481 .libcallFor({f32, f64, f128})
482 .minScalar(0, f32)
483 .scalarize(0);
484
485 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
486 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
487 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
488 G_FSINH, G_FTANH, G_FMODF})
489 // We need a call for these, so we always need to scalarize.
490 .scalarize(0)
491 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
492 .minScalar(0, f32)
493 .libcallFor({f32, f64, f128});
494 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
495 .scalarize(0)
496 .minScalar(0, f32)
497 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
498
499 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
500 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
501 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
502 .minScalar(1, s32)
503 .libcallFor({{s64, s128}})
504 .lower();
505 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
506 .legalFor({{i64, f32}, {i64, f64}})
507 .legalFor(HasFP16, {{i64, f16}})
508 .minScalar(0, s64)
509 .minScalar(1, s32)
510 .libcallFor({{s64, s128}})
511 .lower();
512
513 // TODO: Custom legalization for mismatched types.
514 getActionDefinitionsBuilder(G_FCOPYSIGN)
516 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
517 [=](const LegalityQuery &Query) {
518 const LLT Ty = Query.Types[0];
519 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
520 })
521 .lower();
522
524
525 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
526 auto &Actions = getActionDefinitionsBuilder(Op);
527
528 if (Op == G_SEXTLOAD)
530
531 // Atomics have zero extending behavior.
532 Actions
533 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
534 {s32, p0, s16, 8},
535 {s32, p0, s32, 8},
536 {s64, p0, s8, 2},
537 {s64, p0, s16, 2},
538 {s64, p0, s32, 4},
539 {s64, p0, s64, 8},
540 {p0, p0, s64, 8},
541 {v2s32, p0, s64, 8}})
542 .widenScalarToNextPow2(0)
543 .clampScalar(0, s32, s64)
544 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
545 // how to do that yet.
546 .unsupportedIfMemSizeNotPow2()
547 // Lower anything left over into G_*EXT and G_LOAD
548 .lower();
549 }
550
551 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
552 const LLT &ValTy = Query.Types[0];
553 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
554 };
555
557 .customIf([=](const LegalityQuery &Query) {
558 return HasRCPC3 && Query.Types[0] == s128 &&
559 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
560 })
561 .customIf([=](const LegalityQuery &Query) {
562 return Query.Types[0] == s128 &&
563 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
564 })
565 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
566 {s16, p0, s16, 8},
567 {s32, p0, s32, 8},
568 {s64, p0, s64, 8},
569 {p0, p0, s64, 8},
570 {s128, p0, s128, 8},
571 {v8s8, p0, s64, 8},
572 {v16s8, p0, s128, 8},
573 {v4s16, p0, s64, 8},
574 {v8s16, p0, s128, 8},
575 {v2s32, p0, s64, 8},
576 {v4s32, p0, s128, 8},
577 {v2s64, p0, s128, 8}})
578 // These extends are also legal
579 .legalForTypesWithMemDesc(
580 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
581 .legalForTypesWithMemDesc({
582 // SVE vscale x 128 bit base sizes
583 {nxv16s8, p0, nxv16s8, 8},
584 {nxv8s16, p0, nxv8s16, 8},
585 {nxv4s32, p0, nxv4s32, 8},
586 {nxv2s64, p0, nxv2s64, 8},
587 })
588 .widenScalarToNextPow2(0, /* MinSize = */ 8)
589 .clampMaxNumElements(0, s8, 16)
590 .clampMaxNumElements(0, s16, 8)
591 .clampMaxNumElements(0, s32, 4)
592 .clampMaxNumElements(0, s64, 2)
593 .clampMaxNumElements(0, p0, 2)
595 .clampScalar(0, s8, s64)
597 [=](const LegalityQuery &Query) {
598 // Clamp extending load results to 32-bits.
599 return Query.Types[0].isScalar() &&
600 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
601 Query.Types[0].getSizeInBits() > 32;
602 },
603 changeTo(0, s32))
604 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
605 .bitcastIf(typeInSet(0, {v4s8}),
606 [=](const LegalityQuery &Query) {
607 const LLT VecTy = Query.Types[0];
608 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
609 })
610 .customIf(IsPtrVecPred)
611 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
612 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
613
615 .customIf([=](const LegalityQuery &Query) {
616 return HasRCPC3 && Query.Types[0] == s128 &&
617 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
618 })
619 .customIf([=](const LegalityQuery &Query) {
620 return Query.Types[0] == s128 &&
621 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
622 })
623 .widenScalarIf(
624 all(scalarNarrowerThan(0, 32),
626 changeTo(0, s32))
628 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
629 {s32, p0, s8, 8}, // truncstorei8 from s32
630 {s64, p0, s8, 8}, // truncstorei8 from s64
631 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
632 {s64, p0, s16, 8}, // truncstorei16 from s64
633 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
634 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
635 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
636 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
637 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
638 .legalForTypesWithMemDesc({
639 // SVE vscale x 128 bit base sizes
640 // TODO: Add nxv2p0. Consider bitcastIf.
641 // See #92130
642 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
643 {nxv16s8, p0, nxv16s8, 8},
644 {nxv8s16, p0, nxv8s16, 8},
645 {nxv4s32, p0, nxv4s32, 8},
646 {nxv2s64, p0, nxv2s64, 8},
647 })
648 .clampScalar(0, s8, s64)
649 .minScalarOrElt(0, s8)
650 .lowerIf([=](const LegalityQuery &Query) {
651 return Query.Types[0].isScalar() &&
652 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
653 })
654 // Maximum: sN * k = 128
655 .clampMaxNumElements(0, s8, 16)
656 .clampMaxNumElements(0, s16, 8)
657 .clampMaxNumElements(0, s32, 4)
658 .clampMaxNumElements(0, s64, 2)
659 .clampMaxNumElements(0, p0, 2)
661 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
662 .bitcastIf(all(typeInSet(0, {v4s8}),
663 LegalityPredicate([=](const LegalityQuery &Query) {
664 return Query.Types[0].getSizeInBits() ==
665 Query.MMODescrs[0].MemoryTy.getSizeInBits();
666 })),
667 [=](const LegalityQuery &Query) {
668 const LLT VecTy = Query.Types[0];
669 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
670 })
671 .customIf(IsPtrVecPred)
672 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
673 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
674 .lower();
675
676 getActionDefinitionsBuilder(G_INDEXED_STORE)
677 // Idx 0 == Ptr, Idx 1 == Val
678 // TODO: we can implement legalizations but as of now these are
679 // generated in a very specific way.
681 {p0, s8, s8, 8},
682 {p0, s16, s16, 8},
683 {p0, s32, s8, 8},
684 {p0, s32, s16, 8},
685 {p0, s32, s32, 8},
686 {p0, s64, s64, 8},
687 {p0, p0, p0, 8},
688 {p0, v8s8, v8s8, 8},
689 {p0, v16s8, v16s8, 8},
690 {p0, v4s16, v4s16, 8},
691 {p0, v8s16, v8s16, 8},
692 {p0, v2s32, v2s32, 8},
693 {p0, v4s32, v4s32, 8},
694 {p0, v2s64, v2s64, 8},
695 {p0, v2p0, v2p0, 8},
696 {p0, s128, s128, 8},
697 })
698 .unsupported();
699
700 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
701 LLT LdTy = Query.Types[0];
702 LLT PtrTy = Query.Types[1];
703 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
704 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
705 return false;
706 if (PtrTy != p0)
707 return false;
708 return true;
709 };
710 getActionDefinitionsBuilder(G_INDEXED_LOAD)
713 .legalIf(IndexedLoadBasicPred)
714 .unsupported();
715 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
716 .unsupportedIf(
718 .legalIf(all(typeInSet(0, {s16, s32, s64}),
719 LegalityPredicate([=](const LegalityQuery &Q) {
720 LLT LdTy = Q.Types[0];
721 LLT PtrTy = Q.Types[1];
722 LLT MemTy = Q.MMODescrs[0].MemoryTy;
723 if (PtrTy != p0)
724 return false;
725 if (LdTy == s16)
726 return MemTy == s8;
727 if (LdTy == s32)
728 return MemTy == s8 || MemTy == s16;
729 if (LdTy == s64)
730 return MemTy == s8 || MemTy == s16 || MemTy == s32;
731 return false;
732 })))
733 .unsupported();
734
735 // Constants
737 .legalFor({p0, s8, s16, s32, s64})
738 .widenScalarToNextPow2(0)
739 .clampScalar(0, s8, s64);
740 getActionDefinitionsBuilder(G_FCONSTANT)
741 .legalFor({s16, s32, s64, s128});
742
743 // FIXME: fix moreElementsToNextPow2
745 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
747 .minScalarOrElt(1, s8)
748 .clampScalar(1, s32, s64)
749 .clampScalar(0, s32, s32)
752 [=](const LegalityQuery &Query) {
753 const LLT &Ty = Query.Types[0];
754 const LLT &SrcTy = Query.Types[1];
755 return Ty.isVector() && !SrcTy.isPointerVector() &&
756 Ty.getElementType() != SrcTy.getElementType();
757 },
758 0, 1)
759 .minScalarOrEltIf(
760 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
761 1, s32)
762 .minScalarOrEltIf(
763 [=](const LegalityQuery &Query) {
764 return Query.Types[1].isPointerVector();
765 },
766 0, s64)
768 .clampNumElements(1, v8s8, v16s8)
769 .clampNumElements(1, v4s16, v8s16)
770 .clampNumElements(1, v2s32, v4s32)
771 .clampNumElements(1, v2s64, v2s64)
772 .clampNumElements(1, v2p0, v2p0)
773 .customIf(isVector(0));
774
776 .legalFor({{i32, f32},
777 {i32, f64},
778 {v4i32, v4f32},
779 {v2i32, v2f32},
780 {v2i64, v2f64}})
781 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
783 .clampScalar(0, s32, s32)
785 [=](const LegalityQuery &Q) {
786 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
787 Q.Types[1].getScalarType().isBFloat16();
788 },
789 changeElementTo(1, f32))
790 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
792 [=](const LegalityQuery &Query) {
793 const LLT &Ty = Query.Types[0];
794 const LLT &SrcTy = Query.Types[1];
795 return Ty.isVector() && !SrcTy.isPointerVector() &&
796 Ty.getElementType() != SrcTy.getElementType();
797 },
798 0, 1)
799 .clampNumElements(1, v4s16, v8s16)
800 .clampNumElements(1, v2s32, v4s32)
801 .clampMaxNumElements(1, s64, 2)
803 .libcallFor({{s32, s128}});
804
805 // Extensions
806 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
807 unsigned DstSize = Query.Types[0].getSizeInBits();
808
809 // Handle legal vectors using legalFor
810 if (Query.Types[0].isVector())
811 return false;
812
813 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
814 return false; // Extending to a scalar s128 needs narrowing.
815
816 const LLT &SrcTy = Query.Types[1];
817
818 // Make sure we fit in a register otherwise. Don't bother checking that
819 // the source type is below 128 bits. We shouldn't be allowing anything
820 // through which is wider than the destination in the first place.
821 unsigned SrcSize = SrcTy.getSizeInBits();
822 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
823 return false;
824
825 return true;
826 };
827 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
828 .legalIf(ExtLegalFunc)
829 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
830 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
832 .clampMaxNumElements(1, s8, 8)
833 .clampMaxNumElements(1, s16, 4)
834 .clampMaxNumElements(1, s32, 2)
835 // Tries to convert a large EXTEND into two smaller EXTENDs
836 .lowerIf([=](const LegalityQuery &Query) {
837 return (Query.Types[0].getScalarSizeInBits() >
838 Query.Types[1].getScalarSizeInBits() * 2) &&
839 Query.Types[0].isVector() &&
840 (Query.Types[1].getScalarSizeInBits() == 8 ||
841 Query.Types[1].getScalarSizeInBits() == 16);
842 })
843 .clampMinNumElements(1, s8, 8)
844 .clampMinNumElements(1, s16, 4)
846
848 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
850 .clampMaxNumElements(0, s8, 8)
851 .clampMaxNumElements(0, s16, 4)
852 .clampMaxNumElements(0, s32, 2)
854 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
855 0, s8)
856 .lowerIf([=](const LegalityQuery &Query) {
857 LLT DstTy = Query.Types[0];
858 LLT SrcTy = Query.Types[1];
859 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
860 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
861 })
862 .clampMinNumElements(0, s8, 8)
863 .clampMinNumElements(0, s16, 4)
864 .alwaysLegal();
865
866 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
867 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
868 .clampNumElements(0, v2s32, v2s32);
869
870 getActionDefinitionsBuilder(G_SEXT_INREG)
871 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
872 .maxScalar(0, s64)
873 .clampNumElements(0, v8s8, v16s8)
874 .clampNumElements(0, v4s16, v8s16)
875 .clampNumElements(0, v2s32, v4s32)
876 .clampMaxNumElements(0, s64, 2)
877 .lower();
878
879 // FP conversions
881 .legalFor(
882 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
883 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
884 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
886 .customIf([](const LegalityQuery &Q) {
887 LLT DstTy = Q.Types[0];
888 LLT SrcTy = Q.Types[1];
889 return SrcTy.getScalarSizeInBits() == 64 &&
890 DstTy.getScalarSizeInBits() == 16;
891 })
892 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
893 // Clamp based on input
894 .clampNumElements(1, v4s32, v4s32)
895 .clampNumElements(1, v2s64, v2s64)
896 .scalarize(0);
897
898 getActionDefinitionsBuilder(G_FPEXT)
899 .legalFor({{f32, f16},
900 {f64, f16},
901 {f32, bf16},
902 {f64, f32},
903 {v4f32, v4f16},
904 {v4f32, v4bf16},
905 {v2f64, v2f32}})
906 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
909 [](const LegalityQuery &Q) {
910 LLT DstTy = Q.Types[0];
911 LLT SrcTy = Q.Types[1];
912 return SrcTy.isVector() && DstTy.isVector() &&
913 SrcTy.getScalarSizeInBits() == 16 &&
914 DstTy.getScalarSizeInBits() == 64;
915 },
916 changeElementTo(1, f32))
917 .clampNumElements(0, v4s32, v4s32)
918 .clampNumElements(0, v2s64, v2s64)
919 .scalarize(0);
920
921 // Conversions
922 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
923 .legalFor({{i32, f32},
924 {i64, f32},
925 {i32, f64},
926 {i64, f64},
927 {v2i32, v2f32},
928 {v4i32, v4f32},
929 {v2i64, v2f64}})
930 .legalFor(HasFP16,
931 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
932 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
934 // The range of a fp16 value fits into an i17, so we can lower the width
935 // to i64.
937 [=](const LegalityQuery &Query) {
938 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
939 },
940 changeTo(0, i64))
943 .minScalar(0, s32)
944 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
946 [=](const LegalityQuery &Query) {
947 return Query.Types[0].getScalarSizeInBits() <= 64 &&
948 Query.Types[0].getScalarSizeInBits() >
949 Query.Types[1].getScalarSizeInBits();
950 },
952 .widenScalarIf(
953 [=](const LegalityQuery &Query) {
954 return Query.Types[1].getScalarSizeInBits() <= 64 &&
955 Query.Types[0].getScalarSizeInBits() <
956 Query.Types[1].getScalarSizeInBits();
957 },
959 .clampNumElements(0, v4s16, v8s16)
960 .clampNumElements(0, v2s32, v4s32)
961 .clampMaxNumElements(0, s64, 2)
962 .libcallFor(
963 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
964
965 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
966 .legalFor({{i32, f32},
967 {i64, f32},
968 {i32, f64},
969 {i64, f64},
970 {v2i32, v2f32},
971 {v4i32, v4f32},
972 {v2i64, v2f64}})
973 .legalFor(
974 HasFP16,
975 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
976 // Handle types larger than i64 by scalarizing/lowering.
977 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
979 // The range of a fp16 value fits into an i17, so we can lower the width
980 // to i64.
982 [=](const LegalityQuery &Query) {
983 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
984 },
985 changeTo(0, i64))
986 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
988 .widenScalarToNextPow2(0, /*MinSize=*/32)
989 .minScalar(0, s32)
990 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
992 [=](const LegalityQuery &Query) {
993 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
994 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
995 ITySize > Query.Types[1].getScalarSizeInBits();
996 },
998 .widenScalarIf(
999 [=](const LegalityQuery &Query) {
1000 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
1001 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
1002 Query.Types[0].getScalarSizeInBits() < FTySize;
1003 },
1006 .clampNumElements(0, v4s16, v8s16)
1007 .clampNumElements(0, v2s32, v4s32)
1008 .clampMaxNumElements(0, s64, 2);
1009
1010 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1011 .legalFor({{f32, i32},
1012 {f64, i32},
1013 {f32, i64},
1014 {f64, i64},
1015 {v2f32, v2i32},
1016 {v4f32, v4i32},
1017 {v2f64, v2i64}})
1018 .legalFor(HasFP16,
1019 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1020 .unsupportedIf([&](const LegalityQuery &Query) {
1021 return Query.Types[0].getScalarType().isBFloat16();
1022 })
1023 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1027 .minScalar(1, f32)
1028 .lowerIf([](const LegalityQuery &Query) {
1029 return Query.Types[1].isVector() &&
1030 Query.Types[1].getScalarSizeInBits() == 64 &&
1031 Query.Types[0].getScalarSizeInBits() == 16;
1032 })
1033 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1034 .scalarizeIf(
1035 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1036 [](const LegalityQuery &Query) {
1037 return Query.Types[0].getScalarSizeInBits() == 32 &&
1038 Query.Types[1].getScalarSizeInBits() == 64;
1039 },
1040 0)
1041 .widenScalarIf(
1042 [](const LegalityQuery &Query) {
1043 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1044 Query.Types[0].getScalarSizeInBits() <
1045 Query.Types[1].getScalarSizeInBits();
1046 },
1048 .widenScalarIf(
1049 [](const LegalityQuery &Query) {
1050 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1051 Query.Types[0].getScalarSizeInBits() >
1052 Query.Types[1].getScalarSizeInBits();
1053 },
1055 .clampNumElements(0, v4s16, v8s16)
1056 .clampNumElements(0, v2s32, v4s32)
1057 .clampMaxNumElements(0, s64, 2)
1058 .libcallFor({{f16, i128},
1059 {f32, i128},
1060 {f64, i128},
1061 {f128, i128},
1062 {f128, i32},
1063 {f128, i64}});
1064
1065 // Control-flow
1066 getActionDefinitionsBuilder(G_BR).alwaysLegal();
1067 getActionDefinitionsBuilder(G_BRCOND)
1068 .legalFor({s32})
1069 .clampScalar(0, s32, s32);
1070 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1071
1072 getActionDefinitionsBuilder(G_SELECT)
1073 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1074 .widenScalarToNextPow2(0)
1075 .clampScalar(0, s32, s64)
1076 .clampScalar(1, s32, s32)
1079 .lowerIf(isVector(0));
1080
1081 // Pointer-handling
1082 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1083
1084 if (TM.getCodeModel() == CodeModel::Small)
1085 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1086 else
1087 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1088
1089 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1090 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1091
1092 getActionDefinitionsBuilder(G_PTRTOINT)
1093 .legalFor({{i64, p0}, {v2i64, v2p0}})
1094 .widenScalarToNextPow2(0, 64)
1095 .clampScalar(0, s64, s64)
1096 .clampMaxNumElements(0, s64, 2);
1097
1098 getActionDefinitionsBuilder(G_INTTOPTR)
1099 .unsupportedIf([&](const LegalityQuery &Query) {
1100 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1101 })
1102 .legalFor({{p0, i64}, {v2p0, v2i64}})
1103 .clampMaxNumElements(1, s64, 2);
1104
1105 // Casts for 32 and 64-bit width type are just copies.
1106 // Same for 128-bit width type, except they are on the FPR bank.
1107 getActionDefinitionsBuilder(G_BITCAST)
1109 // Keeping 32-bit instructions legal to prevent regression in some tests
1110 .legalForCartesianProduct({s32, v2s16, v4s8})
1111 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1112 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1113 .customIf([=](const LegalityQuery &Query) {
1114 // Handle casts from i1 vectors to scalars.
1115 LLT DstTy = Query.Types[0];
1116 LLT SrcTy = Query.Types[1];
1117 return DstTy.isScalar() && SrcTy.isVector() &&
1118 SrcTy.getScalarSizeInBits() == 1;
1119 })
1120 .lowerIf([=](const LegalityQuery &Query) {
1121 return Query.Types[0].isVector() != Query.Types[1].isVector();
1122 })
1124 .clampNumElements(0, v8s8, v16s8)
1125 .clampNumElements(0, v4s16, v8s16)
1126 .clampNumElements(0, v2s32, v4s32)
1127 .clampMaxNumElements(0, s64, 2)
1128 .lower();
1129
1130 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1131
1132 // va_list must be a pointer, but most sized types are pretty easy to handle
1133 // as the destination.
1134 getActionDefinitionsBuilder(G_VAARG)
1135 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1136 .clampScalar(0, s8, s64)
1137 .widenScalarToNextPow2(0, /*Min*/ 8);
1138
1139 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1140 .lowerIf(
1141 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1142
1143 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1144
1145 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1146 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1147 .customFor(!UseOutlineAtomics, {{s128, p0}})
1148 .libcallFor(UseOutlineAtomics,
1149 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1150 .clampScalar(0, s32, s64);
1151
1152 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1153 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1154 G_ATOMICRMW_XOR})
1155 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1156 .libcallFor(UseOutlineAtomics,
1157 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1158 .clampScalar(0, s32, s64);
1159
1160 // Do not outline these atomics operations, as per comment in
1161 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1162 getActionDefinitionsBuilder(
1163 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1164 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1165 .clampScalar(0, s32, s64);
1166
1167 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1168
1169 // Merge/Unmerge
1170 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1171 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1172 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1173 getActionDefinitionsBuilder(Op)
1174 .widenScalarToNextPow2(LitTyIdx, 8)
1175 .widenScalarToNextPow2(BigTyIdx, 32)
1176 .clampScalar(LitTyIdx, s8, s64)
1177 .clampScalar(BigTyIdx, s32, s128)
1178 .legalIf([=](const LegalityQuery &Q) {
1179 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1180 case 32:
1181 case 64:
1182 case 128:
1183 break;
1184 default:
1185 return false;
1186 }
1187 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1188 case 8:
1189 case 16:
1190 case 32:
1191 case 64:
1192 return true;
1193 default:
1194 return false;
1195 }
1196 });
1197 }
1198
1199 // TODO : nxv4s16, nxv2s16, nxv2s32
1200 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1201 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1202 {s16, nxv8s16, s64},
1203 {s32, nxv4s32, s64},
1204 {s64, nxv2s64, s64}})
1205 .unsupportedIf([=](const LegalityQuery &Query) {
1206 const LLT &EltTy = Query.Types[1].getElementType();
1207 if (Query.Types[1].isScalableVector())
1208 return false;
1209 return Query.Types[0] != EltTy;
1210 })
1211 .minScalar(2, s64)
1212 .customIf([=](const LegalityQuery &Query) {
1213 const LLT &VecTy = Query.Types[1];
1214 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1215 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1216 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1217 })
1218 .minScalarOrEltIf(
1219 [=](const LegalityQuery &Query) {
1220 // We want to promote <M x s1> to <M x s64> if that wouldn't
1221 // cause the total vec size to be > 128b.
1222 return Query.Types[1].isFixedVector() &&
1223 Query.Types[1].getNumElements() <= 2;
1224 },
1225 0, s64)
1226 .minScalarOrEltIf(
1227 [=](const LegalityQuery &Query) {
1228 return Query.Types[1].isFixedVector() &&
1229 Query.Types[1].getNumElements() <= 4;
1230 },
1231 0, s32)
1232 .minScalarOrEltIf(
1233 [=](const LegalityQuery &Query) {
1234 return Query.Types[1].isFixedVector() &&
1235 Query.Types[1].getNumElements() <= 8;
1236 },
1237 0, s16)
1238 .minScalarOrEltIf(
1239 [=](const LegalityQuery &Query) {
1240 return Query.Types[1].isFixedVector() &&
1241 Query.Types[1].getNumElements() <= 16;
1242 },
1243 0, s8)
1244 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1245 .moreElementsToNextPow2(1)
1246 .clampMaxNumElements(1, s64, 2)
1247 .clampMaxNumElements(1, s32, 4)
1248 .clampMaxNumElements(1, s16, 8)
1249 .clampMaxNumElements(1, s8, 16)
1250 .clampMaxNumElements(1, p0, 2)
1251 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
1252
1253 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1254 .legalIf(
1255 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1256 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1257 {nxv8s16, s32, s64},
1258 {nxv4s32, s32, s64},
1259 {nxv2s64, s64, s64}})
1261 .widenVectorEltsToVectorMinSize(0, 64)
1262 .clampNumElements(0, v8s8, v16s8)
1263 .clampNumElements(0, v4s16, v8s16)
1264 .clampNumElements(0, v2s32, v4s32)
1265 .clampMaxNumElements(0, s64, 2)
1266 .clampMaxNumElements(0, p0, 2)
1267 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1268
1269 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1270 .legalFor({{v8s8, s8},
1271 {v16s8, s8},
1272 {v4s16, s16},
1273 {v8s16, s16},
1274 {v2s32, s32},
1275 {v4s32, s32},
1276 {v2s64, s64},
1277 {v2p0, p0}})
1278 .clampNumElements(0, v4s32, v4s32)
1279 .clampNumElements(0, v2s64, v2s64)
1280 .minScalarOrElt(0, s8)
1281 .widenVectorEltsToVectorMinSize(0, 64)
1282 .widenScalarOrEltToNextPow2(0)
1283 .minScalarSameAs(1, 0);
1284
1285 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1286
1287 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1288 .legalIf([=](const LegalityQuery &Query) {
1289 const LLT &DstTy = Query.Types[0];
1290 const LLT &SrcTy = Query.Types[1];
1291 // For now just support the TBL2 variant which needs the source vectors
1292 // to be the same size as the dest.
1293 if (DstTy != SrcTy)
1294 return false;
1295 return llvm::is_contained(
1296 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1297 })
1298 .moreElementsIf(
1299 [](const LegalityQuery &Query) {
1300 return Query.Types[0].getNumElements() >
1301 Query.Types[1].getNumElements();
1302 },
1303 changeTo(1, 0))
1305 .moreElementsIf(
1306 [](const LegalityQuery &Query) {
1307 return Query.Types[0].getNumElements() <
1308 Query.Types[1].getNumElements();
1309 },
1310 changeTo(0, 1))
1311 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1312 .clampNumElements(0, v8s8, v16s8)
1313 .clampNumElements(0, v4s16, v8s16)
1314 .clampNumElements(0, v4s32, v4s32)
1315 .clampNumElements(0, v2s64, v2s64)
1316 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1317 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1318 // Bitcast pointers vector to i64.
1319 const LLT DstTy = Query.Types[0];
1320 return std::pair(
1321 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1322 });
1323
1324 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1325 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1326 .customIf([=](const LegalityQuery &Query) {
1327 return Query.Types[0].isFixedVector() &&
1328 Query.Types[0].getScalarSizeInBits() < 8;
1329 })
1330 .bitcastIf(
1331 [=](const LegalityQuery &Query) {
1332 return Query.Types[0].isFixedVector() &&
1333 Query.Types[1].isFixedVector() &&
1334 Query.Types[0].getScalarSizeInBits() >= 8 &&
1335 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1336 Query.Types[0].getSizeInBits() <= 128 &&
1337 Query.Types[1].getSizeInBits() <= 64;
1338 },
1339 [=](const LegalityQuery &Query) {
1340 const LLT DstTy = Query.Types[0];
1341 const LLT SrcTy = Query.Types[1];
1342 return std::pair(
1343 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1346 SrcTy.getNumElements())));
1347 });
1348
1349 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1350 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1352 .immIdx(0); // Inform verifier imm idx 0 is handled.
1353
1354 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1355 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1356 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1357
1358 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1359
1360 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1361
1362 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1363
1364 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1365
1366 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1367
1368 if (ST.hasMOPS()) {
1369 // G_BZERO is not supported. Currently it is only emitted by
1370 // PreLegalizerCombiner for G_MEMSET with zero constant.
1371 getActionDefinitionsBuilder(G_BZERO).unsupported();
1372
1373 getActionDefinitionsBuilder(G_MEMSET)
1374 .legalForCartesianProduct({p0}, {s64}, {s64})
1375 .customForCartesianProduct({p0}, {s8}, {s64})
1376 .immIdx(0); // Inform verifier imm idx 0 is handled.
1377
1378 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1379 .legalForCartesianProduct({p0}, {p0}, {s64})
1380 .immIdx(0); // Inform verifier imm idx 0 is handled.
1381
1382 // G_MEMCPY_INLINE does not have a tailcall immediate
1383 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1384 .legalForCartesianProduct({p0}, {p0}, {s64});
1385
1386 } else {
1387 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1388 .libcall();
1389 }
1390
1391 // For fadd reductions we have pairwise operations available. We treat the
1392 // usual legal types as legal and handle the lowering to pairwise instructions
1393 // later.
1394 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1395 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1396 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1397 .minScalarOrElt(0, MinFPScalar)
1398 .clampMaxNumElements(1, s64, 2)
1399 .clampMaxNumElements(1, s32, 4)
1400 .clampMaxNumElements(1, s16, 8)
1401 .moreElementsToNextPow2(1)
1402 .scalarize(1)
1403 .lower();
1404
1405 // For fmul reductions we need to split up into individual operations. We
1406 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1407 // smaller types, followed by scalarizing what remains.
1408 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1409 .minScalarOrElt(0, MinFPScalar)
1410 .clampMaxNumElements(1, s64, 2)
1411 .clampMaxNumElements(1, s32, 4)
1412 .clampMaxNumElements(1, s16, 8)
1413 .clampMaxNumElements(1, s32, 2)
1414 .clampMaxNumElements(1, s16, 4)
1415 .scalarize(1)
1416 .lower();
1417
1418 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1419 .scalarize(2)
1420 .lower();
1421
1422 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1423 .legalFor({{i8, v8i8},
1424 {i8, v16i8},
1425 {i16, v4i16},
1426 {i16, v8i16},
1427 {i32, v2i32},
1428 {i32, v4i32},
1429 {i64, v2i64}})
1431 .clampMaxNumElements(1, s64, 2)
1432 .clampMaxNumElements(1, s32, 4)
1433 .clampMaxNumElements(1, s16, 8)
1434 .clampMaxNumElements(1, s8, 16)
1435 .widenVectorEltsToVectorMinSize(1, 64)
1436 .scalarize(1);
1437
1438 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1439 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1440 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1441 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1442 .minScalarOrElt(0, MinFPScalar)
1443 .clampMaxNumElements(1, s64, 2)
1444 .clampMaxNumElements(1, s32, 4)
1445 .clampMaxNumElements(1, s16, 8)
1446 .scalarize(1)
1447 .lower();
1448
1449 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1450 .clampMaxNumElements(1, s32, 2)
1451 .clampMaxNumElements(1, s16, 4)
1452 .clampMaxNumElements(1, s8, 8)
1453 .scalarize(1)
1454 .lower();
1455
1456 getActionDefinitionsBuilder(
1457 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1458 .legalFor({{i8, v8i8},
1459 {i8, v16i8},
1460 {i16, v4i16},
1461 {i16, v8i16},
1462 {i32, v2i32},
1463 {i32, v4i32}})
1464 .moreElementsIf(
1465 [=](const LegalityQuery &Query) {
1466 return Query.Types[1].isVector() &&
1467 Query.Types[1].getElementType() != s8 &&
1468 Query.Types[1].getNumElements() & 1;
1469 },
1471 .clampMaxNumElements(1, s64, 2)
1472 .clampMaxNumElements(1, s32, 4)
1473 .clampMaxNumElements(1, s16, 8)
1474 .clampMaxNumElements(1, s8, 16)
1475 .scalarize(1)
1476 .lower();
1477
1478 getActionDefinitionsBuilder(
1479 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1480 // Try to break down into smaller vectors as long as they're at least 64
1481 // bits. This lets us use vector operations for some parts of the
1482 // reduction.
1483 .fewerElementsIf(
1484 [=](const LegalityQuery &Q) {
1485 LLT SrcTy = Q.Types[1];
1486 if (SrcTy.isScalar())
1487 return false;
1488 if (!isPowerOf2_32(SrcTy.getNumElements()))
1489 return false;
1490 // We can usually perform 64b vector operations.
1491 return SrcTy.getSizeInBits() > 64;
1492 },
1493 [=](const LegalityQuery &Q) {
1494 LLT SrcTy = Q.Types[1];
1495 return std::make_pair(1, SrcTy.divide(2));
1496 })
1497 .scalarize(1)
1498 .lower();
1499
1500 // TODO: Update this to correct handling when adding AArch64/SVE support.
1501 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1502
1503 // Access to floating-point environment.
1504 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1505 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1506 .libcall();
1507
1508 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1509
1510 getActionDefinitionsBuilder(G_PREFETCH).custom();
1511
1512 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1513
1514 getActionDefinitionsBuilder({G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
1515 .alwaysLegal();
1516 getActionDefinitionsBuilder(G_FENCE).alwaysLegal();
1517 getActionDefinitionsBuilder(G_INVOKE_REGION_START).alwaysLegal();
1518
1519 getLegacyLegalizerInfo().computeTables();
1520 verify(*ST.getInstrInfo());
1521}
1522
1525 LostDebugLocObserver &LocObserver) const {
 // Routes MI to the helper that matches its opcode. Each case returns that
 // helper's result directly; an opcode with no case yields false.
 // NOTE(review): the start of this signature is not visible in this copy of
 // the file - confirm the parameter list against the upstream source.
1526 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1527 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1528 GISelChangeObserver &Observer = Helper.Observer;
1529 switch (MI.getOpcode()) {
1530 default:
1531 // No case covers this opcode: nothing is changed and false is returned.
1532 return false;
1533 case TargetOpcode::G_VAARG:
1534 return legalizeVaArg(MI, MRI, MIRBuilder);
1535 case TargetOpcode::G_LOAD:
1536 case TargetOpcode::G_STORE:
1537 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1538 case TargetOpcode::G_SHL:
1539 case TargetOpcode::G_ASHR:
1540 case TargetOpcode::G_LSHR:
1541 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1542 case TargetOpcode::G_GLOBAL_VALUE:
1543 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1544 case TargetOpcode::G_SBFX:
1545 case TargetOpcode::G_UBFX:
1546 return legalizeBitfieldExtract(MI, MRI, Helper);
1547 case TargetOpcode::G_FSHL:
1548 case TargetOpcode::G_FSHR:
1549 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1550 case TargetOpcode::G_ROTR:
1551 return legalizeRotate(MI, MRI, Helper);
1552 case TargetOpcode::G_CTPOP:
1553 return legalizeCTPOP(MI, MRI, Helper);
1554 case TargetOpcode::G_ATOMIC_CMPXCHG:
1555 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1556 case TargetOpcode::G_CTTZ:
1557 return legalizeCTTZ(MI, Helper);
 // All four memory operations share a single helper.
1558 case TargetOpcode::G_BZERO:
1559 case TargetOpcode::G_MEMCPY:
1560 case TargetOpcode::G_MEMMOVE:
1561 case TargetOpcode::G_MEMSET:
1562 return legalizeMemOps(MI, Helper);
1563 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1564 return legalizeExtractVectorElt(MI, MRI, Helper);
1565 case TargetOpcode::G_DYN_STACKALLOC:
1566 return legalizeDynStackAlloc(MI, Helper);
1567 case TargetOpcode::G_PREFETCH:
1568 return legalizePrefetch(MI, Helper);
 // G_ABS has no dedicated member helper; it goes straight to the generic
 // LegalizerHelper routine.
1569 case TargetOpcode::G_ABS:
1570 return Helper.lowerAbsToCNeg(MI);
1571 case TargetOpcode::G_ICMP:
1572 return legalizeICMP(MI, MRI, MIRBuilder);
1573 case TargetOpcode::G_BITCAST:
1574 return legalizeBitcast(MI, Helper);
1575 case TargetOpcode::G_CONCAT_VECTORS:
1576 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1577 case TargetOpcode::G_FPTRUNC:
1578 // In order to lower f16 to f64 properly, we need to use f32 as an
1579 // intermediary
1580 return legalizeFptrunc(MI, MIRBuilder, MRI);
1581 }
1582
 // Every path through the switch returns, so this point is never reached.
1583 llvm_unreachable("expected switch to return");
1584}
1585
/// Custom legalization for a G_BITCAST whose source is a vector of 1-bit
/// elements and whose destination is a scalar.
///
/// \param MI     the G_BITCAST instruction (asserted).
/// \param Helper supplies createStackStoreLoad, which produces the result.
/// \returns true after MI has been replaced and erased; false, with MI left
///          untouched, when the types are not the scalar-from-s1-vector shape.
1586bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1587 LegalizerHelper &Helper) const {
1588 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1589 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1590 // We're trying to handle casts from i1 vectors to scalars by reloading from
1591 // the stack. Any other combination of types is rejected here.
1592 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1593 SrcTy.getElementType() != LLT::scalar(1))
1594 return false;
1595
 // DstReg is defined by the store/load pair, so the original cast is dead.
1596 Helper.createStackStoreLoad(DstReg, SrcReg);
1597 MI.eraseFromParent();
1598 return true;
1599}
1600
/// Custom legalization for G_FSHL / G_FSHR.
///
/// A shift amount that is not a known constant, or that is 0 modulo the
/// operation width, is handed to Helper.lowerFunnelShiftAsShifts. Otherwise
/// the instruction ends up as a G_FSHR whose amount is a 64-bit G_CONSTANT:
/// a G_FSHR gets its amount operand replaced in place (unless it is already
/// in that form), and a G_FSHL is replaced by a new G_FSHR with amount
/// (BitWidth - Amount).
///
/// NOTE(review): MRI is used below but its declaration, and the tail of the
/// first return expression, are not visible in this copy of the file -
/// confirm against the upstream source.
1601bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1603 MachineIRBuilder &MIRBuilder,
1604 GISelChangeObserver &Observer,
1605 LegalizerHelper &Helper) const {
1606 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1607 MI.getOpcode() == TargetOpcode::G_FSHR);
1608
1609 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1610 // lowering
 // Operand 3 is the shift amount; VRegAndVal is empty when it is not a
 // constant that can be looked through to.
1611 Register ShiftNo = MI.getOperand(3).getReg();
1612 LLT ShiftTy = MRI.getType(ShiftNo);
1613 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1614
1615 // Adjust shift amount according to Opcode (FSHL/FSHR)
1616 // Convert FSHL to FSHR
 // BitWidth holds the size of the result type, expressed as an APInt with the
 // same width as the shift-amount type so it can be combined with the
 // constant amount.
1617 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1618 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1619
1620 // Lower non-constant shifts and leave zero shifts to the optimizer.
1621 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1622 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1624
 // Amount is non-zero from here on (the zero case returned above), so for
 // G_FSHL the value BitWidth - Amount also stays below BitWidth.
1625 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1626
1627 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1628
1629 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1630 // in the range of 0 <-> BitWidth, it is legal
 // This test uses the original constant, not the reduced Amount.
1631 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1632 VRegAndVal->Value.ult(BitWidth))
1633 return true;
1634
1635 // Cast the ShiftNumber to a 64-bit type
1636 auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));
1637
 // G_FSHR is updated in place, with the observer notified around the edit.
1638 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1639 Observer.changingInstr(MI);
1640 MI.getOperand(3).setReg(Cast64.getReg(0));
1641 Observer.changedInstr(MI);
1642 }
1643 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1644 // instruction
1645 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1646 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1647 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1648 Cast64.getReg(0)});
1649 MI.eraseFromParent();
1650 }
1651 return true;
1652}
1653
/// Custom legalization for G_ICMP.
///
/// Returns false unless the destination and source types agree in element
/// size and element count and the destination is 64 or 128 bits wide. For
/// accepted types, an ICMP_NE compare is rewritten as NOT(ICMP_EQ) and the
/// original instruction is erased; every other predicate is left unchanged.
///
/// NOTE(review): MRI is used below but its declaration is not visible in this
/// copy of the signature - confirm against the upstream source.
1654bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1656 MachineIRBuilder &MIRBuilder) const {
 // Operand 0 is the result, operand 1 the predicate, operands 2 and 3 the
 // values being compared.
1657 Register DstReg = MI.getOperand(0).getReg();
1658 Register SrcReg1 = MI.getOperand(2).getReg();
1659 Register SrcReg2 = MI.getOperand(3).getReg();
1660 LLT DstTy = MRI.getType(DstReg);
1661 LLT SrcTy = MRI.getType(SrcReg1);
1662
1663 // Check the vector types are legal
1664 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1665 DstTy.getNumElements() != SrcTy.getNumElements() ||
1666 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1667 return false;
1668
1669 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1670 // following passes
1671 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
 // Anything other than NE is accepted as it stands.
1672 if (Pred != CmpInst::ICMP_NE)
1673 return true;
1674 Register CmpReg =
1675 MIRBuilder
1676 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1677 .getReg(0);
 // The inverted EQ result is written to the original destination register.
1678 MIRBuilder.buildNot(DstReg, CmpReg);
1679
1680 MI.eraseFromParent();
1681 return true;
1682}
1683
/// Custom legalization for a rotate: zero-extends the rotate amount
/// (operand 2) to a 64-bit integer and rewires MI to use the extended value.
/// Always returns true.
///
/// NOTE(review): MRI is used below but its declaration is not visible in this
/// copy of the signature - confirm against the upstream source.
1684bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1686 LegalizerHelper &Helper) const {
1687 // To allow for imported patterns to match, we ensure that the rotate amount
1688 // is 64b with an extension.
1689 Register AmtReg = MI.getOperand(2).getReg();
1690 LLT AmtTy = MRI.getType(AmtReg);
 // AmtTy is only read by the asserts below; the void cast keeps it from
 // being reported as unused when asserts are compiled out.
1691 (void)AmtTy;
1692 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1693 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1694 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
 // MI is modified in place, so the observer is notified around the edit.
1695 Helper.Observer.changingInstr(MI);
1696 MI.getOperand(2).setReg(NewAmt.getReg(0));
1697 Helper.Observer.changedInstr(MI);
1698 return true;
1699}
1700
/// Custom legalization for G_GLOBAL_VALUE: replaces it with an ADRP followed
/// by G_ADD_LOW, inserting a MOVKXi between the two when the reference is
/// classified MO_TAGGED.
///
/// MI is left untouched when its operand is a symbol, when the global is
/// thread-local, or when the reference is classified MO_GOT. Returns true in
/// every case.
///
/// NOTE(review): the parameter line declaring MI, MRI and MIRBuilder, and the
/// flag arguments of two addGlobalAddress calls, are not visible in this copy
/// of the file - confirm against the upstream source.
1701bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1703 GISelChangeObserver &Observer) const {
1704 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1705 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1706 // G_ADD_LOW instructions.
1707 // By splitting this here, we can optimize accesses in the small code model by
1708 // folding in the G_ADD_LOW into the load/store offset.
1709 auto &GlobalOp = MI.getOperand(1);
1710 // Don't modify an intrinsic call.
1711 if (GlobalOp.isSymbol())
1712 return true;
1713 const auto* GV = GlobalOp.getGlobal();
1714 if (GV->isThreadLocal())
1715 return true; // Don't want to modify TLS vars.
1716
1717 auto &TM = ST->getTargetLowering()->getTargetMachine();
1718 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1719
 // References classified as MO_GOT are also left as they are.
1720 if (OpFlags & AArch64II::MO_GOT)
1721 return true;
1722
1723 auto Offset = GlobalOp.getOffset();
1724 Register DstReg = MI.getOperand(0).getReg();
1725 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1726 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1727 // Set the regclass on the dest reg too.
1728 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1729
1730 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1731 // by creating a MOVK that sets bits 48-63 of the register to (global address
1732 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1733 // prevent an incorrect tag being generated during relocation when the
1734 // global appears before the code section. Without the offset, a global at
1735 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1736 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1737 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1738 // instead of `0xf`.
1739 // This assumes that we're in the small code model so we can assume a binary
1740 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1741 // binary must also be loaded into address range [0, 2^48). Both of these
1742 // properties need to be ensured at runtime when using tagged addresses.
1743 if (OpFlags & AArch64II::MO_TAGGED) {
1744 assert(!Offset &&
1745 "Should not have folded in an offset for a tagged global!");
 // ADRP is rebound to the MOVK result, so the G_ADD_LOW built afterwards
 // consumes the tagged value.
1746 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1747 .addGlobalAddress(GV, 0x100000000,
1749 .addImm(48);
1750 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1751 }
1752
 // G_ADD_LOW defines the original destination register, after which the
 // G_GLOBAL_VALUE itself is removed.
1753 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1754 .addGlobalAddress(GV, Offset,
1756 MI.eraseFromParent();
1757 return true;
1758}
1759
1761 MachineInstr &MI) const {
1762 MachineIRBuilder &MIB = Helper.MIRBuilder;
1763 MachineRegisterInfo &MRI = *MIB.getMRI();
1764
1765 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1766 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1767 MI.eraseFromParent();
1768 return true;
1769 };
1770 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1771 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1772 {MI.getOperand(2), MI.getOperand(3)});
1773 MI.eraseFromParent();
1774 return true;
1775 };
1776 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1777 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1778 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1779 MI.eraseFromParent();
1780 return true;
1781 };
1782
1783 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1784 switch (IntrinsicID) {
1785 case Intrinsic::vacopy: {
1786 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1787 unsigned VaListSize =
1788 (ST->isTargetDarwin() || ST->isTargetWindows())
1789 ? PtrSize
1790 : ST->isTargetILP32() ? 20 : 32;
1791
1792 MachineFunction &MF = *MI.getMF();
1794 LLT::scalar(VaListSize * 8));
1795 MIB.buildLoad(Val, MI.getOperand(2),
1798 VaListSize, Align(PtrSize)));
1799 MIB.buildStore(Val, MI.getOperand(1),
1802 VaListSize, Align(PtrSize)));
1803 MI.eraseFromParent();
1804 return true;
1805 }
1806 case Intrinsic::get_dynamic_area_offset: {
1807 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1808 MI.eraseFromParent();
1809 return true;
1810 }
1811 case Intrinsic::aarch64_mops_memset_tag: {
1812 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1813 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1814 // the instruction).
1815 auto &Value = MI.getOperand(3);
1816 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1817 Value.setReg(ExtValueReg);
1818 return true;
1819 }
1820 case Intrinsic::aarch64_prefetch: {
1821 auto &AddrVal = MI.getOperand(1);
1822
1823 int64_t IsWrite = MI.getOperand(2).getImm();
1824 int64_t Target = MI.getOperand(3).getImm();
1825 int64_t IsStream = MI.getOperand(4).getImm();
1826 int64_t IsData = MI.getOperand(5).getImm();
1827
1828 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1829 (!IsData << 3) | // IsDataCache bit
1830 (Target << 1) | // Cache level bits
1831 (unsigned)IsStream; // Stream bit
1832
1833 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1834 MI.eraseFromParent();
1835 return true;
1836 }
1837 case Intrinsic::aarch64_range_prefetch: {
1838 auto &AddrVal = MI.getOperand(1);
1839
1840 int64_t IsWrite = MI.getOperand(2).getImm();
1841 int64_t IsStream = MI.getOperand(3).getImm();
1842 unsigned PrfOp = (IsStream << 2) | IsWrite;
1843
1844 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1845 .addImm(PrfOp)
1846 .add(AddrVal)
1847 .addUse(MI.getOperand(4).getReg()); // Metadata
1848 MI.eraseFromParent();
1849 return true;
1850 }
1851 case Intrinsic::aarch64_prefetch_ir: {
1852 auto &AddrVal = MI.getOperand(1);
1853 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1854 MI.eraseFromParent();
1855 return true;
1856 }
1857 case Intrinsic::aarch64_neon_uaddv:
1858 case Intrinsic::aarch64_neon_saddv:
1859 case Intrinsic::aarch64_neon_umaxv:
1860 case Intrinsic::aarch64_neon_smaxv:
1861 case Intrinsic::aarch64_neon_uminv:
1862 case Intrinsic::aarch64_neon_sminv: {
1863 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1864 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1865 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1866
1867 auto OldDst = MI.getOperand(0).getReg();
1868 auto OldDstTy = MRI.getType(OldDst);
1869 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1870 if (OldDstTy == NewDstTy)
1871 return true;
1872
1873 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1874
1875 Helper.Observer.changingInstr(MI);
1876 MI.getOperand(0).setReg(NewDst);
1877 Helper.Observer.changedInstr(MI);
1878
1879 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1880 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1881 OldDst, NewDst);
1882
1883 return true;
1884 }
1885 case Intrinsic::aarch64_neon_uaddlp:
1886 case Intrinsic::aarch64_neon_saddlp: {
1887 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1888 ? AArch64::G_UADDLP
1889 : AArch64::G_SADDLP;
1890 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1891 MI.eraseFromParent();
1892
1893 return true;
1894 }
1895 case Intrinsic::aarch64_neon_uaddlv:
1896 case Intrinsic::aarch64_neon_saddlv: {
1897 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1898 ? AArch64::G_UADDLV
1899 : AArch64::G_SADDLV;
1900 Register DstReg = MI.getOperand(0).getReg();
1901 Register SrcReg = MI.getOperand(2).getReg();
1902 LLT DstTy = MRI.getType(DstReg);
1903
1904 LLT MidTy, ExtTy;
1905 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1906 ExtTy = LLT::integer(32);
1907 MidTy = LLT::fixed_vector(4, ExtTy);
1908 } else {
1909 ExtTy = LLT::integer(64);
1910 MidTy = LLT::fixed_vector(2, ExtTy);
1911 }
1912
1913 Register MidReg =
1914 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1915 Register ZeroReg =
1916 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1917 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1918 {MidReg, ZeroReg})
1919 .getReg(0);
1920
1921 if (DstTy.getScalarSizeInBits() < 32)
1922 MIB.buildTrunc(DstReg, ExtReg);
1923 else
1924 MIB.buildCopy(DstReg, ExtReg);
1925
1926 MI.eraseFromParent();
1927
1928 return true;
1929 }
1930 case Intrinsic::aarch64_neon_smax:
1931 return LowerBinOp(TargetOpcode::G_SMAX);
1932 case Intrinsic::aarch64_neon_smin:
1933 return LowerBinOp(TargetOpcode::G_SMIN);
1934 case Intrinsic::aarch64_neon_umax:
1935 return LowerBinOp(TargetOpcode::G_UMAX);
1936 case Intrinsic::aarch64_neon_umin:
1937 return LowerBinOp(TargetOpcode::G_UMIN);
1938 case Intrinsic::aarch64_neon_fmax:
1939 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1940 case Intrinsic::aarch64_neon_fmin:
1941 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1942 case Intrinsic::aarch64_neon_fmaxnm:
1943 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1944 case Intrinsic::aarch64_neon_fminnm:
1945 return LowerBinOp(TargetOpcode::G_FMINNUM);
1946 case Intrinsic::aarch64_neon_pmul:
1947 return LowerBinOp(TargetOpcode::G_CLMUL);
1948 case Intrinsic::aarch64_neon_pmull:
1949 case Intrinsic::aarch64_neon_pmull64:
1950 return LowerBinOp(AArch64::G_PMULL);
1951 case Intrinsic::aarch64_neon_smull:
1952 return LowerBinOp(AArch64::G_SMULL);
1953 case Intrinsic::aarch64_neon_umull:
1954 return LowerBinOp(AArch64::G_UMULL);
1955 case Intrinsic::aarch64_neon_sabd:
1956 return LowerBinOp(TargetOpcode::G_ABDS);
1957 case Intrinsic::aarch64_neon_uabd:
1958 return LowerBinOp(TargetOpcode::G_ABDU);
1959 case Intrinsic::aarch64_neon_uhadd:
1960 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1961 case Intrinsic::aarch64_neon_urhadd:
1962 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1963 case Intrinsic::aarch64_neon_shadd:
1964 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1965 case Intrinsic::aarch64_neon_srhadd:
1966 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1967 case Intrinsic::aarch64_neon_sqshrn: {
1968 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1969 return true;
1970 // Create right shift instruction. Store the output register in Shr.
1971 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1972 {MRI.getType(MI.getOperand(2).getReg())},
1973 {MI.getOperand(2), MI.getOperand(3).getImm()});
1974 // Build the narrow intrinsic, taking in Shr.
1975 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1976 MI.eraseFromParent();
1977 return true;
1978 }
1979 case Intrinsic::aarch64_neon_sqshrun: {
1980 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1981 return true;
1982 // Create right shift instruction. Store the output register in Shr.
1983 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1984 {MRI.getType(MI.getOperand(2).getReg())},
1985 {MI.getOperand(2), MI.getOperand(3).getImm()});
1986 // Build the narrow intrinsic, taking in Shr.
1987 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1988 MI.eraseFromParent();
1989 return true;
1990 }
1991 case Intrinsic::aarch64_neon_sqrshrn: {
1992 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1993 return true;
1994 // Create right shift instruction. Store the output register in Shr.
1995 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1996 {MRI.getType(MI.getOperand(2).getReg())},
1997 {MI.getOperand(2), MI.getOperand(3).getImm()});
1998 // Build the narrow intrinsic, taking in Shr.
1999 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
2000 MI.eraseFromParent();
2001 return true;
2002 }
2003 case Intrinsic::aarch64_neon_sqrshrun: {
2004 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2005 return true;
2006 // Create right shift instruction. Store the output register in Shr.
2007 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
2008 {MRI.getType(MI.getOperand(2).getReg())},
2009 {MI.getOperand(2), MI.getOperand(3).getImm()});
2010 // Build the narrow intrinsic, taking in Shr.
2011 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
2012 MI.eraseFromParent();
2013 return true;
2014 }
2015 case Intrinsic::aarch64_neon_uqrshrn: {
2016 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2017 return true;
2018 // Create right shift instruction. Store the output register in Shr.
2019 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
2020 {MRI.getType(MI.getOperand(2).getReg())},
2021 {MI.getOperand(2), MI.getOperand(3).getImm()});
2022 // Build the narrow intrinsic, taking in Shr.
2023 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2024 MI.eraseFromParent();
2025 return true;
2026 }
2027 case Intrinsic::aarch64_neon_uqshrn: {
2028 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2029 return true;
2030 // Create right shift instruction. Store the output register in Shr.
2031 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2032 {MRI.getType(MI.getOperand(2).getReg())},
2033 {MI.getOperand(2), MI.getOperand(3).getImm()});
2034 // Build the narrow intrinsic, taking in Shr.
2035 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2036 MI.eraseFromParent();
2037 return true;
2038 }
2039 case Intrinsic::aarch64_neon_sqshlu: {
2040 // Check if last operand is constant vector dup
2041 auto ShiftAmount = isConstantOrConstantSplatVector(
2042 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2043 if (ShiftAmount) {
2044 // If so, create a new intrinsic with the correct shift amount
2045 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2046 {MI.getOperand(2)})
2047 .addImm(ShiftAmount->getSExtValue());
2048 MI.eraseFromParent();
2049 return true;
2050 }
2051 return false;
2052 }
2053 case Intrinsic::aarch64_neon_vsli: {
2054 MIB.buildInstr(
2055 AArch64::G_SLI, {MI.getOperand(0)},
2056 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2057 MI.eraseFromParent();
2058 break;
2059 }
2060 case Intrinsic::aarch64_neon_vsri: {
2061 MIB.buildInstr(
2062 AArch64::G_SRI, {MI.getOperand(0)},
2063 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2064 MI.eraseFromParent();
2065 break;
2066 }
2067 case Intrinsic::aarch64_neon_abs: {
2068 // Lower the intrinsic to G_ABS.
2069 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2070 MI.eraseFromParent();
2071 return true;
2072 }
2073 case Intrinsic::aarch64_neon_sqadd: {
2074 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2075 return LowerBinOp(TargetOpcode::G_SADDSAT);
2076 break;
2077 }
2078 case Intrinsic::aarch64_neon_sqsub: {
2079 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2080 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2081 break;
2082 }
2083 case Intrinsic::aarch64_neon_uqadd: {
2084 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2085 return LowerBinOp(TargetOpcode::G_UADDSAT);
2086 break;
2087 }
2088 case Intrinsic::aarch64_neon_uqsub: {
2089 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2090 return LowerBinOp(TargetOpcode::G_USUBSAT);
2091 break;
2092 }
2093 case Intrinsic::aarch64_neon_udot:
2094 return LowerTriOp(AArch64::G_UDOT);
2095 case Intrinsic::aarch64_neon_sdot:
2096 return LowerTriOp(AArch64::G_SDOT);
2097 case Intrinsic::aarch64_neon_usdot:
2098 return LowerTriOp(AArch64::G_USDOT);
2099 case Intrinsic::aarch64_neon_sqxtn:
2100 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2101 case Intrinsic::aarch64_neon_sqxtun:
2102 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2103 case Intrinsic::aarch64_neon_uqxtn:
2104 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2105 case Intrinsic::aarch64_neon_fcvtzu:
2106 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2107 case Intrinsic::aarch64_neon_fcvtzs:
2108 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2109
2110 case Intrinsic::vector_reverse:
2111 // TODO: Add support for vector_reverse
2112 return false;
2113 }
2114
2115 return true;
2116}
2117
/// Custom legalization for G_SHL / G_ASHR / G_LSHR (the opcodes accepted by
/// the assert below).
///
/// When the shift amount (operand 2) resolves to an integer constant that is
/// not greater than 31, a new constant whose element size is 64 bits is built
/// and operand 2 is rewritten to use it, bracketed by
/// Observer.changingInstr / Observer.changedInstr. In every other case the
/// instruction is left untouched.
///
/// \returns true on every path.
///
/// NOTE(review): listing line 2119 (the first parameter line) is absent from
/// this rendering; MI, MRI and MIRBuilder used below are presumably declared
/// there.
2118bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2120 GISelChangeObserver &Observer) const {
2121 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2122 MI.getOpcode() == TargetOpcode::G_LSHR ||
2123 MI.getOpcode() == TargetOpcode::G_SHL);
2124 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2125 // imported patterns can select it later. Either way, it will be legal.
2126 Register AmtReg = MI.getOperand(2).getReg();
2127 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2128 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
// Non-constant shift amount: nothing to rewrite.
2129 if (!VRegAndVal)
2130 return true;
2131 // Check the shift amount is in range for an immediate form.
2132 int64_t Amount = VRegAndVal->Value.getSExtValue();
2133 if (Amount > 31)
2134 return true; // This will have to remain a register variant.
// Materialise the amount again with 64-bit elements and swap it into MI.
2135 auto ExtCst =
2136 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
2137 Observer.changingInstr(MI);
2138 MI.getOperand(2).setReg(ExtCst.getReg(0));
2139 Observer.changedInstr(MI);
2140 return true;
2141}
2142
2144 MachineRegisterInfo &MRI) {
// Splits Root into a base register plus a constant offset for the LDP/STP
// addressing form used by legalizeLoadStore.
// NOTE(review): listing line 2143 (the start of this signature) is absent from
// this rendering; Root, Base and Offset are presumably declared there.
//
// Default result: the root pointer itself with a zero offset.
2145 Base = Root;
2146 Offset = 0;
2147
2148 Register NewBase;
2149 int64_t NewOffset;
// Fold a G_PTR_ADD of (register, integer constant) only when the constant
// passes isShiftedInt<7, 3>. Presumably that means a signed 7-bit value scaled
// by 8, which would match the `Offset / 8` immediate the caller emits; confirm
// against MathExtras.h.
2150 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2151 isShiftedInt<7, 3>(NewOffset)) {
2152 Base = NewBase;
2153 Offset = NewOffset;
2154 }
2155}
2156
/// Custom legalization for G_LOAD / G_STORE (the opcodes accepted by the
/// assert below). Two value types are handled:
///  - s128: replaced by a 64-bit register-pair instruction (LDPXi/STPXi, or
///    LDIAPPX/STILPX for acquire loads / release stores when the subtarget
///    reports both LSE2 and RCPC3).
///  - vectors of address-space-0 pointers: re-expressed as a load/store of an
///    integer vector of the same shape, with a G_BITCAST on the value.
///
/// \returns true after replacing and erasing MI; false when the value type is
/// neither of the above.
///
/// NOTE(review): listing line 2160 (the first parameter line) is absent from
/// this rendering; MI, MRI and MIRBuilder used below are presumably declared
/// there.
2157// FIXME: This should be removed and replaced with the generic bitcast legalize
2158// action.
2159bool AArch64LegalizerInfo::legalizeLoadStore(
2161 GISelChangeObserver &Observer) const {
2162 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2163 MI.getOpcode() == TargetOpcode::G_LOAD);
2164 // Here we just try to handle vector loads/stores where our value type might
2165 // have pointer elements, which the SelectionDAG importer can't handle. To
2166 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2167 // the value to use s64 types.
2168
2169 // Custom legalization requires the instruction, if not deleted, must be fully
2170 // legalized. In order to allow further legalization of the inst, we create
2171 // a new instruction and erase the existing one.
2172
2173 Register ValReg = MI.getOperand(0).getReg();
2174 const LLT ValTy = MRI.getType(ValReg);
2175
// 128-bit scalar case: build the pair instruction directly.
2176 if (ValTy == LLT::scalar(128)) {
2177
2178 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2179 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2180 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2181 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2182 bool IsRcpC3 =
2183 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2184
2185 LLT s64 = LLT::integer(64);
2186
2187 unsigned Opcode;
2188 if (IsRcpC3) {
2189 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2190 } else {
2191 // For LSE2, loads/stores should have been converted to monotonic and had
2192 // a fence inserted after them.
2193 assert(Ordering == AtomicOrdering::Monotonic ||
2194 Ordering == AtomicOrdering::Unordered);
2195 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2196
2197 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2198 }
2199
// Loads define two s64 halves that are merged back into ValReg; stores
// unmerge the s128 value into two s64 halves used as the data operands.
2200 MachineInstrBuilder NewI;
2201 if (IsLoad) {
2202 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2203 MIRBuilder.buildMergeLikeInstr(
2204 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2205 } else {
2206 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2207 NewI = MIRBuilder.buildInstr(
2208 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2209 }
2210
// Address operands: the RCPC3 opcodes get only the pointer register, while
// the LDP/STP form gets a base register plus an immediate of Offset / 8.
2211 if (IsRcpC3) {
2212 NewI.addUse(MI.getOperand(1).getReg());
2213 } else {
2214 Register Base;
2215 int Offset;
2216 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2217 NewI.addUse(Base);
2218 NewI.addImm(Offset / 8);
2219 }
2220
2221 NewI.cloneMemRefs(MI);
2222 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2223 *MRI.getTargetRegisterInfo(),
2224 *ST->getRegBankInfo());
2225 MI.eraseFromParent();
2226 return true;
2227 }
2228
// Anything other than a vector of address-space-0 pointers is rejected.
2229 if (!ValTy.isPointerVector() ||
2230 ValTy.getElementType().getAddressSpace() != 0) {
2231 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2232 return false;
2233 }
2234
// Pointer-vector case: same element count, integer elements of pointer width.
// Note that the existing memory operand is retyped in place via setType.
2235 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2236 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
2237 auto &MMO = **MI.memoperands_begin();
2238 MMO.setType(NewTy);
2239
2240 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2241 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2242 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2243 } else {
2244 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2245 MIRBuilder.buildBitcast(ValReg, NewLoad);
2246 }
2247 MI.eraseFromParent();
2248 return true;
2249}
2250
/// Expands the instruction into explicit loads, pointer arithmetic and a
/// store. Operand 0 is the destination value, operand 1 the pointer to the
/// va_list slot, and operand 2 an immediate alignment.
///
/// Emitted sequence: load the current list pointer, optionally round it up to
/// the requested alignment, load the value through it, advance it by the value
/// size rounded up to the pointer alignment, and store it back to the slot.
///
/// \returns true after erasing MI.
///
/// NOTE(review): listing lines 2252 (a parameter line, presumably declaring
/// MRI) and 2292 (an argument of the final getMachineMemOperand call) are
/// absent from this rendering.
2251bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2253 MachineIRBuilder &MIRBuilder) const {
2254 MachineFunction &MF = MIRBuilder.getMF();
2255 Align Alignment(MI.getOperand(2).getImm());
2256 Register Dst = MI.getOperand(0).getReg();
2257 Register ListPtr = MI.getOperand(1).getReg();
2258
2259 LLT PtrTy = MRI.getType(ListPtr);
2260 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2261
// Pointer size in bytes doubles as the natural alignment of the list slot.
2262 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2263 const Align PtrAlign = Align(PtrSize);
2264 auto List = MIRBuilder.buildLoad(
2265 PtrTy, ListPtr,
2266 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2267 PtrTy, PtrAlign));
2268
2269 MachineInstrBuilder DstPtr;
2270 if (Alignment > PtrAlign) {
2271 // Realign the list to the actual required alignment.
// Adds (Alignment - 1), then masks off the low Log2(Alignment) bits.
2272 auto AlignMinus1 =
2273 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2274 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2275 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2276 } else
2277 DstPtr = List;
2278
// Load the argument value itself from the (possibly realigned) pointer.
2279 LLT ValTy = MRI.getType(Dst);
2280 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2281 MIRBuilder.buildLoad(
2282 Dst, DstPtr,
2283 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2284 ValTy, std::max(Alignment, PtrAlign)));
2285
// Step past the value, rounding its byte size up to the pointer alignment.
2286 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2287
2288 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2289
// Write the advanced pointer back to the va_list slot.
2290 MIRBuilder.buildStore(NewList, ListPtr,
2291 *MF.getMachineMemOperand(MachinePointerInfo(),
2293 PtrTy, PtrAlign));
2294
2295 MI.eraseFromParent();
2296 return true;
2297}
2298
/// Legality check only: MI is not modified and nothing is built.
///
/// \returns true exactly when both operand 2 and operand 3 resolve to integer
/// constants via getIConstantVRegValWithLookThrough. \p Helper is not used in
/// this body.
2299bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2300 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2301 // Only legal if we can select immediate forms.
2302 // TODO: Lower this otherwise.
2303 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2304 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2305}
2306
/// Custom lowering of a population count whose source (operand 1) and
/// destination (operand 0) share one type (asserted below).
///
/// Paths, in the order they are tried:
///  1. CSSC subtarget with a 128-bit scalar: split into two i64 halves, count
///     each, add, and zero-extend the sum into Dst.
///  2. No NEON, or the function carries NoImplicitFloat: defer to
///     Helper.lowerBitCount for 32/64-bit scalars; otherwise report failure.
///  3. Dot-product subtarget with a vector whose elements are not 16 bits:
///     byte-wise count followed by G_UDOT against a vector of ones (plus one
///     G_UADDLP for 2 x i64).
///  4. Everything else: byte-wise count followed by a chain of uaddlv (scalar)
///     or uaddlp (vector) intrinsics.
///
/// NOTE(review): listing lines 2308 (a parameter line, presumably declaring
/// MRI), 2361 (the right-hand side of the lowerBitCount comparison) and 2431
/// (presumably the declaration of UADD) are absent from this rendering.
2307bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2309 LegalizerHelper &Helper) const {
2310 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2311 // it can be more efficiently lowered to the following sequence that uses
2312 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2313 // registers are cheap.
2314 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2315 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2316 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2317 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2318 //
2319 // For 128 bit vector popcounts, we lower to the following sequence:
2320 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2321 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2322 // uaddlp.4s v0, v0 // v4s32, v2s64
2323 // uaddlp.2d v0, v0 // v2s64
2324 //
2325 // For 64 bit vector popcounts, we lower to the following sequence:
2326 // cnt.8b v0, v0 // v4s16, v2s32
2327 // uaddlp.4h v0, v0 // v4s16, v2s32
2328 // uaddlp.2s v0, v0 // v2s32
2329
2330 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2331 Register Dst = MI.getOperand(0).getReg();
2332 Register Val = MI.getOperand(1).getReg();
2333 LLT Ty = MRI.getType(Val);
2334
2335 LLT i64 = LLT::integer(64);
2336 LLT i32 = LLT::integer(32);
2337 LLT i16 = LLT::integer(16);
2338 LLT i8 = LLT::integer(8);
2339 unsigned Size = Ty.getSizeInBits();
2340
2341 assert(Ty == MRI.getType(Dst) &&
2342 "Expected src and dst to have the same type!");
2343
// Path 1: 128-bit scalar with CSSC, handled as two 64-bit popcounts.
2344 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2345
2346 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2347 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2348 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2349 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2350
2351 MIRBuilder.buildZExt(Dst, Add);
2352 MI.eraseFromParent();
2353 return true;
2354 }
2355
// Path 2: the vector sequence is unavailable, so use the generic helper.
2356 if (!ST->hasNEON() ||
2357 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2358 // Use generic lowering when custom lowering is not possible.
2359 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2360 Helper.lowerBitCount(MI) ==
2362 }
2363
2364 // Pre-conditioning: widen Val up to the nearest vector type.
2365 // s32,s64,v4s16,v2s32 -> v8i8
2366 // v8s16,v4s32,v2s64 -> v16i8
2367 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2368 if (Ty.isScalar()) {
2369 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
// A 32-bit scalar is zero-extended to 64 bits before the bitcast to 8 x i8.
2370 if (Size == 32) {
2371 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2372 }
2373 }
2374 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2375
2376 // Count bits in each byte-sized lane.
2377 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2378
2379 // Sum across lanes.
// Path 3: with dot-product support, G_UDOT of the byte counts against an
// all-ones vector, accumulated onto zeros, performs the widening sum.
2380 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2381 Ty.getScalarSizeInBits() != 16) {
2382 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2383 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2384 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2385 MachineInstrBuilder Sum;
2386
2387 if (Ty == LLT::fixed_vector(2, i64)) {
2388 auto UDOT =
2389 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2390 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2391 } else if (Ty == LLT::fixed_vector(4, i32)) {
2392 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2393 } else if (Ty == LLT::fixed_vector(2, i32)) {
2394 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2395 } else {
2396 llvm_unreachable("unexpected vector shape");
2397 }
2398
// Redirect the final instruction's def to Dst instead of adding a copy.
2399 Sum->getOperand(0).setReg(Dst);
2400 MI.eraseFromParent();
2401 return true;
2402 }
2403
// Path 4: choose the intrinsic and the sequence of result types; each entry
// of HAddTys becomes one intrinsic call fed by the previous result.
2404 Register HSum = CTPOP.getReg(0);
2405 unsigned Opc;
2406 SmallVector<LLT> HAddTys;
2407 if (Ty.isScalar()) {
2408 Opc = Intrinsic::aarch64_neon_uaddlv;
2409 HAddTys.push_back(i32);
2410 } else if (Ty == LLT::fixed_vector(8, i16)) {
2411 Opc = Intrinsic::aarch64_neon_uaddlp;
2412 HAddTys.push_back(LLT::fixed_vector(8, i16));
2413 } else if (Ty == LLT::fixed_vector(4, i32)) {
2414 Opc = Intrinsic::aarch64_neon_uaddlp;
2415 HAddTys.push_back(LLT::fixed_vector(8, i16));
2416 HAddTys.push_back(LLT::fixed_vector(4, i32));
2417 } else if (Ty == LLT::fixed_vector(2, i64)) {
2418 Opc = Intrinsic::aarch64_neon_uaddlp;
2419 HAddTys.push_back(LLT::fixed_vector(8, i16));
2420 HAddTys.push_back(LLT::fixed_vector(4, i32));
2421 HAddTys.push_back(LLT::fixed_vector(2, i64));
2422 } else if (Ty == LLT::fixed_vector(4, i16)) {
2423 Opc = Intrinsic::aarch64_neon_uaddlp;
2424 HAddTys.push_back(LLT::fixed_vector(4, i16));
2425 } else if (Ty == LLT::fixed_vector(2, i32)) {
2426 Opc = Intrinsic::aarch64_neon_uaddlp;
2427 HAddTys.push_back(LLT::fixed_vector(4, i16));
2428 HAddTys.push_back(LLT::fixed_vector(2, i32));
2429 } else
2430 llvm_unreachable("unexpected vector shape");
2432 for (LLT HTy : HAddTys) {
2433 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2434 HSum = UADD.getReg(0);
2435 }
2436
2437 // Post-conditioning.
// 64- and 128-bit scalars get a zero-extend of the i32 sum; all other types
// have the last intrinsic's def rewritten to Dst.
2438 if (Ty.isScalar() && (Size == 64 || Size == 128))
2439 MIRBuilder.buildZExt(Dst, UADD);
2440 else
2441 UADD->getOperand(0).setReg(Dst);
2442 MI.eraseFromParent();
2443 return true;
2444}
2445
/// Replaces a 128-bit compare-and-swap with either a CASP* instruction (when
/// the subtarget has LSE) or a CMP_SWAP_128* pseudo. Operand 1 is the address,
/// operands 2 and 3 are the desired and new values; both values are unmerged
/// into i64 halves, and the two i64 results are merged back into operand 0.
///
/// \returns true after erasing MI.
///
/// NOTE(review): the `case` labels of both ordering switches (listing lines
/// 2470, 2473, 2476-2477, 2511, 2514, 2517-2518) are absent from this
/// rendering, so the ordering-to-opcode mapping cannot be read off here.
2446bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2447 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2448 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2449 LLT i64 = LLT::integer(64);
2450 auto Addr = MI.getOperand(1).getReg();
2451 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2452 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2453 auto DstLo = MRI.createGenericVirtualRegister(i64);
2454 auto DstHi = MRI.createGenericVirtualRegister(i64);
2455
2456 MachineInstrBuilder CAS;
2457 if (ST->hasLSE()) {
2458 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2459 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2460 // the rest of the MIR so we must reassemble the extracted registers into a
2461 // 128-bit known-regclass one with code like this:
2462 //
2463 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2464 // %out = CASP %in1, ...
2465 // %OldLo = G_EXTRACT %out, 0
2466 // %OldHi = G_EXTRACT %out, 64
2467 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2468 unsigned Opcode;
2469 switch (Ordering) {
2471 Opcode = AArch64::CASPAX;
2472 break;
2474 Opcode = AArch64::CASPLX;
2475 break;
2478 Opcode = AArch64::CASPALX;
2479 break;
2480 default:
2481 Opcode = AArch64::CASPX;
2482 break;
2483 }
2484
2485 LLT s128 = LLT::scalar(128);
2486 auto CASDst = MRI.createGenericVirtualRegister(s128);
2487 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2488 auto CASNew = MRI.createGenericVirtualRegister(s128);
// Pack each pair of halves into one s128 register using the sube64 / subo64
// subregister indices.
2489 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2490 .addUse(DesiredI->getOperand(0).getReg())
2491 .addImm(AArch64::sube64)
2492 .addUse(DesiredI->getOperand(1).getReg())
2493 .addImm(AArch64::subo64);
2494 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2495 .addUse(NewI->getOperand(0).getReg())
2496 .addImm(AArch64::sube64)
2497 .addUse(NewI->getOperand(1).getReg())
2498 .addImm(AArch64::subo64);
2499
2500 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2501
// Split the s128 result at bit offsets 0 and 64.
2502 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2503 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2504 } else {
2505 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2506 // can take arbitrary registers so it just has the normal GPR64 operands the
2507 // rest of AArch64 is expecting.
2508 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2509 unsigned Opcode;
2510 switch (Ordering) {
2512 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2513 break;
2515 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2516 break;
2519 Opcode = AArch64::CMP_SWAP_128;
2520 break;
2521 default:
2522 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2523 break;
2524 }
2525
// The pseudo defines a third, GPR64-class register in addition to the two
// result halves.
2526 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2527 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2528 {Addr, DesiredI->getOperand(0),
2529 DesiredI->getOperand(1), NewI->getOperand(0),
2530 NewI->getOperand(1)});
2531 }
2532
2533 CAS.cloneMemRefs(MI);
2534 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2535 *MRI.getTargetRegisterInfo(),
2536 *ST->getRegBankInfo());
2537
2538 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2539 MI.eraseFromParent();
2540 return true;
2541}
2542
/// Rewrites a count-trailing-zeros as a bit reversal of the source (operand 1)
/// followed by a count-leading-zeros into the destination (operand 0).
///
/// \returns true after erasing MI.
2543bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2544 LegalizerHelper &Helper) const {
2545 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2546 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2547 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
// After reversing the bits, the trailing zeros become leading zeros.
2548 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2549 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2550 MI.eraseFromParent();
2551 return true;
2552}
2553
/// Custom legalization for memory-operation instructions; only G_MEMSET is
/// handled. For G_MEMSET the value operand (operand 1) is any-extended to a
/// 64-bit integer and the operand is rewritten in place; MI itself is kept.
///
/// \returns true for G_MEMSET, false for every other opcode.
2554bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2555 LegalizerHelper &Helper) const {
2556 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2557
2558 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2559 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2560 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2561 // the instruction).
2562 auto &Value = MI.getOperand(1);
2563 Register ExtValueReg =
2564 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
// NOTE(review): the operand is changed without a change-observer
// notification, unlike legalizeShlAshrLshr; confirm this is intentional.
2565 Value.setReg(ExtValueReg);
2566 return true;
2567 }
2568
2569 return false;
2570}
2571
/// Custom legalization for a vector element extract. The instruction is
/// accepted unchanged when VRegAndVal is set or when the vector operand is a
/// scalable vector; otherwise it is handed to
/// Helper.lowerExtractInsertVectorElt and the result of that call decides the
/// return value.
///
/// NOTE(review): listing lines 2576 (the initializer of VRegAndVal, presumably
/// a constant lookup on the index operand) and 2583 (the right-hand side of
/// the final comparison) are absent from this rendering.
2572bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2573 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2574 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2575 auto VRegAndVal =
2577 if (VRegAndVal)
2578 return true;
2579 LLT VecTy = MRI.getType(Element->getVectorReg());
// Scalable vectors are left as they are.
2580 if (VecTy.isScalableVector())
2581 return true;
2582 return Helper.lowerExtractInsertVectorElt(MI) !=
2584}
2585
/// Custom legalization for a dynamic stack allocation. Operand 0 is the
/// resulting pointer, operand 1 the allocation size, and operand 2 an
/// immediate alignment.
///
/// Unless the function has a "probe-stack" attribute whose value is
/// "inline-asm", the generic Helper.lowerDynStackAlloc is used. Otherwise the
/// target pointer is computed with Helper.getDynStackAllocTargetPtr, a
/// PROBED_STACKALLOC_DYN instruction is built on it, and the pointer is copied
/// into Dst.
///
/// \returns true on both paths.
///
/// NOTE(review): listing line 2612 (the initializer of SPReg) is absent from
/// this rendering.
2586bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2587 MachineInstr &MI, LegalizerHelper &Helper) const {
2588 MachineFunction &MF = *MI.getParent()->getParent();
2589 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2590 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2591
2592 // If stack probing is not enabled for this function, use the default
2593 // lowering.
2594 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2595 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2596 "inline-asm") {
// NOTE(review): the result of lowerDynStackAlloc is discarded here.
2597 Helper.lowerDynStackAlloc(MI);
2598 return true;
2599 }
2600
2601 Register Dst = MI.getOperand(0).getReg();
2602 Register AllocSize = MI.getOperand(1).getReg();
2603 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2604
2605 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2606 "Unexpected type for dynamic alloca");
2607 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2608 "Unexpected type for dynamic alloca");
2609
2610 LLT PtrTy = MRI.getType(Dst);
2611 Register SPReg =
2613 Register SPTmp =
2614 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2615 auto NewMI =
2616 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
// Operand 0 of the new instruction is SPTmp; pin it to GPR64common.
2617 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
// The insert point is set to NewMI itself, so the copy is presumably placed
// ahead of the probe instruction; confirm against
// MachineIRBuilder::setInsertPt.
2618 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2619 MIRBuilder.buildCopy(Dst, SPTmp);
2620
2621 MI.eraseFromParent();
2622 return true;
2623}
2624
/// Replaces the instruction with G_AARCH64_PREFETCH carrying a single packed
/// immediate followed by the address operand.
///
/// Input operands: 0 = address, 1 = IsWrite, 2 = Locality, 3 = IsData (the
/// last three are immediates).
///
/// Packed immediate layout, as computed below:
///   bit 4     IsWrite
///   bit 3     !IsData
///   bits 2:1  0 when Locality is 0, otherwise 3 - Locality
///   bit 0     1 when Locality is 0 (IsStream), otherwise 0
///
/// \returns true after erasing MI.
2625bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2626 LegalizerHelper &Helper) const {
2627 MachineIRBuilder &MIB = Helper.MIRBuilder;
2628 auto &AddrVal = MI.getOperand(0);
2629
2630 int64_t IsWrite = MI.getOperand(1).getImm();
2631 int64_t Locality = MI.getOperand(2).getImm();
2632 int64_t IsData = MI.getOperand(3).getImm();
2633
2634 bool IsStream = Locality == 0;
2635 if (Locality != 0) {
2636 assert(Locality <= 3 && "Prefetch locality out-of-range");
2637 // The locality degree is the opposite of the cache speed.
2638 // Put the number the other way around.
2639 // The encoding starts at 0 for level 1
2640 Locality = 3 - Locality;
2641 }
2642
2643 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2644
2645 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2646 MI.eraseFromParent();
2647 return true;
2648}
2649
/// Custom legalization for a vector concatenation whose destination elements
/// are narrower than 8 bits (asserted below). Every source is any-extended to
/// the widened element size, the widened sources are concatenated, and the
/// result is truncated back into the original destination register.
///
/// \returns true after erasing MI.
///
/// NOTE(review): listing lines 2651 (a parameter line, presumably declaring MI
/// and MRI) and 2655 (presumably the definition of Concat from MI) are absent
/// from this rendering.
2650bool AArch64LegalizerInfo::legalizeConcatVectors(
2652 MachineIRBuilder &MIRBuilder) const {
2653 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2654 // This is analogous to SDAG's integer type promotion for sub-byte types.
2656 Register DstReg = Concat.getReg(0);
2657 LLT DstTy = MRI.getType(DstReg);
2658 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2659
// Widened element size: the larger of 8 and the element width rounded up to a
// power of two.
2660 unsigned WideEltSize =
2661 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
// The type of source 0 is used as the type of every source.
2662 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2663 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2664 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2665
2666 SmallVector<Register> WideSrcs;
2667 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2668 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2669 WideSrcs.push_back(Wide.getReg(0));
2670 }
2671
2672 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2673 MIRBuilder.buildTrunc(DstReg, WideConcat);
2674 MI.eraseFromParent();
2675 return true;
2676}
2677
/// Custom legalization of a floating-point truncation as a two-step sequence:
/// G_FPTRUNC_ODD to 32-bit elements, then an ordinary FP truncation to the
/// destination element type.
///
/// The scalar f64 -> bf16 case is handled directly. Every other input is
/// asserted to be a fixed vector with a power-of-two element count; it is
/// processed in 2-element pieces for the first step and in 4- or 2-element
/// pieces for the second.
///
/// \returns true after erasing MI.
///
/// NOTE(review): listing line 2742 (the builder call that joins two 2 x 32-bit
/// registers into the 4 x 32-bit ConcatDst) is absent from this rendering.
2678bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2679 MachineIRBuilder &MIRBuilder,
2680 MachineRegisterInfo &MRI) const {
2681 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2682
2683 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via f64 ->
2684 // f32 G_FPTRUNC_ODD and f32 -> [b]f16 G_FPTRUNC, which apparently avoids the
2685 // usual double-rounding issue that could be present from using twin
2686 // G_FPTRUNC.
2687
// Scalar bf16 <- f64: one round-to-odd step to float32, then the final
// truncation straight into Dst.
2688 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2689 auto Mid =
2690 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {LLT::float32()}, {Src});
2691 MIRBuilder.buildInstr(AArch64::G_FPTRUNC, {Dst}, {Mid});
2692 MI.eraseFromParent();
2693 return true;
2694 }
2695
2696 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2697 "Expected a power of 2 elements");
2698
2699 // We must mutate types here as FPTrunc may be used on a IEEE floating point
2700 // or a brainfloat.
// The local names follow an element-count / element-size pattern, but the
// types are derived from DstTy and SrcTy instead of being plain scalar
// vectors.
2701 LLT v2s16 = DstTy.changeElementCount(2);
2702 LLT v4s16 = DstTy.changeElementCount(4);
2703 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2704 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2705 LLT v2s64 = SrcTy.changeElementCount(2);
2706
2707 SmallVector<Register> RegsToUnmergeTo;
2708 SmallVector<Register> TruncOddDstRegs;
2709 SmallVector<Register> RegsToMerge;
2710
2711 unsigned ElemCount = SrcTy.getNumElements();
2712
2713 // Find the biggest size chunks we can work with
2714 int StepSize = ElemCount % 4 ? 2 : 4;
2715
2716 // If we have a power of 2 greater than 2, we need to first unmerge into
2717 // enough pieces
2718 if (ElemCount <= 2)
2719 RegsToUnmergeTo.push_back(Src);
2720 else {
2721 for (unsigned i = 0; i < ElemCount / 2; ++i)
2722 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2723
2724 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2725 }
2726
2727 // Create all of the round-to-odd instructions and store them
2728 for (auto SrcReg : RegsToUnmergeTo) {
2729 Register Mid =
2730 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2731 .getReg(0);
2732 TruncOddDstRegs.push_back(Mid);
2733 }
2734
2735 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2736 // truncate 2s32 to 2s16.
// Index walks TruncOddDstRegs; the 4-wide step takes two entries per
// iteration, the 2-wide step takes one.
2737 unsigned Index = 0;
2738 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2739 if (StepSize == 4) {
2740 Register ConcatDst =
2741 MIRBuilder
2743 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2744 .getReg(0);
2745
2746 RegsToMerge.push_back(
2747 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2748 } else {
2749 RegsToMerge.push_back(
2750 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2751 }
2752 }
2753
2754 // If there is only one register, replace the destination
2755 if (RegsToMerge.size() == 1) {
2756 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2757 MI.eraseFromParent();
2758 return true;
2759 }
2760
2761 // Merge the rest of the instructions & replace the register
2762 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2763 MRI.replaceRegWith(Dst, Fin);
2764 MI.eraseFromParent();
2765 return true;
2766}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:77
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
LLT divide(int Factor) const
Return a type that is Factor times smaller.
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
bool isFloat64() const
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Dst = G_XOR Src0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res = G_EXTRACT Src, Index.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT ~((1 << NumBits) - 1), i.e. clear the low NumBits bits of the pointer.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to reach the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:156
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...