1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
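 // Shorthand used below: sN is an N-bit scalar, iN an N-bit integer, fN/bf16
 // are typed FP scalars, vMsN (v4f32, ...) are fixed vectors of M elements,
 // nxvMsN are scalable (SVE) vectors, and p0 is a 64-bit pointer in address
 // space 0.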
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70
71 const LLT f16 = LLT::float16();
72 const LLT v4f16 = LLT::fixed_vector(4, f16);
73 const LLT v8f16 = LLT::fixed_vector(8, f16);
74
75 const LLT f32 = LLT::float32();
76 const LLT v2f32 = LLT::fixed_vector(2, f32);
77 const LLT v4f32 = LLT::fixed_vector(4, f32);
78
79 const LLT f64 = LLT::float64();
80 const LLT v2f64 = LLT::fixed_vector(2, f64);
81
82 const LLT f128 = LLT::float128();
83
84 const LLT i8 = LLT::integer(8);
85 const LLT v8i8 = LLT::fixed_vector(8, i8);
86 const LLT v16i8 = LLT::fixed_vector(16, i8);
87
88 const LLT i16 = LLT::integer(16);
89 const LLT v8i16 = LLT::fixed_vector(8, i16);
90 const LLT v4i16 = LLT::fixed_vector(4, i16);
91
92 const LLT i32 = LLT::integer(32);
93 const LLT v2i32 = LLT::fixed_vector(2, i32);
94 const LLT v4i32 = LLT::fixed_vector(4, i32);
95
96 const LLT i64 = LLT::integer(64);
97 const LLT v2i64 = LLT::fixed_vector(2, i64);
98
99 const LLT i128 = LLT::integer(128);
100
101 const LLT nxv16i8 = LLT::scalable_vector(16, i8);
102 const LLT nxv8i16 = LLT::scalable_vector(8, i16);
103 const LLT nxv4i32 = LLT::scalable_vector(4, i32);
104 const LLT nxv2i64 = LLT::scalable_vector(2, i64);
105
106 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
107 v16s8, v8s16, v4s32,
108 v2s64, v2p0,
109 /* End 128bit types */
110 /* Begin 64bit types */
111 v8s8, v4s16, v2s32};
112 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
113 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
114 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
115
116 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
117
118 // FIXME: support subtargets which have neon/fp-armv8 disabled.
119 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
120 getLegacyLegalizerInfo().computeTables();
121 return;
122 }
123
124 // Some instructions only support s16 if the subtarget has full 16-bit FP
125 // support.
126 const bool HasFP16 = ST.hasFullFP16();
127 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
128
129 const bool HasCSSC = ST.hasCSSC();
130 const bool HasRCPC3 = ST.hasRCPC3();
131 const bool HasSVE = ST.hasSVE();
132
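 // Each builder below collects an ordered list of rules for one or more
 // opcodes; for a given type combination the first rule whose predicate
 // matches determines the action (legal, lower, widen, clamp, libcall, ...).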
133 getActionDefinitionsBuilder(
134 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
135 .legalFor({p0, s8, s16, s32, s64})
136 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
137 v2s64, v2p0})
138 .widenScalarToNextPow2(0)
139 .clampScalar(0, s8, s64)
142 .clampNumElements(0, v8s8, v16s8)
143 .clampNumElements(0, v4s16, v8s16)
144 .clampNumElements(0, v2s32, v4s32)
145 .clampMaxNumElements(0, s64, 2)
146 .clampMaxNumElements(0, p0, 2)
148
149 getActionDefinitionsBuilder(G_PHI)
150 .legalFor({p0, s16, s32, s64})
151 .legalFor(PackedVectorAllTypeList)
155 .clampScalar(0, s16, s64)
156 .clampNumElements(0, v8s8, v16s8)
157 .clampNumElements(0, v4s16, v8s16)
158 .clampNumElements(0, v2s32, v4s32)
159 .clampMaxNumElements(0, s64, 2)
160 .clampMaxNumElements(0, p0, 2);
161
162 getActionDefinitionsBuilder(G_INSERT)
163 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
164 smallerThan(1, 0)))
165 .widenScalarToNextPow2(0)
166 .clampScalar(0, s32, s64)
168 .minScalar(1, s8)
169 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
170 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
171
172 getActionDefinitionsBuilder(G_EXTRACT)
173 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
174 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
175 .widenScalarToNextPow2(1)
176 .clampScalar(1, s32, s128)
178 .minScalar(0, s16)
179 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
180 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
181 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
182
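 // The clamp/minScalarOrEltIf ladder used for the integer binops promotes
 // narrow vectors to the smallest legal NEON type with the same lane count,
 // e.g. <2 x s8> -> <2 x s32> and <4 x s8> -> <4 x s16>.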
183 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
184 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
185 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
186 .widenScalarToNextPow2(0)
187 .clampScalar(0, s32, s64)
188 .clampMaxNumElements(0, s8, 16)
189 .clampMaxNumElements(0, s16, 8)
190 .clampNumElements(0, v2s32, v4s32)
191 .clampNumElements(0, v2s64, v2s64)
192 .minScalarOrEltIf(
193 [=](const LegalityQuery &Query) {
194 return Query.Types[0].getNumElements() <= 2;
195 },
196 0, s32)
197 .minScalarOrEltIf(
198 [=](const LegalityQuery &Query) {
199 return Query.Types[0].getNumElements() <= 4;
200 },
201 0, s16)
202 .minScalarOrEltIf(
203 [=](const LegalityQuery &Query) {
204 return Query.Types[0].getNumElements() <= 16;
205 },
206 0, s8)
207 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
209
210 getActionDefinitionsBuilder(G_MUL)
211 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
212 .widenScalarToNextPow2(0)
213 .clampScalar(0, s32, s64)
214 .clampMaxNumElements(0, s8, 16)
215 .clampMaxNumElements(0, s16, 8)
216 .clampNumElements(0, v2s32, v4s32)
217 .clampNumElements(0, v2s64, v2s64)
218 .minScalarOrEltIf(
219 [=](const LegalityQuery &Query) {
220 return Query.Types[0].getNumElements() <= 2;
221 },
222 0, s32)
223 .minScalarOrEltIf(
224 [=](const LegalityQuery &Query) {
225 return Query.Types[0].getNumElements() <= 4;
226 },
227 0, s16)
228 .minScalarOrEltIf(
229 [=](const LegalityQuery &Query) {
230 return Query.Types[0].getNumElements() <= 16;
231 },
232 0, s8)
233 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
235
236 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
237 .customIf([=](const LegalityQuery &Query) {
238 const auto &SrcTy = Query.Types[0];
239 const auto &AmtTy = Query.Types[1];
240 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
241 AmtTy.getSizeInBits() == 32;
242 })
243 .legalFor({
244 {i32, i32},
245 {i32, i64},
246 {i64, i64},
247 {v8i8, v8i8},
248 {v16i8, v16i8},
249 {v4i16, v4i16},
250 {v8i16, v8i16},
251 {v2i32, v2i32},
252 {v4i32, v4i32},
253 {v2i64, v2i64},
254 })
255 .widenScalarToNextPow2(0)
256 .clampScalar(1, s32, s64)
257 .clampScalar(0, s32, s64)
258 .clampNumElements(0, v8s8, v16s8)
259 .clampNumElements(0, v4s16, v8s16)
260 .clampNumElements(0, v2s32, v4s32)
261 .clampNumElements(0, v2s64, v2s64)
263 .minScalarSameAs(1, 0)
267
268 getActionDefinitionsBuilder(G_PTR_ADD)
269 .legalFor({{p0, i64}, {v2p0, v2i64}})
270 .clampScalarOrElt(1, s64, s64)
271 .clampNumElements(0, v2p0, v2p0);
272
273 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
274
275 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
276 .legalFor({i32, i64})
277 .libcallFor({i128})
278 .clampScalar(0, s32, s64)
280 .scalarize(0);
281
282 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
283 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
284 .libcallFor({i128})
286 .minScalarOrElt(0, s32)
287 .clampNumElements(0, v2s32, v4s32)
288 .clampNumElements(0, v2s64, v2s64)
289 .scalarize(0);
290
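 // There is no flag-setting multiply, so the overflow variants are clamped
 // to 32/64 bits and expanded by the generic lowering (typically a multiply
 // plus a check on the high half of the result).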
291 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
292 .widenScalarToNextPow2(0, /*Min = */ 32)
293 .clampScalar(0, s32, s64)
294 .lower();
295
296 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
297 .legalFor({i64, v16i8, v8i16, v4i32})
298 .lower();
299
301
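 // Scalar integer min/max are single instructions only with CSSC; otherwise
 // they are lowered to compare + select. The vector forms map to NEON
 // SMIN/SMAX/UMIN/UMAX.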
302 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
303 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
304 .legalFor(HasCSSC, {i32, i64})
305 .minScalar(HasCSSC, 0, s32)
306 .clampNumElements(0, v8s8, v16s8)
307 .clampNumElements(0, v4s16, v8s16)
308 .clampNumElements(0, v2s32, v4s32)
309 .lower();
310
311 // FIXME: Legal vector types are only legal with NEON.
312 getActionDefinitionsBuilder(G_ABS)
313 .legalFor(HasCSSC, {i32, i64})
314 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
315 .customIf([=](const LegalityQuery &Q) {
316 // TODO: Fix suboptimal codegen for 128+ bit types.
317 LLT SrcTy = Q.Types[0];
318 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
319 })
320 .widenScalarIf(
321 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
322 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
323 .widenScalarIf(
324 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
325 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
326 .clampNumElements(0, v8s8, v16s8)
327 .clampNumElements(0, v4s16, v8s16)
328 .clampNumElements(0, v2s32, v4s32)
329 .clampNumElements(0, v2s64, v2s64)
331 .lower();
332
333 getActionDefinitionsBuilder(
334 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
335 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
336 .lower();
337
338 getActionDefinitionsBuilder(
339 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
340 .legalFor({{i32, i32}, {i64, i32}})
341 .clampScalar(0, s32, s64)
342 .clampScalar(1, s32, s64)
344
345 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
346 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
347 .lower();
348
349 getActionDefinitionsBuilder(G_ROTR)
350 .legalFor({{i32, i64}, {i64, i64}})
351 .customIf([=](const LegalityQuery &Q) {
352 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
353 })
354 .lower();
356
357 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
358 .customFor({{s32, s32}, {s64, s64}});
359
360 auto always = [=](const LegalityQuery &Q) { return true; };
361 getActionDefinitionsBuilder(G_CTPOP)
362 .legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
363 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
364 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
365 .customFor({{s128, s128},
366 {v4s16, v4s16},
367 {v8s16, v8s16},
368 {v2s32, v2s32},
369 {v4s32, v4s32},
370 {v2s64, v2s64}})
371 .clampScalar(0, s32, s128)
374 .minScalarEltSameAsIf(always, 1, 0)
375 .maxScalarEltSameAsIf(always, 1, 0)
376 .clampNumElements(0, v8s8, v16s8)
377 .clampNumElements(0, v4s16, v8s16)
378 .clampNumElements(0, v2s32, v4s32)
379 .clampNumElements(0, v2s64, v2s64)
382
383 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
384 .legalFor({{i32, i32},
385 {i64, i64},
386 {v8i8, v8i8},
387 {v16i8, v16i8},
388 {v4i16, v4i16},
389 {v8i16, v8i16},
390 {v2i32, v2i32},
391 {v4i32, v4i32}})
392 .widenScalarToNextPow2(1, /*Min=*/32)
393 .clampScalar(1, s32, s64)
395 .clampNumElements(0, v8s8, v16s8)
396 .clampNumElements(0, v4s16, v8s16)
397 .clampNumElements(0, v2s32, v4s32)
400 .scalarSameSizeAs(0, 1);
401
402 getActionDefinitionsBuilder(G_CTLZ_ZERO_POISON).lower();
403
404 getActionDefinitionsBuilder(G_CTTZ)
405 .lowerIf(isVector(0))
406 .widenScalarToNextPow2(1, /*Min=*/32)
407 .clampScalar(1, s32, s64)
408 .scalarSameSizeAs(0, 1)
409 .legalFor(HasCSSC, {s32, s64})
410 .customFor(!HasCSSC, {s32, s64});
411
412 getActionDefinitionsBuilder(G_CTTZ_ZERO_POISON).lower();
413
414 getActionDefinitionsBuilder(G_BITREVERSE)
415 .legalFor({i32, i64, v8i8, v16i8})
416 .widenScalarToNextPow2(0, /*Min = */ 32)
418 .clampScalar(0, s32, s64)
419 .clampNumElements(0, v8s8, v16s8)
420 .clampNumElements(0, v4s16, v8s16)
421 .clampNumElements(0, v2s32, v4s32)
422 .clampNumElements(0, v2s64, v2s64)
425 .lower();
426
427 getActionDefinitionsBuilder(G_BSWAP)
428 .legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
430 .clampScalar(0, s32, s64)
431 .clampNumElements(0, v4s16, v8s16)
432 .clampNumElements(0, v2s32, v4s32)
433 .clampNumElements(0, v2s64, v2s64)
435
436 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
437 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
438 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
439 .clampNumElements(0, v8s8, v16s8)
440 .clampNumElements(0, v4s16, v8s16)
441 .clampNumElements(0, v2s32, v4s32)
442 .clampMaxNumElements(0, s64, 2)
445 .lower();
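 // Basic FP arithmetic: f32/f64 and the 64/128-bit vector forms are native;
 // f16 is only native with full FP16 and is otherwise promoted to f32, as is
 // bf16; f128 is always a libcall.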
446
447 getActionDefinitionsBuilder(
448 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
449 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
450 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
451 .legalFor({f32, f64, v2f32, v4f32, v2f64})
452 .legalFor(HasFP16, {f16, v4f16, v8f16})
453 .libcallFor({f128})
454 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
456 [=](const LegalityQuery &Q) {
457 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
458 Q.Types[0].getScalarType().isBFloat16();
459 },
460 changeElementTo(0, f32))
461 .clampNumElements(0, v4s16, v8s16)
462 .clampNumElements(0, v2s32, v4s32)
463 .clampNumElements(0, v2s64, v2s64)
465
466 getActionDefinitionsBuilder({G_FABS, G_FNEG})
467 .legalFor({f32, f64, v2f32, v4f32, v2f64})
468 .legalFor(HasFP16, {f16, v4f16, v8f16})
469 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
471 .clampNumElements(0, v4s16, v8s16)
472 .clampNumElements(0, v2s32, v4s32)
473 .clampNumElements(0, v2s64, v2s64)
475 .lowerFor({f16, v4f16, v8f16});
476
477 getActionDefinitionsBuilder(G_FREM)
478 .libcallFor({f32, f64, f128})
479 .minScalar(0, f32)
480 .scalarize(0);
481
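 // libm-style operations always become library calls, so they are scalarized
 // and 16-bit inputs are promoted to f32 first.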
482 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
483 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
484 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
485 G_FSINH, G_FTANH, G_FMODF})
486 // We need a call for these, so we always need to scalarize.
487 .scalarize(0)
488 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
489 .minScalar(0, f32)
490 .libcallFor({f32, f64, f128});
491 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
492 .scalarize(0)
493 .minScalar(0, f32)
494 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
495
496 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
497 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
498 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
499 .minScalar(1, s32)
500 .libcallFor({{s64, s128}})
501 .lower();
502 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
503 .legalFor({{i64, f32}, {i64, f64}})
504 .legalFor(HasFP16, {{i64, f16}})
505 .minScalar(0, s64)
506 .minScalar(1, s32)
507 .libcallFor({{s64, s128}})
508 .lower();
509
510 // TODO: Custom legalization for mismatched types.
511 getActionDefinitionsBuilder(G_FCOPYSIGN)
512 .moreElementsIf(
513 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
514 [=](const LegalityQuery &Query) {
515 const LLT Ty = Query.Types[0];
516 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
517 })
518 .lower();
519
521
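 // In the legalForTypesWithMemDesc() lists below, each entry is
 // {result type, pointer type, memory type, minimum alignment}; a result
 // wider than the memory type denotes an extending load (or a truncating
 // store in the G_STORE rules).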
522 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
523 auto &Actions = getActionDefinitionsBuilder(Op);
524
525 if (Op == G_SEXTLOAD)
526 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
527
528 // Atomics have zero extending behavior.
529 Actions
530 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
531 {s32, p0, s16, 8},
532 {s32, p0, s32, 8},
533 {s64, p0, s8, 2},
534 {s64, p0, s16, 2},
535 {s64, p0, s32, 4},
536 {s64, p0, s64, 8},
537 {p0, p0, s64, 8},
538 {v2s32, p0, s64, 8}})
539 .widenScalarToNextPow2(0)
540 .clampScalar(0, s32, s64)
541 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
542 // how to do that yet.
543 .unsupportedIfMemSizeNotPow2()
544 // Lower anything left over into G_*EXT and G_LOAD
545 .lower();
546 }
547
548 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
549 const LLT &ValTy = Query.Types[0];
550 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
551 };
552
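 // 128-bit atomic loads cannot be selected as a single ordinary load, so the
 // customIf rules below (including the RCPC3 acquire case) route them to
 // legalizeLoadStore().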
553 getActionDefinitionsBuilder(G_LOAD)
554 .customIf([=](const LegalityQuery &Query) {
555 return HasRCPC3 && Query.Types[0] == s128 &&
556 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
557 })
558 .customIf([=](const LegalityQuery &Query) {
559 return Query.Types[0] == s128 &&
560 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
561 })
562 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
563 {s16, p0, s16, 8},
564 {s32, p0, s32, 8},
565 {s64, p0, s64, 8},
566 {p0, p0, s64, 8},
567 {s128, p0, s128, 8},
568 {v8s8, p0, s64, 8},
569 {v16s8, p0, s128, 8},
570 {v4s16, p0, s64, 8},
571 {v8s16, p0, s128, 8},
572 {v2s32, p0, s64, 8},
573 {v4s32, p0, s128, 8},
574 {v2s64, p0, s128, 8}})
575 // These extends are also legal
576 .legalForTypesWithMemDesc(
577 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
578 .legalForTypesWithMemDesc({
579 // SVE vscale x 128 bit base sizes
580 {nxv16s8, p0, nxv16s8, 8},
581 {nxv8s16, p0, nxv8s16, 8},
582 {nxv4s32, p0, nxv4s32, 8},
583 {nxv2s64, p0, nxv2s64, 8},
584 })
585 .widenScalarToNextPow2(0, /* MinSize = */ 8)
586 .clampMaxNumElements(0, s8, 16)
587 .clampMaxNumElements(0, s16, 8)
588 .clampMaxNumElements(0, s32, 4)
589 .clampMaxNumElements(0, s64, 2)
590 .clampMaxNumElements(0, p0, 2)
592 .clampScalar(0, s8, s64)
593 .narrowScalarIf(
594 [=](const LegalityQuery &Query) {
595 // Clamp extending load results to 32-bits.
596 return Query.Types[0].isScalar() &&
597 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
598 Query.Types[0].getSizeInBits() > 32;
599 },
600 changeTo(0, s32))
601 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
602 .bitcastIf(typeInSet(0, {v4s8}),
603 [=](const LegalityQuery &Query) {
604 const LLT VecTy = Query.Types[0];
605 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
606 })
607 .customIf(IsPtrVecPred)
608 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
609 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
610
611 getActionDefinitionsBuilder(G_STORE)
612 .customIf([=](const LegalityQuery &Query) {
613 return HasRCPC3 && Query.Types[0] == s128 &&
614 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
615 })
616 .customIf([=](const LegalityQuery &Query) {
617 return Query.Types[0] == s128 &&
618 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
619 })
620 .widenScalarIf(
621 all(scalarNarrowerThan(0, 32),
623 changeTo(0, s32))
624 .legalForTypesWithMemDesc(
625 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
626 {s32, p0, s8, 8}, // truncstorei8 from s32
627 {s64, p0, s8, 8}, // truncstorei8 from s64
628 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
629 {s64, p0, s16, 8}, // truncstorei16 from s64
630 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
631 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
632 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
633 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
634 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
635 .legalForTypesWithMemDesc({
636 // SVE vscale x 128 bit base sizes
637 // TODO: Add nxv2p0. Consider bitcastIf.
638 // See #92130
639 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
640 {nxv16s8, p0, nxv16s8, 8},
641 {nxv8s16, p0, nxv8s16, 8},
642 {nxv4s32, p0, nxv4s32, 8},
643 {nxv2s64, p0, nxv2s64, 8},
644 })
645 .clampScalar(0, s8, s64)
646 .minScalarOrElt(0, s8)
647 .lowerIf([=](const LegalityQuery &Query) {
648 return Query.Types[0].isScalar() &&
649 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
650 })
651 // Maximum: sN * k = 128
652 .clampMaxNumElements(0, s8, 16)
653 .clampMaxNumElements(0, s16, 8)
654 .clampMaxNumElements(0, s32, 4)
655 .clampMaxNumElements(0, s64, 2)
656 .clampMaxNumElements(0, p0, 2)
658 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
659 .bitcastIf(all(typeInSet(0, {v4s8}),
660 LegalityPredicate([=](const LegalityQuery &Query) {
661 return Query.Types[0].getSizeInBits() ==
662 Query.MMODescrs[0].MemoryTy.getSizeInBits();
663 })),
664 [=](const LegalityQuery &Query) {
665 const LLT VecTy = Query.Types[0];
666 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
667 })
668 .customIf(IsPtrVecPred)
669 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
670 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
671 .lower();
672
673 getActionDefinitionsBuilder(G_INDEXED_STORE)
674 // Idx 0 == Ptr, Idx 1 == Val
675 // TODO: we can implement legalizations but as of now these are
676 // generated in a very specific way.
677 .legalForTypesWithMemDesc({
678 {p0, s8, s8, 8},
679 {p0, s16, s16, 8},
680 {p0, s32, s8, 8},
681 {p0, s32, s16, 8},
682 {p0, s32, s32, 8},
683 {p0, s64, s64, 8},
684 {p0, p0, p0, 8},
685 {p0, v8s8, v8s8, 8},
686 {p0, v16s8, v16s8, 8},
687 {p0, v4s16, v4s16, 8},
688 {p0, v8s16, v8s16, 8},
689 {p0, v2s32, v2s32, 8},
690 {p0, v4s32, v4s32, 8},
691 {p0, v2s64, v2s64, 8},
692 {p0, v2p0, v2p0, 8},
693 {p0, s128, s128, 8},
694 })
695 .unsupported();
696
697 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
698 LLT LdTy = Query.Types[0];
699 LLT PtrTy = Query.Types[1];
700 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
701 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
702 return false;
703 if (PtrTy != p0)
704 return false;
705 return true;
706 };
707 getActionDefinitionsBuilder(G_INDEXED_LOAD)
708 .unsupportedIf(
709 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
710 .legalIf(IndexedLoadBasicPred)
711 .unsupported();
712 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
713 .unsupportedIf(
714 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
715 .legalIf(all(typeInSet(0, {s16, s32, s64}),
716 LegalityPredicate([=](const LegalityQuery &Q) {
717 LLT LdTy = Q.Types[0];
718 LLT PtrTy = Q.Types[1];
719 LLT MemTy = Q.MMODescrs[0].MemoryTy;
720 if (PtrTy != p0)
721 return false;
722 if (LdTy == s16)
723 return MemTy == s8;
724 if (LdTy == s32)
725 return MemTy == s8 || MemTy == s16;
726 if (LdTy == s64)
727 return MemTy == s8 || MemTy == s16 || MemTy == s32;
728 return false;
729 })))
730 .unsupported();
731
732 // Constants
733 getActionDefinitionsBuilder(G_CONSTANT)
734 .legalFor({p0, s8, s16, s32, s64})
735 .widenScalarToNextPow2(0)
736 .clampScalar(0, s8, s64);
737 getActionDefinitionsBuilder(G_FCONSTANT)
738 .legalFor({s16, s32, s64, s128})
739 .clampScalar(0, MinFPScalar, s128);
740
741 // FIXME: fix moreElementsToNextPow2
742 getActionDefinitionsBuilder(G_ICMP)
743 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
745 .minScalarOrElt(1, s8)
746 .clampScalar(1, s32, s64)
747 .clampScalar(0, s32, s32)
749 .minScalarEltSameAsIf(
750 [=](const LegalityQuery &Query) {
751 const LLT &Ty = Query.Types[0];
752 const LLT &SrcTy = Query.Types[1];
753 return Ty.isVector() && !SrcTy.isPointerVector() &&
754 Ty.getElementType() != SrcTy.getElementType();
755 },
756 0, 1)
757 .minScalarOrEltIf(
758 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
759 1, s32)
760 .minScalarOrEltIf(
761 [=](const LegalityQuery &Query) {
762 return Query.Types[1].isPointerVector();
763 },
764 0, s64)
766 .clampNumElements(1, v8s8, v16s8)
767 .clampNumElements(1, v4s16, v8s16)
768 .clampNumElements(1, v2s32, v4s32)
769 .clampNumElements(1, v2s64, v2s64)
770 .clampNumElements(1, v2p0, v2p0)
771 .customIf(isVector(0));
772
773 getActionDefinitionsBuilder(G_FCMP)
774 .legalFor({{i32, f32},
775 {i32, f64},
776 {v4i32, v4f32},
777 {v2i32, v2f32},
778 {v2i64, v2f64}})
779 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
781 .clampScalar(0, s32, s32)
783 [=](const LegalityQuery &Q) {
784 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
785 Q.Types[1].getScalarType().isBFloat16();
786 },
787 changeElementTo(1, f32))
788 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
789 .minScalarEltSameAsIf(
790 [=](const LegalityQuery &Query) {
791 const LLT &Ty = Query.Types[0];
792 const LLT &SrcTy = Query.Types[1];
793 return Ty.isVector() && !SrcTy.isPointerVector() &&
794 Ty.getElementType() != SrcTy.getElementType();
795 },
796 0, 1)
797 .clampNumElements(1, v4s16, v8s16)
798 .clampNumElements(1, v2s32, v4s32)
799 .clampMaxNumElements(1, s64, 2)
801 .libcallFor({{s32, s128}});
802
803 // Extensions
804 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
805 unsigned DstSize = Query.Types[0].getSizeInBits();
806
807 // Handle legal vectors using legalFor
808 if (Query.Types[0].isVector())
809 return false;
810
811 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
812 return false; // Extending to a scalar s128 needs narrowing.
813
814 const LLT &SrcTy = Query.Types[1];
815
816 // Make sure we fit in a register otherwise. Don't bother checking that
817 // the source type is below 128 bits. We shouldn't be allowing anything
818 // through which is wider than the destination in the first place.
819 unsigned SrcSize = SrcTy.getSizeInBits();
820 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
821 return false;
822
823 return true;
824 };
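 // Scalar extends rejected above (non-power-of-2 or 128-bit destinations)
 // fall through to the clamping rules below; vector extends are covered by
 // the explicit legalFor entries and the two-step lowering.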
825 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
826 .legalIf(ExtLegalFunc)
827 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
828 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
830 .clampMaxNumElements(1, s8, 8)
831 .clampMaxNumElements(1, s16, 4)
832 .clampMaxNumElements(1, s32, 2)
833 // Tries to convert a large EXTEND into two smaller EXTENDs
834 .lowerIf([=](const LegalityQuery &Query) {
835 return (Query.Types[0].getScalarSizeInBits() >
836 Query.Types[1].getScalarSizeInBits() * 2) &&
837 Query.Types[0].isVector() &&
838 (Query.Types[1].getScalarSizeInBits() == 8 ||
839 Query.Types[1].getScalarSizeInBits() == 16);
840 })
841 .clampMinNumElements(1, s8, 8)
842 .clampMinNumElements(1, s16, 4)
844
845 getActionDefinitionsBuilder(G_TRUNC)
846 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
848 .clampMaxNumElements(0, s8, 8)
849 .clampMaxNumElements(0, s16, 4)
850 .clampMaxNumElements(0, s32, 2)
851 .minScalarOrEltIf(
852 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
853 0, s8)
854 .lowerIf([=](const LegalityQuery &Query) {
855 LLT DstTy = Query.Types[0];
856 LLT SrcTy = Query.Types[1];
857 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
858 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
859 })
860 .clampMinNumElements(0, s8, 8)
861 .clampMinNumElements(0, s16, 4)
862 .alwaysLegal();
863
864 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
865 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
866 .clampNumElements(0, v2s32, v2s32);
867
868 getActionDefinitionsBuilder(G_SEXT_INREG)
869 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
870 .maxScalar(0, s64)
871 .clampNumElements(0, v8s8, v16s8)
872 .clampNumElements(0, v4s16, v8s16)
873 .clampNumElements(0, v2s32, v4s32)
874 .clampMaxNumElements(0, s64, 2)
875 .lower();
876
877 // FP conversions
878 getActionDefinitionsBuilder(G_FPTRUNC)
879 .legalFor(
880 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
881 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
882 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
884 .customIf([](const LegalityQuery &Q) {
885 LLT DstTy = Q.Types[0];
886 LLT SrcTy = Q.Types[1];
887 return SrcTy.getScalarSizeInBits() == 64 &&
888 DstTy.getScalarSizeInBits() == 16;
889 })
890 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
891 // Clamp based on input
892 .clampNumElements(1, v4s32, v4s32)
893 .clampNumElements(1, v2s64, v2s64)
894 .scalarize(0);
895
896 getActionDefinitionsBuilder(G_FPEXT)
897 .legalFor({{f32, f16},
898 {f64, f16},
899 {f32, bf16},
900 {f64, f32},
901 {v4f32, v4f16},
902 {v4f32, v4bf16},
903 {v2f64, v2f32}})
904 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
907 [](const LegalityQuery &Q) {
908 LLT DstTy = Q.Types[0];
909 LLT SrcTy = Q.Types[1];
910 return SrcTy.isVector() && DstTy.isVector() &&
911 SrcTy.getScalarSizeInBits() == 16 &&
912 DstTy.getScalarSizeInBits() == 64;
913 },
914 changeElementTo(1, f32))
915 .clampNumElements(0, v4s32, v4s32)
916 .clampNumElements(0, v2s64, v2s64)
917 .scalarize(0);
918
919 // Conversions
920 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
921 .legalFor({{i32, f32},
922 {i64, f32},
923 {i32, f64},
924 {i64, f64},
925 {v2i32, v2f32},
926 {v4i32, v4f32},
927 {v2i64, v2f64}})
928 .legalFor(HasFP16,
929 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
930 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
932 // The range of a fp16 value fits into an i17, so we can lower the width
933 // to i64.
934 .narrowScalarIf(
935 [=](const LegalityQuery &Query) {
936 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
937 },
938 changeTo(0, i64))
941 .minScalar(0, s32)
942 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
943 .widenScalarIf(
944 [=](const LegalityQuery &Query) {
945 return Query.Types[0].getScalarSizeInBits() <= 64 &&
946 Query.Types[0].getScalarSizeInBits() >
947 Query.Types[1].getScalarSizeInBits();
948 },
949 LegalizeMutations::changeElementSizeTo(1, 0))
950 .widenScalarIf(
951 [=](const LegalityQuery &Query) {
952 return Query.Types[1].getScalarSizeInBits() <= 64 &&
953 Query.Types[0].getScalarSizeInBits() <
954 Query.Types[1].getScalarSizeInBits();
955 },
956 LegalizeMutations::changeElementSizeTo(0, 1))
957 .clampNumElements(0, v4s16, v8s16)
958 .clampNumElements(0, v2s32, v4s32)
959 .clampMaxNumElements(0, s64, 2)
960 .libcallFor(
961 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
962
963 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
964 .legalFor({{i32, f32},
965 {i64, f32},
966 {i32, f64},
967 {i64, f64},
968 {v2i32, v2f32},
969 {v4i32, v4f32},
970 {v2i64, v2f64}})
971 .legalFor(
972 HasFP16,
973 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
974 // Handle types larger than i64 by scalarizing/lowering.
975 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
977 // The range of a fp16 value fits into an i17, so we can lower the width
978 // to i64.
979 .narrowScalarIf(
980 [=](const LegalityQuery &Query) {
981 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
982 },
983 changeTo(0, i64))
984 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
986 .widenScalarToNextPow2(0, /*MinSize=*/32)
987 .minScalar(0, s32)
988 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
989 .widenScalarIf(
990 [=](const LegalityQuery &Query) {
991 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
992 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
993 ITySize > Query.Types[1].getScalarSizeInBits();
994 },
995 LegalizeMutations::changeElementSizeTo(1, 0))
996 .widenScalarIf(
997 [=](const LegalityQuery &Query) {
998 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
999 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
1000 Query.Types[0].getScalarSizeInBits() < FTySize;
1001 },
1002 LegalizeMutations::changeElementSizeTo(0, 1))
1004 .clampNumElements(0, v4s16, v8s16)
1005 .clampNumElements(0, v2s32, v4s32)
1006 .clampMaxNumElements(0, s64, 2);
1007
1008 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1009 .legalFor({{f32, i32},
1010 {f64, i32},
1011 {f32, i64},
1012 {f64, i64},
1013 {v2f32, v2i32},
1014 {v4f32, v4i32},
1015 {v2f64, v2i64}})
1016 .legalFor(HasFP16,
1017 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1018 .unsupportedIf([&](const LegalityQuery &Query) {
1019 return Query.Types[0].getScalarType().isBFloat16();
1020 })
1021 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1025 .minScalar(1, f32)
1026 .lowerIf([](const LegalityQuery &Query) {
1027 return Query.Types[1].isVector() &&
1028 Query.Types[1].getScalarSizeInBits() == 64 &&
1029 Query.Types[0].getScalarSizeInBits() == 16;
1030 })
1031 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1032 .scalarizeIf(
1033 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1034 [](const LegalityQuery &Query) {
1035 return Query.Types[0].getScalarSizeInBits() == 32 &&
1036 Query.Types[1].getScalarSizeInBits() == 64;
1037 },
1038 0)
1039 .widenScalarIf(
1040 [](const LegalityQuery &Query) {
1041 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1042 Query.Types[0].getScalarSizeInBits() <
1043 Query.Types[1].getScalarSizeInBits();
1044 },
1045 LegalizeMutations::changeElementSizeTo(0, 1))
1046 .widenScalarIf(
1047 [](const LegalityQuery &Query) {
1048 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1049 Query.Types[0].getScalarSizeInBits() >
1050 Query.Types[1].getScalarSizeInBits();
1051 },
1052 LegalizeMutations::changeElementSizeTo(1, 0))
1053 .clampNumElements(0, v4s16, v8s16)
1054 .clampNumElements(0, v2s32, v4s32)
1055 .clampMaxNumElements(0, s64, 2)
1056 .libcallFor({{f16, i128},
1057 {f32, i128},
1058 {f64, i128},
1059 {f128, i128},
1060 {f128, i32},
1061 {f128, i64}});
1062
1063 // Control-flow
1064 getActionDefinitionsBuilder(G_BR).alwaysLegal();
1065 getActionDefinitionsBuilder(G_BRCOND)
1066 .legalFor({s32})
1067 .clampScalar(0, s32, s32);
1068 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1069
1070 getActionDefinitionsBuilder(G_SELECT)
1071 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1072 .widenScalarToNextPow2(0)
1073 .clampScalar(0, s32, s64)
1074 .clampScalar(1, s32, s32)
1077 .lowerIf(isVector(0));
1078
1079 // Pointer-handling
1080 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1081
1082 if (TM.getCodeModel() == CodeModel::Small)
1083 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1084 else
1085 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1086
1087 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1088 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1089
1090 getActionDefinitionsBuilder(G_PTRTOINT)
1091 .legalFor({{i64, p0}, {v2i64, v2p0}})
1092 .widenScalarToNextPow2(0, 64)
1093 .clampScalar(0, s64, s64)
1094 .clampMaxNumElements(0, s64, 2);
1095
1096 getActionDefinitionsBuilder(G_INTTOPTR)
1097 .unsupportedIf([&](const LegalityQuery &Query) {
1098 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1099 })
1100 .legalFor({{p0, i64}, {v2p0, v2i64}})
1101 .clampMaxNumElements(1, s64, 2);
1102
1103 // Casts for 32 and 64-bit width type are just copies.
1104 // Same for 128-bit width type, except they are on the FPR bank.
1105 getActionDefinitionsBuilder(G_BITCAST)
1107 // Keeping 32-bit instructions legal to prevent regression in some tests
1108 .legalForCartesianProduct({s32, v2s16, v4s8})
1109 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1110 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1111 .customIf([=](const LegalityQuery &Query) {
1112 // Handle casts from i1 vectors to scalars.
1113 LLT DstTy = Query.Types[0];
1114 LLT SrcTy = Query.Types[1];
1115 return DstTy.isScalar() && SrcTy.isVector() &&
1116 SrcTy.getScalarSizeInBits() == 1;
1117 })
1118 .lowerIf([=](const LegalityQuery &Query) {
1119 return Query.Types[0].isVector() != Query.Types[1].isVector();
1120 })
1122 .clampNumElements(0, v8s8, v16s8)
1123 .clampNumElements(0, v4s16, v8s16)
1124 .clampNumElements(0, v2s32, v4s32)
1125 .clampMaxNumElements(0, s64, 2)
1126 .lower();
1127
1128 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1129
1130 // va_list must be a pointer, but most sized types are pretty easy to handle
1131 // as the destination.
1132 getActionDefinitionsBuilder(G_VAARG)
1133 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1134 .clampScalar(0, s8, s64)
1135 .widenScalarToNextPow2(0, /*Min*/ 8);
1136
1137 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1138 .lowerIf(
1139 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1140
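 // With +outline-atomics and no LSE, RMW and compare-exchange become calls
 // to the __aarch64_* outline helpers, which pick LSE or LL/SC sequences at
 // runtime; otherwise they stay legal and are selected directly.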
1141 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1142
1143 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1144 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1145 .customFor(!UseOutlineAtomics, {{s128, p0}})
1146 .libcallFor(UseOutlineAtomics,
1147 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1148 .clampScalar(0, s32, s64);
1149
1150 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1151 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1152 G_ATOMICRMW_XOR})
1153 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1154 .libcallFor(UseOutlineAtomics,
1155 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1156 .clampScalar(0, s32, s64);
1157
1158 // Do not outline these atomics operations, as per comment in
1159 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1160 getActionDefinitionsBuilder(
1161 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1162 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1163 .clampScalar(0, s32, s64);
1164
1165 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1166
1167 // Merge/Unmerge
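 // For G_MERGE_VALUES the wide value is operand 0 and the narrow pieces are
 // the sources; G_UNMERGE_VALUES is the reverse, hence the swapped
 // BigTyIdx/LitTyIdx below.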
1168 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1169 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1170 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1171 getActionDefinitionsBuilder(Op)
1172 .widenScalarToNextPow2(LitTyIdx, 8)
1173 .widenScalarToNextPow2(BigTyIdx, 32)
1174 .clampScalar(LitTyIdx, s8, s64)
1175 .clampScalar(BigTyIdx, s32, s128)
1176 .legalIf([=](const LegalityQuery &Q) {
1177 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1178 case 32:
1179 case 64:
1180 case 128:
1181 break;
1182 default:
1183 return false;
1184 }
1185 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1186 case 8:
1187 case 16:
1188 case 32:
1189 case 64:
1190 return true;
1191 default:
1192 return false;
1193 }
1194 });
1195 }
1196
1197 // TODO : nxv4s16, nxv2s16, nxv2s32
1198 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1199 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1200 {s16, nxv8s16, s64},
1201 {s32, nxv4s32, s64},
1202 {s64, nxv2s64, s64}})
1203 .unsupportedIf([=](const LegalityQuery &Query) {
1204 const LLT &EltTy = Query.Types[1].getElementType();
1205 if (Query.Types[1].isScalableVector())
1206 return false;
1207 return Query.Types[0] != EltTy;
1208 })
1209 .minScalar(2, s64)
1210 .customIf([=](const LegalityQuery &Query) {
1211 const LLT &VecTy = Query.Types[1];
1212 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1213 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1214 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1215 })
1216 .minScalarOrEltIf(
1217 [=](const LegalityQuery &Query) {
1218 // We want to promote <M x s1> to <M x s64> if that wouldn't
1219 // cause the total vec size to be > 128b.
1220 return Query.Types[1].isFixedVector() &&
1221 Query.Types[1].getNumElements() <= 2;
1222 },
1223 0, s64)
1224 .minScalarOrEltIf(
1225 [=](const LegalityQuery &Query) {
1226 return Query.Types[1].isFixedVector() &&
1227 Query.Types[1].getNumElements() <= 4;
1228 },
1229 0, s32)
1230 .minScalarOrEltIf(
1231 [=](const LegalityQuery &Query) {
1232 return Query.Types[1].isFixedVector() &&
1233 Query.Types[1].getNumElements() <= 8;
1234 },
1235 0, s16)
1236 .minScalarOrEltIf(
1237 [=](const LegalityQuery &Query) {
1238 return Query.Types[1].isFixedVector() &&
1239 Query.Types[1].getNumElements() <= 16;
1240 },
1241 0, s8)
1242 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1243 .moreElementsToNextPow2(1)
1244 .clampMaxNumElements(1, s64, 2)
1245 .clampMaxNumElements(1, s32, 4)
1246 .clampMaxNumElements(1, s16, 8)
1247 .clampMaxNumElements(1, s8, 16)
1248 .clampMaxNumElements(1, p0, 2)
1249 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
1250
1251 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1252 .legalIf(
1253 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1254 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1255 {nxv8s16, s32, s64},
1256 {nxv4s32, s32, s64},
1257 {nxv2s64, s64, s64}})
1259 .widenVectorEltsToVectorMinSize(0, 64)
1260 .clampNumElements(0, v8s8, v16s8)
1261 .clampNumElements(0, v4s16, v8s16)
1262 .clampNumElements(0, v2s32, v4s32)
1263 .clampMaxNumElements(0, s64, 2)
1264 .clampMaxNumElements(0, p0, 2)
1265 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1266
1267 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1268 .legalFor({{v8s8, s8},
1269 {v16s8, s8},
1270 {v4s16, s16},
1271 {v8s16, s16},
1272 {v2s32, s32},
1273 {v4s32, s32},
1274 {v2s64, s64},
1275 {v2p0, p0}})
1276 .clampNumElements(0, v4s32, v4s32)
1277 .clampNumElements(0, v2s64, v2s64)
1278 .minScalarOrElt(0, s8)
1279 .widenVectorEltsToVectorMinSize(0, 64)
1280 .widenScalarOrEltToNextPow2(0)
1281 .minScalarSameAs(1, 0);
1282
1283 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1284
1285 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1286 .legalIf([=](const LegalityQuery &Query) {
1287 const LLT &DstTy = Query.Types[0];
1288 const LLT &SrcTy = Query.Types[1];
1289 // For now just support the TBL2 variant which needs the source vectors
1290 // to be the same size as the dest.
1291 if (DstTy != SrcTy)
1292 return false;
1293 return llvm::is_contained(
1294 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1295 })
1296 .moreElementsIf(
1297 [](const LegalityQuery &Query) {
1298 return Query.Types[0].getNumElements() >
1299 Query.Types[1].getNumElements();
1300 },
1301 changeTo(1, 0))
1303 .moreElementsIf(
1304 [](const LegalityQuery &Query) {
1305 return Query.Types[0].getNumElements() <
1306 Query.Types[1].getNumElements();
1307 },
1308 changeTo(0, 1))
1309 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1310 .clampNumElements(0, v8s8, v16s8)
1311 .clampNumElements(0, v4s16, v8s16)
1312 .clampNumElements(0, v4s32, v4s32)
1313 .clampNumElements(0, v2s64, v2s64)
1314 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1315 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1316 // Bitcast pointers vector to i64.
1317 const LLT DstTy = Query.Types[0];
1318 return std::pair(
1319 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1320 });
1321
1322 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1323 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1324 .customIf([=](const LegalityQuery &Query) {
1325 return Query.Types[0].isFixedVector() &&
1326 Query.Types[0].getScalarSizeInBits() < 8;
1327 })
1328 .bitcastIf(
1329 [=](const LegalityQuery &Query) {
1330 return Query.Types[0].isFixedVector() &&
1331 Query.Types[1].isFixedVector() &&
1332 Query.Types[0].getScalarSizeInBits() >= 8 &&
1333 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1334 Query.Types[0].getSizeInBits() <= 128 &&
1335 Query.Types[1].getSizeInBits() <= 64;
1336 },
1337 [=](const LegalityQuery &Query) {
1338 const LLT DstTy = Query.Types[0];
1339 const LLT SrcTy = Query.Types[1];
1340 return std::pair(
1341 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1342 .changeElementCount(
1343 DstTy.getElementCount().divideCoefficientBy(
1344 SrcTy.getNumElements())));
1345 });
1346
1347 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1348 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1350 .immIdx(0); // Inform verifier imm idx 0 is handled.
1351
1352 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1353 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1354 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1355
1356 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1357
1358 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1359
1360 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1361
1362 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1363
1364 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1365
1366 if (ST.hasMOPS()) {
1367 // G_BZERO is not supported. Currently it is only emitted by
1368 // PreLegalizerCombiner for G_MEMSET with zero constant.
1369 getActionDefinitionsBuilder(G_BZERO).unsupported();
1370
1371 getActionDefinitionsBuilder(G_MEMSET)
1372 .legalForCartesianProduct({p0}, {s64}, {s64})
1373 .customForCartesianProduct({p0}, {s8}, {s64})
1374 .immIdx(0); // Inform verifier imm idx 0 is handled.
1375
1376 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1377 .legalForCartesianProduct({p0}, {p0}, {s64})
1378 .immIdx(0); // Inform verifier imm idx 0 is handled.
1379
1380 // G_MEMCPY_INLINE does not have a tailcall immediate
1381 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1382 .legalForCartesianProduct({p0}, {p0}, {s64});
1383
1384 } else {
1385 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1386 .libcall();
1387 }
1388
1389 // For fadd reductions we have pairwise operations available. We treat the
1390 // usual legal types as legal and handle the lowering to pairwise instructions
1391 // later.
1392 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1393 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1394 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1395 .minScalarOrElt(0, MinFPScalar)
1396 .clampMaxNumElements(1, s64, 2)
1397 .clampMaxNumElements(1, s32, 4)
1398 .clampMaxNumElements(1, s16, 8)
1399 .moreElementsToNextPow2(1)
1400 .scalarize(1)
1401 .lower();
1402
1403 // For fmul reductions we need to split up into individual operations. We
1404 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1405 // smaller types, followed by scalarizing what remains.
1406 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1407 .minScalarOrElt(0, MinFPScalar)
1408 .clampMaxNumElements(1, s64, 2)
1409 .clampMaxNumElements(1, s32, 4)
1410 .clampMaxNumElements(1, s16, 8)
1411 .clampMaxNumElements(1, s32, 2)
1412 .clampMaxNumElements(1, s16, 4)
1413 .scalarize(1)
1414 .lower();
1415
1416 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1417 .scalarize(2)
1418 .lower();
1419
1420 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1421 .legalFor({{i8, v8i8},
1422 {i8, v16i8},
1423 {i16, v4i16},
1424 {i16, v8i16},
1425 {i32, v2i32},
1426 {i32, v4i32},
1427 {i64, v2i64}})
1429 .clampMaxNumElements(1, s64, 2)
1430 .clampMaxNumElements(1, s32, 4)
1431 .clampMaxNumElements(1, s16, 8)
1432 .clampMaxNumElements(1, s8, 16)
1433 .widenVectorEltsToVectorMinSize(1, 64)
1434 .scalarize(1);
1435
1436 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1437 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1438 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1439 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1440 .minScalarOrElt(0, MinFPScalar)
1441 .clampMaxNumElements(1, s64, 2)
1442 .clampMaxNumElements(1, s32, 4)
1443 .clampMaxNumElements(1, s16, 8)
1444 .scalarize(1)
1445 .lower();
1446
1447 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1448 .clampMaxNumElements(1, s32, 2)
1449 .clampMaxNumElements(1, s16, 4)
1450 .clampMaxNumElements(1, s8, 8)
1451 .scalarize(1)
1452 .lower();
1453
1454 getActionDefinitionsBuilder(
1455 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1456 .legalFor({{i8, v8i8},
1457 {i8, v16i8},
1458 {i16, v4i16},
1459 {i16, v8i16},
1460 {i32, v2i32},
1461 {i32, v4i32}})
1462 .moreElementsIf(
1463 [=](const LegalityQuery &Query) {
1464 return Query.Types[1].isVector() &&
1465 Query.Types[1].getElementType() != s8 &&
1466 Query.Types[1].getNumElements() & 1;
1467 },
1468 LegalizeMutations::moreElementsToNextPow2(1))
1469 .clampMaxNumElements(1, s64, 2)
1470 .clampMaxNumElements(1, s32, 4)
1471 .clampMaxNumElements(1, s16, 8)
1472 .clampMaxNumElements(1, s8, 16)
1473 .scalarize(1)
1474 .lower();
1475
1476 getActionDefinitionsBuilder(
1477 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1478 // Try to break down into smaller vectors as long as they're at least 64
1479 // bits. This lets us use vector operations for some parts of the
1480 // reduction.
1481 .fewerElementsIf(
1482 [=](const LegalityQuery &Q) {
1483 LLT SrcTy = Q.Types[1];
1484 if (SrcTy.isScalar())
1485 return false;
1486 if (!isPowerOf2_32(SrcTy.getNumElements()))
1487 return false;
1488 // We can usually perform 64b vector operations.
1489 return SrcTy.getSizeInBits() > 64;
1490 },
1491 [=](const LegalityQuery &Q) {
1492 LLT SrcTy = Q.Types[1];
1493 return std::make_pair(1, SrcTy.divide(2));
1494 })
1495 .scalarize(1)
1496 .lower();
1497
1498 // TODO: Update this to correct handling when adding AArch64/SVE support.
1499 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1500
1501 // Access to floating-point environment.
1502 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1503 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1504 .libcall();
1505
1506 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1507
1508 getActionDefinitionsBuilder(G_PREFETCH).custom();
1509
1510 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1511
1512 getLegacyLegalizerInfo().computeTables();
1513 verify(*ST.getInstrInfo());
1514}
1515
1516bool AArch64LegalizerInfo::legalizeCustom(
1517 LegalizerHelper &Helper, MachineInstr &MI,
1518 LostDebugLocObserver &LocObserver) const {
1519 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1520 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1521 GISelChangeObserver &Observer = Helper.Observer;
1522 switch (MI.getOpcode()) {
1523 default:
1524 // No idea what to do.
1525 return false;
1526 case TargetOpcode::G_VAARG:
1527 return legalizeVaArg(MI, MRI, MIRBuilder);
1528 case TargetOpcode::G_LOAD:
1529 case TargetOpcode::G_STORE:
1530 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1531 case TargetOpcode::G_SHL:
1532 case TargetOpcode::G_ASHR:
1533 case TargetOpcode::G_LSHR:
1534 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1535 case TargetOpcode::G_GLOBAL_VALUE:
1536 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1537 case TargetOpcode::G_SBFX:
1538 case TargetOpcode::G_UBFX:
1539 return legalizeBitfieldExtract(MI, MRI, Helper);
1540 case TargetOpcode::G_FSHL:
1541 case TargetOpcode::G_FSHR:
1542 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1543 case TargetOpcode::G_ROTR:
1544 return legalizeRotate(MI, MRI, Helper);
1545 case TargetOpcode::G_CTPOP:
1546 return legalizeCTPOP(MI, MRI, Helper);
1547 case TargetOpcode::G_ATOMIC_CMPXCHG:
1548 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1549 case TargetOpcode::G_CTTZ:
1550 return legalizeCTTZ(MI, Helper);
1551 case TargetOpcode::G_BZERO:
1552 case TargetOpcode::G_MEMCPY:
1553 case TargetOpcode::G_MEMMOVE:
1554 case TargetOpcode::G_MEMSET:
1555 return legalizeMemOps(MI, Helper);
1556 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1557 return legalizeExtractVectorElt(MI, MRI, Helper);
1558 case TargetOpcode::G_DYN_STACKALLOC:
1559 return legalizeDynStackAlloc(MI, Helper);
1560 case TargetOpcode::G_PREFETCH:
1561 return legalizePrefetch(MI, Helper);
1562 case TargetOpcode::G_ABS:
1563 return Helper.lowerAbsToCNeg(MI);
1564 case TargetOpcode::G_ICMP:
1565 return legalizeICMP(MI, MRI, MIRBuilder);
1566 case TargetOpcode::G_BITCAST:
1567 return legalizeBitcast(MI, Helper);
1568 case TargetOpcode::G_CONCAT_VECTORS:
1569 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1570 case TargetOpcode::G_FPTRUNC:
1571 // In order to lower f16 to f64 properly, we need to use f32 as an
1572 // intermediary
1573 return legalizeFptrunc(MI, MIRBuilder, MRI);
1574 }
1575
1576 llvm_unreachable("expected switch to return");
1577}
1578
1579bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1580 LegalizerHelper &Helper) const {
1581 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1582 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1583 // We're trying to handle casts from i1 vectors to scalars but reloading from
1584 // stack.
1585 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1586 SrcTy.getElementType() != LLT::scalar(1))
1587 return false;
1588
1589 Helper.createStackStoreLoad(DstReg, SrcReg);
1590 MI.eraseFromParent();
1591 return true;
1592}
1593
1594bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1595 MachineRegisterInfo &MRI,
1596 MachineIRBuilder &MIRBuilder,
1597 GISelChangeObserver &Observer,
1598 LegalizerHelper &Helper) const {
1599 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1600 MI.getOpcode() == TargetOpcode::G_FSHR);
1601
1602 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1603 // lowering
1604 Register ShiftNo = MI.getOperand(3).getReg();
1605 LLT ShiftTy = MRI.getType(ShiftNo);
1606 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1607
1608 // Adjust shift amount according to Opcode (FSHL/FSHR)
1609 // Convert FSHL to FSHR
1610 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1611 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1612
1613 // Lower non-constant shifts and leave zero shifts to the optimizer.
1614 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1615 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1616 LegalizerHelper::LegalizeResult::Legalized);
1617
1618 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1619
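 // fshl(x, y, n) == fshr(x, y, BitWidth - n) for n % BitWidth != 0 (the zero
 // case was handled above), so a constant FSHL amount can be rewritten as an
 // FSHR amount.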
1620 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1621
1622 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1623 // the range (0, BitWidth), it is already legal.
1624 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1625 VRegAndVal->Value.ult(BitWidth))
1626 return true;
1627
1628 // Cast the ShiftNumber to a 64-bit type
1629 auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));
1630
1631 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1632 Observer.changingInstr(MI);
1633 MI.getOperand(3).setReg(Cast64.getReg(0));
1634 Observer.changedInstr(MI);
1635 }
1636 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1637 // instruction
1638 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1639 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1640 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1641 Cast64.getReg(0)});
1642 MI.eraseFromParent();
1643 }
1644 return true;
1645}
1646
1647bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1648 MachineRegisterInfo &MRI,
1649 MachineIRBuilder &MIRBuilder) const {
1650 Register DstReg = MI.getOperand(0).getReg();
1651 Register SrcReg1 = MI.getOperand(2).getReg();
1652 Register SrcReg2 = MI.getOperand(3).getReg();
1653 LLT DstTy = MRI.getType(DstReg);
1654 LLT SrcTy = MRI.getType(SrcReg1);
1655
1656 // Check the vector types are legal
1657 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1658 DstTy.getNumElements() != SrcTy.getNumElements() ||
1659 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1660 return false;
1661
1662 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1663 // following passes
1664 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1665 if (Pred != CmpInst::ICMP_NE)
1666 return true;
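 // NEON has CMEQ but no compare-not-equal, so NE is emitted as CMEQ followed
 // by a bitwise NOT.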
1667 Register CmpReg =
1668 MIRBuilder
1669 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1670 .getReg(0);
1671 MIRBuilder.buildNot(DstReg, CmpReg);
1672
1673 MI.eraseFromParent();
1674 return true;
1675}
1676
1677bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1678 MachineRegisterInfo &MRI,
1679 LegalizerHelper &Helper) const {
1680 // To allow for imported patterns to match, we ensure that the rotate amount
1681 // is 64b with an extension.
1682 Register AmtReg = MI.getOperand(2).getReg();
1683 LLT AmtTy = MRI.getType(AmtReg);
1684 (void)AmtTy;
1685 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1686 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1687 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
1688 Helper.Observer.changingInstr(MI);
1689 MI.getOperand(2).setReg(NewAmt.getReg(0));
1690 Helper.Observer.changedInstr(MI);
1691 return true;
1692}
1693
1694bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1695 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1696 GISelChangeObserver &Observer) const {
1697 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1698 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1699 // G_ADD_LOW instructions.
1700 // By splitting this here, we can optimize accesses in the small code model by
1701 // folding in the G_ADD_LOW into the load/store offset.
1702 auto &GlobalOp = MI.getOperand(1);
1703 // Don't modify an intrinsic call.
1704 if (GlobalOp.isSymbol())
1705 return true;
1706 const auto* GV = GlobalOp.getGlobal();
1707 if (GV->isThreadLocal())
1708 return true; // Don't want to modify TLS vars.
1709
1710 auto &TM = ST->getTargetLowering()->getTargetMachine();
1711 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1712
1713 if (OpFlags & AArch64II::MO_GOT)
1714 return true;
1715
1716 auto Offset = GlobalOp.getOffset();
1717 Register DstReg = MI.getOperand(0).getReg();
1718 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1719 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1720 // Set the regclass on the dest reg too.
1721 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1722
1723 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1724 // by creating a MOVK that sets bits 48-63 of the register to (global address
1725 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1726 // prevent an incorrect tag being generated during relocation when the
1727 // global appears before the code section. Without the offset, a global at
1728 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1729 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1730 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1731 // instead of `0xf`.
1732 // This assumes that we're in the small code model so we can assume a binary
1733 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1734 // binary must also be loaded into address range [0, 2^48). Both of these
1735 // properties need to be ensured at runtime when using tagged addresses.
1736 if (OpFlags & AArch64II::MO_TAGGED) {
1737 assert(!Offset &&
1738 "Should not have folded in an offset for a tagged global!");
1739 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1740 .addGlobalAddress(GV, 0x100000000,
1741 AArch64II::MO_PREL | AArch64II::MO_G3)
1742 .addImm(48);
1743 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1744 }
1745
1746 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1747 .addGlobalAddress(GV, Offset,
1748 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1749 MI.eraseFromParent();
1750 return true;
1751}
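// Editor's sketch: a self-contained compile-time check of the bias arithmetic
// described in the MO_TAGGED comment above. The constants repeat the example
// from that comment; the names are illustrative only.
constexpr unsigned long long TaggedGlobal = 0x0f00000000001000ULL; // tag 0xf
constexpr unsigned long long CodeAddr = 0x2000ULL; // referencing PC
static_assert(((TaggedGlobal - CodeAddr) >> 56) == 0x0e,
              "without the 0x100000000 bias the tag byte decays to 0xe");
static_assert(((TaggedGlobal + 0x100000000ULL - CodeAddr) >> 56) == 0x0f,
              "with the bias the 0xf tag survives the PC-relative subtraction");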
1752
1753 bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1754 MachineInstr &MI) const {
1755 MachineIRBuilder &MIB = Helper.MIRBuilder;
1756 MachineRegisterInfo &MRI = *MIB.getMRI();
1757
1758 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1759 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1760 MI.eraseFromParent();
1761 return true;
1762 };
1763 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1764 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1765 {MI.getOperand(2), MI.getOperand(3)});
1766 MI.eraseFromParent();
1767 return true;
1768 };
1769 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1770 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1771 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1772 MI.eraseFromParent();
1773 return true;
1774 };
1775
1776 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1777 switch (IntrinsicID) {
1778 case Intrinsic::vacopy: {
1779 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1780 unsigned VaListSize =
1781 (ST->isTargetDarwin() || ST->isTargetWindows())
1782 ? PtrSize
1783 : ST->isTargetILP32() ? 20 : 32;
1784
1785 MachineFunction &MF = *MI.getMF();
1786 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1787 LLT::scalar(VaListSize * 8));
1788 MIB.buildLoad(Val, MI.getOperand(2),
1789 *MF.getMachineMemOperand(MachinePointerInfo(),
1790 MachineMemOperand::MOLoad,
1791 VaListSize, Align(PtrSize)));
1792 MIB.buildStore(Val, MI.getOperand(1),
1793 *MF.getMachineMemOperand(MachinePointerInfo(),
1794 MachineMemOperand::MOStore,
1795 VaListSize, Align(PtrSize)));
1796 MI.eraseFromParent();
1797 return true;
1798 }
1799 case Intrinsic::get_dynamic_area_offset: {
1800 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1801 MI.eraseFromParent();
1802 return true;
1803 }
1804 case Intrinsic::aarch64_mops_memset_tag: {
1805 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1806 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1807 // the instruction).
1808 auto &Value = MI.getOperand(3);
1809 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1810 Value.setReg(ExtValueReg);
1811 return true;
1812 }
1813 case Intrinsic::aarch64_prefetch: {
1814 auto &AddrVal = MI.getOperand(1);
1815
1816 int64_t IsWrite = MI.getOperand(2).getImm();
1817 int64_t Target = MI.getOperand(3).getImm();
1818 int64_t IsStream = MI.getOperand(4).getImm();
1819 int64_t IsData = MI.getOperand(5).getImm();
1820
1821 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1822 (!IsData << 3) | // IsDataCache bit
1823 (Target << 1) | // Cache level bits
1824 (unsigned)IsStream; // Stream bit
1825
1826 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1827 MI.eraseFromParent();
1828 return true;
1829 }
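// Worked example of the PrfOp packing above (editor's note): a plain data
// read into the closest cache level with temporal locality has IsWrite = 0,
// Target = 0, IsStream = 0, IsData = 1, so PrfOp = 0; a streaming data write
// aimed at the second level has IsWrite = 1, Target = 1, IsStream = 1,
// IsData = 1, so PrfOp = (1 << 4) | (1 << 1) | 1 = 0b10011 = 19.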
1830 case Intrinsic::aarch64_range_prefetch: {
1831 auto &AddrVal = MI.getOperand(1);
1832
1833 int64_t IsWrite = MI.getOperand(2).getImm();
1834 int64_t IsStream = MI.getOperand(3).getImm();
1835 unsigned PrfOp = (IsStream << 2) | IsWrite;
1836
1837 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1838 .addImm(PrfOp)
1839 .add(AddrVal)
1840 .addUse(MI.getOperand(4).getReg()); // Metadata
1841 MI.eraseFromParent();
1842 return true;
1843 }
1844 case Intrinsic::aarch64_prefetch_ir: {
1845 auto &AddrVal = MI.getOperand(1);
1846 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1847 MI.eraseFromParent();
1848 return true;
1849 }
1850 case Intrinsic::aarch64_neon_uaddv:
1851 case Intrinsic::aarch64_neon_saddv:
1852 case Intrinsic::aarch64_neon_umaxv:
1853 case Intrinsic::aarch64_neon_smaxv:
1854 case Intrinsic::aarch64_neon_uminv:
1855 case Intrinsic::aarch64_neon_sminv: {
1856 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1857 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1858 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1859
1860 auto OldDst = MI.getOperand(0).getReg();
1861 auto OldDstTy = MRI.getType(OldDst);
1862 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1863 if (OldDstTy == NewDstTy)
1864 return true;
1865
1866 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1867
1868 Helper.Observer.changingInstr(MI);
1869 MI.getOperand(0).setReg(NewDst);
1870 Helper.Observer.changedInstr(MI);
1871
1872 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1873 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1874 OldDst, NewDst);
1875
1876 return true;
1877 }
1878 case Intrinsic::aarch64_neon_uaddlp:
1879 case Intrinsic::aarch64_neon_saddlp: {
1880 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1881 ? AArch64::G_UADDLP
1882 : AArch64::G_SADDLP;
1883 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1884 MI.eraseFromParent();
1885
1886 return true;
1887 }
1888 case Intrinsic::aarch64_neon_uaddlv:
1889 case Intrinsic::aarch64_neon_saddlv: {
1890 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1891 ? AArch64::G_UADDLV
1892 : AArch64::G_SADDLV;
1893 Register DstReg = MI.getOperand(0).getReg();
1894 Register SrcReg = MI.getOperand(2).getReg();
1895 LLT DstTy = MRI.getType(DstReg);
1896
1897 LLT MidTy, ExtTy;
1898 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1899 ExtTy = LLT::integer(32);
1900 MidTy = LLT::fixed_vector(4, ExtTy);
1901 } else {
1902 ExtTy = LLT::integer(64);
1903 MidTy = LLT::fixed_vector(2, ExtTy);
1904 }
1905
1906 Register MidReg =
1907 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1908 Register ZeroReg =
1909 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1910 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1911 {MidReg, ZeroReg})
1912 .getReg(0);
1913
1914 if (DstTy.getScalarSizeInBits() < 32)
1915 MIB.buildTrunc(DstReg, ExtReg);
1916 else
1917 MIB.buildCopy(DstReg, ExtReg);
1918
1919 MI.eraseFromParent();
1920
1921 return true;
1922 }
1923 case Intrinsic::aarch64_neon_smax:
1924 return LowerBinOp(TargetOpcode::G_SMAX);
1925 case Intrinsic::aarch64_neon_smin:
1926 return LowerBinOp(TargetOpcode::G_SMIN);
1927 case Intrinsic::aarch64_neon_umax:
1928 return LowerBinOp(TargetOpcode::G_UMAX);
1929 case Intrinsic::aarch64_neon_umin:
1930 return LowerBinOp(TargetOpcode::G_UMIN);
1931 case Intrinsic::aarch64_neon_fmax:
1932 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1933 case Intrinsic::aarch64_neon_fmin:
1934 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1935 case Intrinsic::aarch64_neon_fmaxnm:
1936 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1937 case Intrinsic::aarch64_neon_fminnm:
1938 return LowerBinOp(TargetOpcode::G_FMINNUM);
1939 case Intrinsic::aarch64_neon_pmull:
1940 case Intrinsic::aarch64_neon_pmull64:
1941 return LowerBinOp(AArch64::G_PMULL);
1942 case Intrinsic::aarch64_neon_smull:
1943 return LowerBinOp(AArch64::G_SMULL);
1944 case Intrinsic::aarch64_neon_umull:
1945 return LowerBinOp(AArch64::G_UMULL);
1946 case Intrinsic::aarch64_neon_sabd:
1947 return LowerBinOp(TargetOpcode::G_ABDS);
1948 case Intrinsic::aarch64_neon_uabd:
1949 return LowerBinOp(TargetOpcode::G_ABDU);
1950 case Intrinsic::aarch64_neon_uhadd:
1951 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1952 case Intrinsic::aarch64_neon_urhadd:
1953 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1954 case Intrinsic::aarch64_neon_shadd:
1955 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1956 case Intrinsic::aarch64_neon_srhadd:
1957 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1958 case Intrinsic::aarch64_neon_sqshrn: {
1959 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1960 return true;
1961 // Create right shift instruction. Store the output register in Shr.
1962 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1963 {MRI.getType(MI.getOperand(2).getReg())},
1964 {MI.getOperand(2), MI.getOperand(3).getImm()});
1965 // Build the narrow intrinsic, taking in Shr.
1966 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1967 MI.eraseFromParent();
1968 return true;
1969 }
1970 case Intrinsic::aarch64_neon_sqshrun: {
1971 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1972 return true;
1973 // Create right shift instruction. Store the output register in Shr.
1974 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1975 {MRI.getType(MI.getOperand(2).getReg())},
1976 {MI.getOperand(2), MI.getOperand(3).getImm()});
1977 // Build the narrow intrinsic, taking in Shr.
1978 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1979 MI.eraseFromParent();
1980 return true;
1981 }
1982 case Intrinsic::aarch64_neon_sqrshrn: {
1983 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1984 return true;
1985 // Create right shift instruction. Store the output register in Shr.
1986 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1987 {MRI.getType(MI.getOperand(2).getReg())},
1988 {MI.getOperand(2), MI.getOperand(3).getImm()});
1989 // Build the narrow intrinsic, taking in Shr.
1990 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1991 MI.eraseFromParent();
1992 return true;
1993 }
1994 case Intrinsic::aarch64_neon_sqrshrun: {
1995 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1996 return true;
1997 // Create right shift instruction. Store the output register in Shr.
1998 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1999 {MRI.getType(MI.getOperand(2).getReg())},
2000 {MI.getOperand(2), MI.getOperand(3).getImm()});
2001 // Build the narrow intrinsic, taking in Shr.
2002 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
2003 MI.eraseFromParent();
2004 return true;
2005 }
2006 case Intrinsic::aarch64_neon_uqrshrn: {
2007 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2008 return true;
2009 // Create right shift instruction. Store the output register in Shr.
2010 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
2011 {MRI.getType(MI.getOperand(2).getReg())},
2012 {MI.getOperand(2), MI.getOperand(3).getImm()});
2013 // Build the narrow intrinsic, taking in Shr.
2014 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2015 MI.eraseFromParent();
2016 return true;
2017 }
2018 case Intrinsic::aarch64_neon_uqshrn: {
2019 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2020 return true;
2021 // Create right shift instruction. Store the output register in Shr.
2022 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2023 {MRI.getType(MI.getOperand(2).getReg())},
2024 {MI.getOperand(2), MI.getOperand(3).getImm()});
2025 // Build the narrow intrinsic, taking in Shr.
2026 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2027 MI.eraseFromParent();
2028 return true;
2029 }
2030 case Intrinsic::aarch64_neon_sqshlu: {
2031 // Check if last operand is constant vector dup
2032 auto ShiftAmount = isConstantOrConstantSplatVector(
2033 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2034 if (ShiftAmount) {
2035 // If so, create a new intrinsic with the correct shift amount
2036 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2037 {MI.getOperand(2)})
2038 .addImm(ShiftAmount->getSExtValue());
2039 MI.eraseFromParent();
2040 return true;
2041 }
2042 return false;
2043 }
2044 case Intrinsic::aarch64_neon_vsli: {
2045 MIB.buildInstr(
2046 AArch64::G_SLI, {MI.getOperand(0)},
2047 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2048 MI.eraseFromParent();
2049 break;
2050 }
2051 case Intrinsic::aarch64_neon_vsri: {
2052 MIB.buildInstr(
2053 AArch64::G_SRI, {MI.getOperand(0)},
2054 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2055 MI.eraseFromParent();
2056 break;
2057 }
2058 case Intrinsic::aarch64_neon_abs: {
2059 // Lower the intrinsic to G_ABS.
2060 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2061 MI.eraseFromParent();
2062 return true;
2063 }
2064 case Intrinsic::aarch64_neon_sqadd: {
2065 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2066 return LowerBinOp(TargetOpcode::G_SADDSAT);
2067 break;
2068 }
2069 case Intrinsic::aarch64_neon_sqsub: {
2070 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2071 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2072 break;
2073 }
2074 case Intrinsic::aarch64_neon_uqadd: {
2075 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2076 return LowerBinOp(TargetOpcode::G_UADDSAT);
2077 break;
2078 }
2079 case Intrinsic::aarch64_neon_uqsub: {
2080 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2081 return LowerBinOp(TargetOpcode::G_USUBSAT);
2082 break;
2083 }
2084 case Intrinsic::aarch64_neon_udot:
2085 return LowerTriOp(AArch64::G_UDOT);
2086 case Intrinsic::aarch64_neon_sdot:
2087 return LowerTriOp(AArch64::G_SDOT);
2088 case Intrinsic::aarch64_neon_usdot:
2089 return LowerTriOp(AArch64::G_USDOT);
2090 case Intrinsic::aarch64_neon_sqxtn:
2091 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2092 case Intrinsic::aarch64_neon_sqxtun:
2093 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2094 case Intrinsic::aarch64_neon_uqxtn:
2095 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2096 case Intrinsic::aarch64_neon_fcvtzu:
2097 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2098 case Intrinsic::aarch64_neon_fcvtzs:
2099 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2100
2101 case Intrinsic::vector_reverse:
2102 // TODO: Add support for vector_reverse
2103 return false;
2104 }
2105
2106 return true;
2107}
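// Illustrative MIR sketch (hypothetical registers): the LowerUnaryOp,
// LowerBinOp and LowerTriOp helpers above simply re-emit the intrinsic as the
// named generic opcode, e.g.
//   %d:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.smax), %a, %b
// becomes
//   %d:_(<4 x s32>) = G_SMAX %a, %b
// after which the ordinary legalization and selection rules for that opcode
// apply.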
2108
2109bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2110 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2111 GISelChangeObserver &Observer) const {
2112 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2113 MI.getOpcode() == TargetOpcode::G_LSHR ||
2114 MI.getOpcode() == TargetOpcode::G_SHL);
2115 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2116 // imported patterns can select it later. Either way, it will be legal.
2117 Register AmtReg = MI.getOperand(2).getReg();
2118 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2119 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2120 if (!VRegAndVal)
2121 return true;
2122 // Check the shift amount is in range for an immediate form.
2123 int64_t Amount = VRegAndVal->Value.getSExtValue();
2124 if (Amount > 31)
2125 return true; // This will have to remain a register variant.
2126 auto ExtCst =
2127 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
2128 Observer.changingInstr(MI);
2129 MI.getOperand(2).setReg(ExtCst.getReg(0));
2130 Observer.changedInstr(MI);
2131 return true;
2132}
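// Illustrative MIR sketch (hypothetical registers): a constant shift amount
// such as
//   %amt:_(s32) = G_CONSTANT i32 3
//   %r:_(s32) = G_SHL %x:_(s32), %amt:_(s32)
// is rebuilt with a 64-bit constant,
//   %amt64:_(s64) = G_CONSTANT i64 3
//   %r:_(s32) = G_SHL %x:_(s32), %amt64:_(s64)
// so the imported immediate-form patterns can match; amounts above 31 are
// left alone and remain in register form.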
2133
2134 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2135 MachineRegisterInfo &MRI) {
2136 Base = Root;
2137 Offset = 0;
2138
2139 Register NewBase;
2140 int64_t NewOffset;
2141 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2142 isShiftedInt<7, 3>(NewOffset)) {
2143 Base = NewBase;
2144 Offset = NewOffset;
2145 }
2146}
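// Editor's sketch: isShiftedInt<7, 3> accepts offsets that are multiples of 8
// whose scaled value fits a signed 7-bit field, i.e. the range [-512, 504].
// A local restatement of that predicate (not the llvm::isShiftedInt
// implementation) with compile-time checks:
constexpr bool fitsLdpStpOffset(long long Off) {
  return Off % 8 == 0 && Off / 8 >= -64 && Off / 8 <= 63;
}
static_assert(fitsLdpStpOffset(504) && fitsLdpStpOffset(-512),
              "the scaled-immediate extremes can be folded");
static_assert(!fitsLdpStpOffset(512) && !fitsLdpStpOffset(4),
              "out-of-range or unscaled offsets keep the plain base register");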
2147
2148// FIXME: This should be removed and replaced with the generic bitcast legalize
2149// action.
2150bool AArch64LegalizerInfo::legalizeLoadStore(
2151 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2152 GISelChangeObserver &Observer) const {
2153 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2154 MI.getOpcode() == TargetOpcode::G_LOAD);
2155 // Here we just try to handle vector loads/stores where our value type might
2156 // have pointer elements, which the SelectionDAG importer can't handle. To
2157 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2158 // the value to use s64 types.
2159
2160 // Custom legalization requires that the instruction, if not deleted, be fully
2161 // legalized. To allow further legalization of the instruction, we create a
2162 // new instruction and erase the existing one.
2163
2164 Register ValReg = MI.getOperand(0).getReg();
2165 const LLT ValTy = MRI.getType(ValReg);
2166
2167 if (ValTy == LLT::scalar(128)) {
2168
2169 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2170 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2171 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2172 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2173 bool IsRcpC3 =
2174 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2175
2176 LLT s64 = LLT::integer(64);
2177
2178 unsigned Opcode;
2179 if (IsRcpC3) {
2180 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2181 } else {
2182 // For LSE2, loads/stores should have been converted to monotonic and had
2183 // a fence inserted after them.
2184 assert(Ordering == AtomicOrdering::Monotonic ||
2185 Ordering == AtomicOrdering::Unordered);
2186 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2187
2188 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2189 }
2190
2191 MachineInstrBuilder NewI;
2192 if (IsLoad) {
2193 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2194 MIRBuilder.buildMergeLikeInstr(
2195 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2196 } else {
2197 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2198 NewI = MIRBuilder.buildInstr(
2199 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2200 }
2201
2202 if (IsRcpC3) {
2203 NewI.addUse(MI.getOperand(1).getReg());
2204 } else {
2205 Register Base;
2206 int Offset;
2207 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2208 NewI.addUse(Base);
2209 NewI.addImm(Offset / 8);
2210 }
2211
2212 NewI.cloneMemRefs(MI);
2213 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2214 *MRI.getTargetRegisterInfo(),
2215 *ST->getRegBankInfo());
2216 MI.eraseFromParent();
2217 return true;
2218 }
2219
2220 if (!ValTy.isPointerVector() ||
2221 ValTy.getElementType().getAddressSpace() != 0) {
2222 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2223 return false;
2224 }
2225
2226 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2227 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
2228 auto &MMO = **MI.memoperands_begin();
2229 MMO.setType(NewTy);
2230
2231 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2232 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2233 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2234 } else {
2235 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2236 MIRBuilder.buildBitcast(ValReg, NewLoad);
2237 }
2238 MI.eraseFromParent();
2239 return true;
2240}
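// Illustrative MIR sketch (hypothetical registers): on an LSE2 target a
// non-acquire atomic s128 load such as
//   %v:_(s128) = G_LOAD %p:_(p0) :: (load monotonic (s128))
// is expanded by the code above into a paired load plus a merge,
//   %lo:_(s64), %hi:_(s64) = LDPXi %base, imm/8
//   %v:_(s128) = G_MERGE_VALUES %lo, %hi
// where matchLDPSTPAddrMode folds a suitable G_PTR_ADD offset into the scaled
// LDP immediate.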
2241
2242bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2243 MachineRegisterInfo &MRI,
2244 MachineIRBuilder &MIRBuilder) const {
2245 MachineFunction &MF = MIRBuilder.getMF();
2246 Align Alignment(MI.getOperand(2).getImm());
2247 Register Dst = MI.getOperand(0).getReg();
2248 Register ListPtr = MI.getOperand(1).getReg();
2249
2250 LLT PtrTy = MRI.getType(ListPtr);
2251 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2252
2253 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2254 const Align PtrAlign = Align(PtrSize);
2255 auto List = MIRBuilder.buildLoad(
2256 PtrTy, ListPtr,
2257 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2258 PtrTy, PtrAlign));
2259
2260 MachineInstrBuilder DstPtr;
2261 if (Alignment > PtrAlign) {
2262 // Realign the list to the actual required alignment.
2263 auto AlignMinus1 =
2264 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2265 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2266 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2267 } else
2268 DstPtr = List;
2269
2270 LLT ValTy = MRI.getType(Dst);
2271 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2272 MIRBuilder.buildLoad(
2273 Dst, DstPtr,
2274 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2275 ValTy, std::max(Alignment, PtrAlign)));
2276
2277 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2278
2279 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2280
2281 MIRBuilder.buildStore(NewList, ListPtr,
2282 *MF.getMachineMemOperand(MachinePointerInfo(),
2283 MachineMemOperand::MOStore,
2284 PtrTy, PtrAlign));
2285
2286 MI.eraseFromParent();
2287 return true;
2288}
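// Editor's sketch of the realignment step above: the list pointer is advanced
// by (alignment - 1) and its low bits are masked off, which rounds it up to
// the required alignment. A plain-integer restatement with hypothetical
// addresses:
constexpr unsigned long long roundUpTo(unsigned long long Ptr,
                                       unsigned long long Alignment) {
  return (Ptr + Alignment - 1) & ~(Alignment - 1);
}
static_assert(roundUpTo(0x1004, 16) == 0x1010, "misaligned slot rounds up");
static_assert(roundUpTo(0x1010, 16) == 0x1010, "aligned slot is unchanged");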
2289
2290bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2291 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2292 // Only legal if we can select immediate forms.
2293 // TODO: Lower this otherwise.
2294 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2295 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2296}
2297
2298bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2299 MachineRegisterInfo &MRI,
2300 LegalizerHelper &Helper) const {
2301 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2302 // it can be more efficiently lowered to the following sequence that uses
2303 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2304 // registers are cheap.
2305 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2306 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2307 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2308 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2309 //
2310 // For 128 bit vector popcounts, we lower to the following sequence:
2311 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2312 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2313 // uaddlp.4s v0, v0 // v4s32, v2s64
2314 // uaddlp.2d v0, v0 // v2s64
2315 //
2316 // For 64 bit vector popcounts, we lower to the following sequence:
2317 // cnt.8b v0, v0 // v4s16, v2s32
2318 // uaddlp.4h v0, v0 // v4s16, v2s32
2319 // uaddlp.2s v0, v0 // v2s32
2320
2321 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2322 Register Dst = MI.getOperand(0).getReg();
2323 Register Val = MI.getOperand(1).getReg();
2324 LLT Ty = MRI.getType(Val);
2325
2326 LLT i64 = LLT::integer(64);
2327 LLT i32 = LLT::integer(32);
2328 LLT i16 = LLT::integer(16);
2329 LLT i8 = LLT::integer(8);
2330 unsigned Size = Ty.getSizeInBits();
2331
2332 assert(Ty == MRI.getType(Dst) &&
2333 "Expected src and dst to have the same type!");
2334
2335 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2336
2337 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2338 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2339 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2340 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2341
2342 MIRBuilder.buildZExt(Dst, Add);
2343 MI.eraseFromParent();
2344 return true;
2345 }
2346
2347 if (!ST->hasNEON() ||
2348 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2349 // Use generic lowering when custom lowering is not possible.
2350 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2351 Helper.lowerBitCount(MI) ==
2352 LegalizerHelper::LegalizeResult::Legalized;
2353 }
2354
2355 // Pre-conditioning: widen Val up to the nearest vector type.
2356 // s32,s64,v4s16,v2s32 -> v8i8
2357 // v8s16,v4s32,v2s64 -> v16i8
2358 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2359 if (Ty.isScalar()) {
2360 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2361 if (Size == 32) {
2362 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2363 }
2364 }
2365 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2366
2367 // Count bits in each byte-sized lane.
2368 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2369
2370 // Sum across lanes.
2371 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2372 Ty.getScalarSizeInBits() != 16) {
2373 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2374 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2375 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2376 MachineInstrBuilder Sum;
2377
2378 if (Ty == LLT::fixed_vector(2, i64)) {
2379 auto UDOT =
2380 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2381 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2382 } else if (Ty == LLT::fixed_vector(4, i32)) {
2383 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2384 } else if (Ty == LLT::fixed_vector(2, i32)) {
2385 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2386 } else {
2387 llvm_unreachable("unexpected vector shape");
2388 }
2389
2390 Sum->getOperand(0).setReg(Dst);
2391 MI.eraseFromParent();
2392 return true;
2393 }
2394
2395 Register HSum = CTPOP.getReg(0);
2396 unsigned Opc;
2397 SmallVector<LLT> HAddTys;
2398 if (Ty.isScalar()) {
2399 Opc = Intrinsic::aarch64_neon_uaddlv;
2400 HAddTys.push_back(i32);
2401 } else if (Ty == LLT::fixed_vector(8, i16)) {
2402 Opc = Intrinsic::aarch64_neon_uaddlp;
2403 HAddTys.push_back(LLT::fixed_vector(8, i16));
2404 } else if (Ty == LLT::fixed_vector(4, i32)) {
2405 Opc = Intrinsic::aarch64_neon_uaddlp;
2406 HAddTys.push_back(LLT::fixed_vector(8, i16));
2407 HAddTys.push_back(LLT::fixed_vector(4, i32));
2408 } else if (Ty == LLT::fixed_vector(2, i64)) {
2409 Opc = Intrinsic::aarch64_neon_uaddlp;
2410 HAddTys.push_back(LLT::fixed_vector(8, i16));
2411 HAddTys.push_back(LLT::fixed_vector(4, i32));
2412 HAddTys.push_back(LLT::fixed_vector(2, i64));
2413 } else if (Ty == LLT::fixed_vector(4, i16)) {
2414 Opc = Intrinsic::aarch64_neon_uaddlp;
2415 HAddTys.push_back(LLT::fixed_vector(4, i16));
2416 } else if (Ty == LLT::fixed_vector(2, i32)) {
2417 Opc = Intrinsic::aarch64_neon_uaddlp;
2418 HAddTys.push_back(LLT::fixed_vector(4, i16));
2419 HAddTys.push_back(LLT::fixed_vector(2, i32));
2420 } else
2421 llvm_unreachable("unexpected vector shape");
2422 MachineInstrBuilder UADD;
2423 for (LLT HTy : HAddTys) {
2424 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2425 HSum = UADD.getReg(0);
2426 }
2427
2428 // Post-conditioning.
2429 if (Ty.isScalar() && (Size == 64 || Size == 128))
2430 MIRBuilder.buildZExt(Dst, UADD);
2431 else
2432 UADD->getOperand(0).setReg(Dst);
2433 MI.eraseFromParent();
2434 return true;
2435}
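// Worked example of the dot-product reduction above (editor's note): CNT
// leaves one popcount per byte, and a UDOT against a splat of ones adds each
// group of four byte counts into its 32-bit lane, e.g.
//   {3, 1, 0, 2} . {1, 1, 1, 1} = 6
// which is the popcount of that 32-bit element; the v2s64 case then needs one
// more G_UADDLP to pair up the two 32-bit partial sums per 64-bit lane.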
2436
2437bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2438 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2439 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2440 LLT i64 = LLT::integer(64);
2441 auto Addr = MI.getOperand(1).getReg();
2442 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2443 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2444 auto DstLo = MRI.createGenericVirtualRegister(i64);
2445 auto DstHi = MRI.createGenericVirtualRegister(i64);
2446
2447 MachineInstrBuilder CAS;
2448 if (ST->hasLSE()) {
2449 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2450 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2451 // the rest of the MIR so we must reassemble the extracted registers into a
2452 // 128-bit known-regclass one with code like this:
2453 //
2454 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2455 // %out = CASP %in1, ...
2456 // %OldLo = G_EXTRACT %out, 0
2457 // %OldHi = G_EXTRACT %out, 64
2458 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2459 unsigned Opcode;
2460 switch (Ordering) {
2461 case AtomicOrdering::Acquire:
2462 Opcode = AArch64::CASPAX;
2463 break;
2464 case AtomicOrdering::Release:
2465 Opcode = AArch64::CASPLX;
2466 break;
2467 case AtomicOrdering::AcquireRelease:
2468 case AtomicOrdering::SequentiallyConsistent:
2469 Opcode = AArch64::CASPALX;
2470 break;
2471 default:
2472 Opcode = AArch64::CASPX;
2473 break;
2474 }
2475
2476 LLT s128 = LLT::scalar(128);
2477 auto CASDst = MRI.createGenericVirtualRegister(s128);
2478 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2479 auto CASNew = MRI.createGenericVirtualRegister(s128);
2480 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2481 .addUse(DesiredI->getOperand(0).getReg())
2482 .addImm(AArch64::sube64)
2483 .addUse(DesiredI->getOperand(1).getReg())
2484 .addImm(AArch64::subo64);
2485 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2486 .addUse(NewI->getOperand(0).getReg())
2487 .addImm(AArch64::sube64)
2488 .addUse(NewI->getOperand(1).getReg())
2489 .addImm(AArch64::subo64);
2490
2491 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2492
2493 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2494 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2495 } else {
2496 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2497 // can take arbitrary registers so it just has the normal GPR64 operands the
2498 // rest of AArch64 is expecting.
2499 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2500 unsigned Opcode;
2501 switch (Ordering) {
2502 case AtomicOrdering::Acquire:
2503 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2504 break;
2505 case AtomicOrdering::Release:
2506 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2507 break;
2508 case AtomicOrdering::AcquireRelease:
2509 case AtomicOrdering::SequentiallyConsistent:
2510 Opcode = AArch64::CMP_SWAP_128;
2511 break;
2512 default:
2513 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2514 break;
2515 }
2516
2517 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2518 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2519 {Addr, DesiredI->getOperand(0),
2520 DesiredI->getOperand(1), NewI->getOperand(0),
2521 NewI->getOperand(1)});
2522 }
2523
2524 CAS.cloneMemRefs(MI);
2525 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2526 *MRI.getTargetRegisterInfo(),
2527 *ST->getRegBankInfo());
2528
2529 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2530 MI.eraseFromParent();
2531 return true;
2532}
2533
2534bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2535 LegalizerHelper &Helper) const {
2536 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2537 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2538 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2539 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2540 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2541 MI.eraseFromParent();
2542 return true;
2543}
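// Editor's sketch of the identity used above, cttz(x) == ctlz(bitreverse(x)),
// checked at compile time on an 8-bit value with hypothetical helpers:
constexpr unsigned reverse8(unsigned V) {
  unsigned R = 0;
  for (unsigned I = 0; I < 8; ++I)
    R |= ((V >> I) & 1u) << (7 - I);
  return R;
}
constexpr unsigned countLeadingZeros8(unsigned V) {
  unsigned N = 0;
  for (int Bit = 7; Bit >= 0 && !((V >> Bit) & 1u); --Bit)
    ++N;
  return N;
}
static_assert(reverse8(0x28) == 0x14, "0b00101000 reverses to 0b00010100");
static_assert(countLeadingZeros8(reverse8(0x28)) == 3,
              "matches the three trailing zeros of 0x28");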
2544
2545bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2546 LegalizerHelper &Helper) const {
2547 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2548
2549 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2550 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2551 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2552 // the instruction).
2553 auto &Value = MI.getOperand(1);
2554 Register ExtValueReg =
2555 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2556 Value.setReg(ExtValueReg);
2557 return true;
2558 }
2559
2560 return false;
2561}
2562
2563bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2564 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2565 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2566 auto VRegAndVal =
2567 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2568 if (VRegAndVal)
2569 return true;
2570 LLT VecTy = MRI.getType(Element->getVectorReg());
2571 if (VecTy.isScalableVector())
2572 return true;
2573 return Helper.lowerExtractInsertVectorElt(MI) !=
2575}
2576
2577bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2578 MachineInstr &MI, LegalizerHelper &Helper) const {
2579 MachineFunction &MF = *MI.getParent()->getParent();
2580 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2581 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2582
2583 // If stack probing is not enabled for this function, use the default
2584 // lowering.
2585 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2586 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2587 "inline-asm") {
2588 Helper.lowerDynStackAlloc(MI);
2589 return true;
2590 }
2591
2592 Register Dst = MI.getOperand(0).getReg();
2593 Register AllocSize = MI.getOperand(1).getReg();
2594 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2595
2596 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2597 "Unexpected type for dynamic alloca");
2598 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2599 "Unexpected type for dynamic alloca");
2600
2601 LLT PtrTy = MRI.getType(Dst);
2602 Register SPReg =
2603 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2604 Register SPTmp =
2605 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2606 auto NewMI =
2607 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2608 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2609 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2610 MIRBuilder.buildCopy(Dst, SPTmp);
2611
2612 MI.eraseFromParent();
2613 return true;
2614}
2615
2616bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2617 LegalizerHelper &Helper) const {
2618 MachineIRBuilder &MIB = Helper.MIRBuilder;
2619 auto &AddrVal = MI.getOperand(0);
2620
2621 int64_t IsWrite = MI.getOperand(1).getImm();
2622 int64_t Locality = MI.getOperand(2).getImm();
2623 int64_t IsData = MI.getOperand(3).getImm();
2624
2625 bool IsStream = Locality == 0;
2626 if (Locality != 0) {
2627 assert(Locality <= 3 && "Prefetch locality out-of-range");
2628 // The locality argument is ordered the opposite way to the PRFM target
2629 // cache level, so invert it.
2630 // The target-level encoding starts at 0 for L1.
2631 Locality = 3 - Locality;
2632 }
2633
2634 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2635
2636 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2637 MI.eraseFromParent();
2638 return true;
2639}
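// Editor's sketch of the locality inversion above as a compile-time check
// (the helper name is hypothetical): generic prefetch locality 3 (keep in all
// levels) maps to target level 0, and locality 0 selects the streaming form.
constexpr unsigned genericPrefetchOp(unsigned IsWrite, unsigned Locality,
                                     unsigned IsData) {
  unsigned IsStream = Locality == 0;
  unsigned Level = Locality == 0 ? 0 : 3 - Locality;
  return (IsWrite << 4) | ((!IsData) << 3) | (Level << 1) | IsStream;
}
static_assert(genericPrefetchOp(0, 3, 1) == 0, "read, locality 3: level 1, keep");
static_assert(genericPrefetchOp(1, 0, 1) == 0b10001,
              "write, locality 0: level 1, streaming");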
2640
2641bool AArch64LegalizerInfo::legalizeConcatVectors(
2642 MachineInstr &MI, MachineRegisterInfo &MRI,
2643 MachineIRBuilder &MIRBuilder) const {
2644 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2645 // This is analogous to SDAG's integer type promotion for sub-byte types.
2646 auto &Concat = cast<GConcatVectors>(MI);
2647 Register DstReg = Concat.getReg(0);
2648 LLT DstTy = MRI.getType(DstReg);
2649 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2650
2651 unsigned WideEltSize =
2652 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
2653 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2654 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2655 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2656
2657 SmallVector<Register> WideSrcs;
2658 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2659 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2660 WideSrcs.push_back(Wide.getReg(0));
2661 }
2662
2663 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2664 MIRBuilder.buildTrunc(DstReg, WideConcat);
2665 MI.eraseFromParent();
2666 return true;
2667}
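// Illustrative MIR sketch (hypothetical s1 vectors, e.g. compare results):
//   %d:_(<8 x s1>) = G_CONCAT_VECTORS %a:_(<4 x s1>), %b:_(<4 x s1>)
// is handled by widening each source to byte elements first,
//   %wa:_(<4 x s8>) = G_ANYEXT %a
//   %wb:_(<4 x s8>) = G_ANYEXT %b
//   %wd:_(<8 x s8>) = G_CONCAT_VECTORS %wa, %wb
//   %d:_(<8 x s1>) = G_TRUNC %wd
// mirroring how SelectionDAG promotes sub-byte integer element types.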
2668
2669bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2670 MachineIRBuilder &MIRBuilder,
2671 MachineRegisterInfo &MRI) const {
2672 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2673
2674 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via an
2675 // f64 -> f32 G_FPTRUNC_ODD followed by an f32 -> [b]f16 G_FPTRUNC, which
2676 // avoids the double-rounding issue that two back-to-back ordinary G_FPTRUNC
2677 // operations would introduce.
2678
2679 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2680 auto Mid =
2681 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {LLT::float32()}, {Src});
2682 MIRBuilder.buildInstr(AArch64::G_FPTRUNC, {Dst}, {Mid});
2683 MI.eraseFromParent();
2684 return true;
2685 }
2686
2687 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2688 "Expected a power of 2 elements");
2689
2690 // We must mutate the types here, as FPTrunc may operate on an IEEE
2691 // floating-point type or on bfloat16.
2692 LLT v2s16 = DstTy.changeElementCount(2);
2693 LLT v4s16 = DstTy.changeElementCount(4);
2694 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2695 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2696 LLT v2s64 = SrcTy.changeElementCount(2);
2697
2698 SmallVector<Register> RegsToUnmergeTo;
2699 SmallVector<Register> TruncOddDstRegs;
2700 SmallVector<Register> RegsToMerge;
2701
2702 unsigned ElemCount = SrcTy.getNumElements();
2703
2704 // Find the biggest chunk size we can work with (4 elements when possible, otherwise 2)
2705 int StepSize = ElemCount % 4 ? 2 : 4;
2706
2707 // If the element count is a power of 2 greater than 2, first unmerge the
2708 // source into enough two-element pieces.
2709 if (ElemCount <= 2)
2710 RegsToUnmergeTo.push_back(Src);
2711 else {
2712 for (unsigned i = 0; i < ElemCount / 2; ++i)
2713 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2714
2715 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2716 }
2717
2718 // Create all of the round-to-odd instructions and store them
2719 for (auto SrcReg : RegsToUnmergeTo) {
2720 Register Mid =
2721 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2722 .getReg(0);
2723 TruncOddDstRegs.push_back(Mid);
2724 }
2725
2726 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2727 // truncate 2s32 to 2s16.
2728 unsigned Index = 0;
2729 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2730 if (StepSize == 4) {
2731 Register ConcatDst =
2732 MIRBuilder
2734 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2735 .getReg(0);
2736
2737 RegsToMerge.push_back(
2738 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2739 } else {
2740 RegsToMerge.push_back(
2741 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2742 }
2743 }
2744
2745 // If there is only one register, replace the destination
2746 if (RegsToMerge.size() == 1) {
2747 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2748 MI.eraseFromParent();
2749 return true;
2750 }
2751
2752 // Merge the rest of the instructions & replace the register
2753 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2754 MRI.replaceRegWith(Dst, Fin);
2755 MI.eraseFromParent();
2756 return true;
2757}
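// Illustrative MIR sketch of the vector path above (hypothetical registers),
// for a <4 x s64> -> <4 x s16> truncation:
//   %lo:_(<2 x s64>), %hi:_(<2 x s64>) = G_UNMERGE_VALUES %src:_(<4 x s64>)
//   %lo32:_(<2 x s32>) = G_FPTRUNC_ODD %lo   ; round-to-odd intermediate
//   %hi32:_(<2 x s32>) = G_FPTRUNC_ODD %hi
//   %mid:_(<4 x s32>) = G_CONCAT_VECTORS %lo32, %hi32
//   %dst:_(<4 x s16>) = G_FPTRUNC %mid
// so only the final step rounds to the destination precision and no double
// rounding occurs.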
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:77
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
LLT divide(int Factor) const
Return a type that is Factor times smaller.
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
bool isFloat64() const
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering at least as strong as Ordering.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
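Each predicate above is a std::function over a LegalityQuery, and all()/any() compose them; a small illustrative combination (not one of this file's actual rules):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

using namespace llvm;
using namespace LegalityPredicates;

// Illustrative only: true when type index 0 is a pointer vector or a scalar
// wider than 64 bits, and type index 1 is one of two whitelisted scalars.
static LegalityPredicate exampleCompositePredicate() {
  return all(any(isPointerVector(0), scalarWiderThan(0, 64)),
             typeInSet(1, {LLT::scalar(32), LLT::scalar(64)}));
}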
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar or element type of the given type index to match the scalar or element type of type index FromTypeIdx.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromTypeIdx.
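Each mutation above is likewise a callable that, given a LegalityQuery, picks the type index to rewrite and its new LLT; a hedged sketch with illustrative values (not this file's rules):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

using namespace llvm;
using namespace LegalizeMutations;

// Illustrative only: widen the scalar or element type of type index 0 to the
// next power of two, but never below 16 bits. Alternatives in the same
// spirit: changeTo(1, LLT::scalar(64)) to pin a type, or scalarize(0) to
// split a vector into its element type.
static LegalizeMutation exampleMutation() {
  return widenScalarOrEltToNextPow2(0, /*Min=*/16);
}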
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
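A hedged sketch of the matchers above (the helper is illustrative and uses the binding m_Reg(Register &) overload rather than the nullary one listed here):

#include <optional>

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace MIPatternMatch;

// Illustrative only: is Reg defined by G_PTR_ADD of some base register plus a
// constant? If so, hand back the constant offset.
static std::optional<APInt> matchConstantPtrAdd(Register Reg,
                                                const MachineRegisterInfo &MRI) {
  Register Base;
  APInt Off;
  if (mi_match(Reg, MRI, m_GPtrAdd(m_Reg(Base), m_ICst(Off))))
    return Off;
  return std::nullopt;
}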
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:156
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1523
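A hedged sketch of using this helper (the power-of-two check is an illustrative consumer, not code from this file):

#include <optional>

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Illustrative only: treat a splat of a constant the same way as a plain
// G_CONSTANT when asking whether the defined value is a power of two.
static bool definesPow2ConstantOrSplat(MachineInstr &DefMI,
                                       const MachineRegisterInfo &MRI) {
  if (std::optional<APInt> Cst = isConstantOrConstantSplatVector(DefMI, MRI))
    return Cst->isPowerOf2();
  return false;
}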
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
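A hedged arithmetic sketch tying the power-of-two helpers together (independent of this file's actual rules):

#include <cstdint>

#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Illustrative only: round an odd bit width up to the next power of two, the
// way legalization typically widens an awkwardly sized scalar.
static uint64_t roundBitsUpToPow2(uint64_t Bits) {
  if (isPowerOf2_64(Bits))    // 8, 16, 32, 64, ... stay as they are
    return Bits;
  return PowerOf2Ceil(Bits);  // e.g. 24 -> 32, 48 -> 64
}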
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
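A hedged sketch of this lookup (the "index operand" framing and helper name are illustrative):

#include <optional>

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Illustrative only: fetch the constant behind IdxReg, looking through copies
// and extensions rooted at a G_CONSTANT, as a signed 64-bit value.
static std::optional<int64_t> getConstantIndex(Register IdxReg,
                                               const MachineRegisterInfo &MRI) {
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(IdxReg, MRI))
    return ValAndVReg->Value.getSExtValue();
  return std::nullopt;
}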
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
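A hedged sketch that ties assumeAligned, alignTo, and Log2 together (the pairing of a padded size with a log2 encoding is illustrative):

#include <cstdint>
#include <utility>

#include "llvm/Support/Alignment.h"

using namespace llvm;

// Illustrative only: pad Size up to alignment RawAlign (0 is treated as 1)
// and also return the log2 form many encodings expect.
static std::pair<uint64_t, unsigned> padAndEncode(uint64_t Size,
                                                  uint64_t RawAlign) {
  Align A = assumeAligned(RawAlign);   // Align is a non-zero power of two
  uint64_t Padded = alignTo(Size, A);  // next multiple of A, e.g. (10, 8) -> 16
  return {Padded, Log2(A)};            // e.g. Align(16) -> 4
}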
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...
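When the stock predicates are not expressive enough, a rule can inspect the LegalityQuery directly; a hedged sketch of a hand-written predicate (illustrative, not one of this file's rules):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

using namespace llvm;

// Illustrative only: accept a memory operation whose first MMO covers exactly
// the scalar type at type index 0 and is at least naturally aligned.
static LegalityPredicate isNaturallyAlignedScalarAccess() {
  return [](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[0];
    if (!Ty.isScalar() || Query.MMODescrs.empty())
      return false;
    const LegalityQuery::MemDesc &MMO = Query.MMODescrs[0];
    return MMO.MemoryTy == Ty &&
           MMO.AlignInBits >= Ty.getSizeInBits().getFixedValue();
  };
}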