LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69
70 const LLT f16 = LLT::float16();
71 const LLT v4f16 = LLT::fixed_vector(4, f16);
72 const LLT v8f16 = LLT::fixed_vector(8, f16);
73
74 const LLT f32 = LLT::float32();
75 const LLT v2f32 = LLT::fixed_vector(2, f32);
76 const LLT v4f32 = LLT::fixed_vector(4, f32);
77
78 const LLT f64 = LLT::float64();
79 const LLT v2f64 = LLT::fixed_vector(2, f64);
80
81 const LLT f128 = LLT::float128();
82
83 const LLT i8 = LLT::integer(8);
84 const LLT v8i8 = LLT::fixed_vector(8, i8);
85 const LLT v16i8 = LLT::fixed_vector(16, i8);
86
87 const LLT i16 = LLT::integer(16);
88 const LLT v8i16 = LLT::fixed_vector(8, i16);
89 const LLT v4i16 = LLT::fixed_vector(4, i16);
90
91 const LLT i32 = LLT::integer(32);
92 const LLT v2i32 = LLT::fixed_vector(2, i32);
93 const LLT v4i32 = LLT::fixed_vector(4, i32);
94
95 const LLT i64 = LLT::integer(64);
96 const LLT v2i64 = LLT::fixed_vector(2, i64);
97
98 const LLT i128 = LLT::integer(128);
99
100 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
101 v16s8, v8s16, v4s32,
102 v2s64, v2p0,
103 /* End 128bit types */
104 /* Begin 64bit types */
105 v8s8, v4s16, v2s32};
106 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
107 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
108 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
109
110 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
111
112 // FIXME: support subtargets which have neon/fp-armv8 disabled.
113 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
115 return;
116 }
117
118 // Some instructions only support s16 if the subtarget has full 16-bit FP
119 // support.
120 const bool HasFP16 = ST.hasFullFP16();
121 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
122
123 const bool HasCSSC = ST.hasCSSC();
124 const bool HasRCPC3 = ST.hasRCPC3();
125 const bool HasSVE = ST.hasSVE();
126
128 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
129 .legalFor({p0, s8, s16, s32, s64})
130 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
131 v2s64, v2p0})
132 .widenScalarToNextPow2(0)
133 .clampScalar(0, s8, s64)
136 .clampNumElements(0, v8s8, v16s8)
137 .clampNumElements(0, v4s16, v8s16)
138 .clampNumElements(0, v2s32, v4s32)
139 .clampMaxNumElements(0, s64, 2)
140 .clampMaxNumElements(0, p0, 2)
142
144 .legalFor({p0, s16, s32, s64})
145 .legalFor(PackedVectorAllTypeList)
149 .clampScalar(0, s16, s64)
150 .clampNumElements(0, v8s8, v16s8)
151 .clampNumElements(0, v4s16, v8s16)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampMaxNumElements(0, s64, 2)
154 .clampMaxNumElements(0, p0, 2);
155
157 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
158 smallerThan(1, 0)))
159 .widenScalarToNextPow2(0)
160 .clampScalar(0, s32, s64)
162 .minScalar(1, s8)
163 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
164 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
165
167 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
168 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
169 .widenScalarToNextPow2(1)
170 .clampScalar(1, s32, s128)
172 .minScalar(0, s16)
173 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
174 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
175 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
176
177 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
178 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
179 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
180 .widenScalarToNextPow2(0)
181 .clampScalar(0, s32, s64)
182 .clampMaxNumElements(0, s8, 16)
183 .clampMaxNumElements(0, s16, 8)
184 .clampNumElements(0, v2s32, v4s32)
185 .clampNumElements(0, v2s64, v2s64)
187 [=](const LegalityQuery &Query) {
188 return Query.Types[0].getNumElements() <= 2;
189 },
190 0, s32)
191 .minScalarOrEltIf(
192 [=](const LegalityQuery &Query) {
193 return Query.Types[0].getNumElements() <= 4;
194 },
195 0, s16)
196 .minScalarOrEltIf(
197 [=](const LegalityQuery &Query) {
198 return Query.Types[0].getNumElements() <= 16;
199 },
200 0, s8)
201 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
203
205 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
206 .widenScalarToNextPow2(0)
207 .clampScalar(0, s32, s64)
208 .clampMaxNumElements(0, s8, 16)
209 .clampMaxNumElements(0, s16, 8)
210 .clampNumElements(0, v2s32, v4s32)
211 .clampNumElements(0, v2s64, v2s64)
213 [=](const LegalityQuery &Query) {
214 return Query.Types[0].getNumElements() <= 2;
215 },
216 0, s32)
217 .minScalarOrEltIf(
218 [=](const LegalityQuery &Query) {
219 return Query.Types[0].getNumElements() <= 4;
220 },
221 0, s16)
222 .minScalarOrEltIf(
223 [=](const LegalityQuery &Query) {
224 return Query.Types[0].getNumElements() <= 16;
225 },
226 0, s8)
227 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
229
230 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
231 .customIf([=](const LegalityQuery &Query) {
232 const auto &SrcTy = Query.Types[0];
233 const auto &AmtTy = Query.Types[1];
234 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
235 AmtTy.getSizeInBits() == 32;
236 })
237 .legalFor({
238 {i32, i32},
239 {i32, i64},
240 {i64, i64},
241 {v8i8, v8i8},
242 {v16i8, v16i8},
243 {v4i16, v4i16},
244 {v8i16, v8i16},
245 {v2i32, v2i32},
246 {v4i32, v4i32},
247 {v2i64, v2i64},
248 })
249 .widenScalarToNextPow2(0)
250 .clampScalar(1, s32, s64)
251 .clampScalar(0, s32, s64)
252 .clampNumElements(0, v8s8, v16s8)
253 .clampNumElements(0, v4s16, v8s16)
254 .clampNumElements(0, v2s32, v4s32)
255 .clampNumElements(0, v2s64, v2s64)
257 .minScalarSameAs(1, 0)
261
263 .legalFor({{p0, i64}, {v2p0, v2i64}})
264 .clampScalarOrElt(1, s64, s64)
265 .clampNumElements(0, v2p0, v2p0);
266
267 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
268
269 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
270 .legalFor({i32, i64})
271 .libcallFor({i128})
272 .clampScalar(0, s32, s64)
274 .scalarize(0);
275
276 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
277 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
278 .libcallFor({i128})
280 .minScalarOrElt(0, s32)
281 .clampNumElements(0, v2s32, v4s32)
282 .clampNumElements(0, v2s64, v2s64)
283 .scalarize(0);
284
285 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
286 .widenScalarToNextPow2(0, /*Min = */ 32)
287 .clampScalar(0, s32, s64)
288 .lower();
289
290 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
291 .legalFor({s64, v16s8, v8s16, v4s32})
292 .lower();
293
294 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
295 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
296 .legalFor(HasCSSC, {i32, i64})
297 .minScalar(HasCSSC, 0, s32)
298 .clampNumElements(0, v8s8, v16s8)
299 .clampNumElements(0, v4s16, v8s16)
300 .clampNumElements(0, v2s32, v4s32)
301 .lower();
302
303 // FIXME: Legal vector types are only legal with NEON.
305 .legalFor(HasCSSC, {s32, s64})
306 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
307 .customIf([=](const LegalityQuery &Q) {
308 // TODO: Fix suboptimal codegen for 128+ bit types.
309 LLT SrcTy = Q.Types[0];
310 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
311 })
312 .widenScalarIf(
313 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
314 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
315 .widenScalarIf(
316 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
317 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
318 .clampNumElements(0, v8s8, v16s8)
319 .clampNumElements(0, v4s16, v8s16)
320 .clampNumElements(0, v2s32, v4s32)
321 .clampNumElements(0, v2s64, v2s64)
323 .lower();
324
326 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
327 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
328 .lower();
329
331 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
332 .legalFor({{s32, s32}, {s64, s32}})
333 .clampScalar(0, s32, s64)
334 .clampScalar(1, s32, s64)
336
337 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
338 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
339 .lower();
340
342 .legalFor({{i32, i64}, {i64, i64}})
343 .customIf([=](const LegalityQuery &Q) {
344 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
345 })
346 .lower();
348
349 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
350 .customFor({{s32, s32}, {s64, s64}});
351
352 auto always = [=](const LegalityQuery &Q) { return true; };
354 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
355 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
356 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
357 .customFor({{s128, s128},
358 {v4s16, v4s16},
359 {v8s16, v8s16},
360 {v2s32, v2s32},
361 {v4s32, v4s32},
362 {v2s64, v2s64}})
363 .clampScalar(0, s32, s128)
366 .minScalarEltSameAsIf(always, 1, 0)
367 .maxScalarEltSameAsIf(always, 1, 0)
368 .clampNumElements(0, v8s8, v16s8)
369 .clampNumElements(0, v4s16, v8s16)
370 .clampNumElements(0, v2s32, v4s32)
371 .clampNumElements(0, v2s64, v2s64)
374
375 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
376 .legalFor({{s32, s32},
377 {s64, s64},
378 {v8s8, v8s8},
379 {v16s8, v16s8},
380 {v4s16, v4s16},
381 {v8s16, v8s16},
382 {v2s32, v2s32},
383 {v4s32, v4s32}})
384 .widenScalarToNextPow2(1, /*Min=*/32)
385 .clampScalar(1, s32, s64)
387 .clampNumElements(0, v8s8, v16s8)
388 .clampNumElements(0, v4s16, v8s16)
389 .clampNumElements(0, v2s32, v4s32)
392 .scalarSameSizeAs(0, 1);
393
394 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
395
397 .lowerIf(isVector(0))
398 .widenScalarToNextPow2(1, /*Min=*/32)
399 .clampScalar(1, s32, s64)
400 .scalarSameSizeAs(0, 1)
401 .legalFor(HasCSSC, {s32, s64})
402 .customFor(!HasCSSC, {s32, s64});
403
404 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
405
406 getActionDefinitionsBuilder(G_BITREVERSE)
407 .legalFor({s32, s64, v8s8, v16s8})
408 .widenScalarToNextPow2(0, /*Min = */ 32)
410 .clampScalar(0, s32, s64)
411 .clampNumElements(0, v8s8, v16s8)
412 .clampNumElements(0, v4s16, v8s16)
413 .clampNumElements(0, v2s32, v4s32)
414 .clampNumElements(0, v2s64, v2s64)
417 .lower();
418
420 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
422 .clampScalar(0, s32, s64)
423 .clampNumElements(0, v4s16, v8s16)
424 .clampNumElements(0, v2s32, v4s32)
425 .clampNumElements(0, v2s64, v2s64)
427
428 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
429 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
430 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
431 .clampNumElements(0, v8s8, v16s8)
432 .clampNumElements(0, v4s16, v8s16)
433 .clampNumElements(0, v2s32, v4s32)
434 .clampMaxNumElements(0, s64, 2)
437 .lower();
438
440 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
441 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
442 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
443 .legalFor({f32, f64, v2f32, v4f32, v2f64})
444 .legalFor(HasFP16, {f16, v4f16, v8f16})
445 .libcallFor({f128})
446 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
447 .minScalarOrElt(0, MinFPScalar)
448 .clampNumElements(0, v4s16, v8s16)
449 .clampNumElements(0, v2s32, v4s32)
450 .clampNumElements(0, v2s64, v2s64)
452
453 getActionDefinitionsBuilder({G_FABS, G_FNEG})
454 .legalFor({f32, f64, v2f32, v4f32, v2f64})
455 .legalFor(HasFP16, {f16, v4f16, v8f16})
456 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
458 .clampNumElements(0, v4s16, v8s16)
459 .clampNumElements(0, v2s32, v4s32)
460 .clampNumElements(0, v2s64, v2s64)
462 .lowerFor({f16, v4f16, v8f16});
463
465 .libcallFor({f32, f64, f128})
466 .minScalar(0, f32)
467 .scalarize(0);
468
469 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
470 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
471 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
472 G_FSINH, G_FTANH, G_FMODF})
473 // We need a call for these, so we always need to scalarize.
474 .scalarize(0)
475 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
476 .minScalar(0, f32)
477 .libcallFor({f32, f64, f128});
478 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
479 .scalarize(0)
480 .minScalar(0, f32)
481 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
482
483 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
484 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
485 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
486 .minScalar(1, s32)
487 .libcallFor({{s64, s128}})
488 .lower();
489 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
490 .legalFor({{s64, s32}, {s64, s64}})
491 .legalFor(HasFP16, {{s64, s16}})
492 .minScalar(0, s64)
493 .minScalar(1, s32)
494 .libcallFor({{s64, s128}})
495 .lower();
496
497 // TODO: Custom legalization for mismatched types.
498 getActionDefinitionsBuilder(G_FCOPYSIGN)
500 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
501 [=](const LegalityQuery &Query) {
502 const LLT Ty = Query.Types[0];
503 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
504 })
505 .lower();
506
508
509 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
510 auto &Actions = getActionDefinitionsBuilder(Op);
511
512 if (Op == G_SEXTLOAD)
514
515 // Atomics have zero extending behavior.
516 Actions
517 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
518 {s32, p0, s16, 8},
519 {s32, p0, s32, 8},
520 {s64, p0, s8, 2},
521 {s64, p0, s16, 2},
522 {s64, p0, s32, 4},
523 {s64, p0, s64, 8},
524 {p0, p0, s64, 8},
525 {v2s32, p0, s64, 8}})
526 .widenScalarToNextPow2(0)
527 .clampScalar(0, s32, s64)
528 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
529 // how to do that yet.
530 .unsupportedIfMemSizeNotPow2()
531 // Lower anything left over into G_*EXT and G_LOAD
532 .lower();
533 }
534
535 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
536 const LLT &ValTy = Query.Types[0];
537 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
538 };
539
541 .customIf([=](const LegalityQuery &Query) {
542 return HasRCPC3 && Query.Types[0] == s128 &&
543 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
544 })
545 .customIf([=](const LegalityQuery &Query) {
546 return Query.Types[0] == s128 &&
547 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
548 })
549 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
550 {s16, p0, s16, 8},
551 {s32, p0, s32, 8},
552 {s64, p0, s64, 8},
553 {p0, p0, s64, 8},
554 {s128, p0, s128, 8},
555 {v8s8, p0, s64, 8},
556 {v16s8, p0, s128, 8},
557 {v4s16, p0, s64, 8},
558 {v8s16, p0, s128, 8},
559 {v2s32, p0, s64, 8},
560 {v4s32, p0, s128, 8},
561 {v2s64, p0, s128, 8}})
562 // These extends are also legal
563 .legalForTypesWithMemDesc(
564 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
565 .legalForTypesWithMemDesc({
566 // SVE vscale x 128 bit base sizes
567 {nxv16s8, p0, nxv16s8, 8},
568 {nxv8s16, p0, nxv8s16, 8},
569 {nxv4s32, p0, nxv4s32, 8},
570 {nxv2s64, p0, nxv2s64, 8},
571 })
572 .widenScalarToNextPow2(0, /* MinSize = */ 8)
573 .clampMaxNumElements(0, s8, 16)
574 .clampMaxNumElements(0, s16, 8)
575 .clampMaxNumElements(0, s32, 4)
576 .clampMaxNumElements(0, s64, 2)
577 .clampMaxNumElements(0, p0, 2)
579 .clampScalar(0, s8, s64)
581 [=](const LegalityQuery &Query) {
582 // Clamp extending load results to 32-bits.
583 return Query.Types[0].isScalar() &&
584 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
585 Query.Types[0].getSizeInBits() > 32;
586 },
587 changeTo(0, s32))
588 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
589 .bitcastIf(typeInSet(0, {v4s8}),
590 [=](const LegalityQuery &Query) {
591 const LLT VecTy = Query.Types[0];
592 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
593 })
594 .customIf(IsPtrVecPred)
595 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
596 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
597
599 .customIf([=](const LegalityQuery &Query) {
600 return HasRCPC3 && Query.Types[0] == s128 &&
601 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
602 })
603 .customIf([=](const LegalityQuery &Query) {
604 return Query.Types[0] == s128 &&
605 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
606 })
607 .widenScalarIf(
608 all(scalarNarrowerThan(0, 32),
610 changeTo(0, s32))
612 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
613 {s32, p0, s8, 8}, // truncstorei8 from s32
614 {s64, p0, s8, 8}, // truncstorei8 from s64
615 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
616 {s64, p0, s16, 8}, // truncstorei16 from s64
617 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
618 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
619 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
620 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
621 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
622 .legalForTypesWithMemDesc({
623 // SVE vscale x 128 bit base sizes
624 // TODO: Add nxv2p0. Consider bitcastIf.
625 // See #92130
626 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
627 {nxv16s8, p0, nxv16s8, 8},
628 {nxv8s16, p0, nxv8s16, 8},
629 {nxv4s32, p0, nxv4s32, 8},
630 {nxv2s64, p0, nxv2s64, 8},
631 })
632 .clampScalar(0, s8, s64)
633 .minScalarOrElt(0, s8)
634 .lowerIf([=](const LegalityQuery &Query) {
635 return Query.Types[0].isScalar() &&
636 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
637 })
638 // Maximum: sN * k = 128
639 .clampMaxNumElements(0, s8, 16)
640 .clampMaxNumElements(0, s16, 8)
641 .clampMaxNumElements(0, s32, 4)
642 .clampMaxNumElements(0, s64, 2)
643 .clampMaxNumElements(0, p0, 2)
645 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
646 .bitcastIf(all(typeInSet(0, {v4s8}),
647 LegalityPredicate([=](const LegalityQuery &Query) {
648 return Query.Types[0].getSizeInBits() ==
649 Query.MMODescrs[0].MemoryTy.getSizeInBits();
650 })),
651 [=](const LegalityQuery &Query) {
652 const LLT VecTy = Query.Types[0];
653 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
654 })
655 .customIf(IsPtrVecPred)
656 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
657 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
658 .lower();
659
660 getActionDefinitionsBuilder(G_INDEXED_STORE)
661 // Idx 0 == Ptr, Idx 1 == Val
662 // TODO: we can implement legalizations but as of now these are
663 // generated in a very specific way.
665 {p0, s8, s8, 8},
666 {p0, s16, s16, 8},
667 {p0, s32, s8, 8},
668 {p0, s32, s16, 8},
669 {p0, s32, s32, 8},
670 {p0, s64, s64, 8},
671 {p0, p0, p0, 8},
672 {p0, v8s8, v8s8, 8},
673 {p0, v16s8, v16s8, 8},
674 {p0, v4s16, v4s16, 8},
675 {p0, v8s16, v8s16, 8},
676 {p0, v2s32, v2s32, 8},
677 {p0, v4s32, v4s32, 8},
678 {p0, v2s64, v2s64, 8},
679 {p0, v2p0, v2p0, 8},
680 {p0, s128, s128, 8},
681 })
682 .unsupported();
683
684 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
685 LLT LdTy = Query.Types[0];
686 LLT PtrTy = Query.Types[1];
687 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
688 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
689 return false;
690 if (PtrTy != p0)
691 return false;
692 return true;
693 };
694 getActionDefinitionsBuilder(G_INDEXED_LOAD)
697 .legalIf(IndexedLoadBasicPred)
698 .unsupported();
699 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
700 .unsupportedIf(
702 .legalIf(all(typeInSet(0, {s16, s32, s64}),
703 LegalityPredicate([=](const LegalityQuery &Q) {
704 LLT LdTy = Q.Types[0];
705 LLT PtrTy = Q.Types[1];
706 LLT MemTy = Q.MMODescrs[0].MemoryTy;
707 if (PtrTy != p0)
708 return false;
709 if (LdTy == s16)
710 return MemTy == s8;
711 if (LdTy == s32)
712 return MemTy == s8 || MemTy == s16;
713 if (LdTy == s64)
714 return MemTy == s8 || MemTy == s16 || MemTy == s32;
715 return false;
716 })))
717 .unsupported();
718
719 // Constants
721 .legalFor({p0, s8, s16, s32, s64})
722 .widenScalarToNextPow2(0)
723 .clampScalar(0, s8, s64);
724 getActionDefinitionsBuilder(G_FCONSTANT)
725 .legalFor({s16, s32, s64, s128})
726 .clampScalar(0, MinFPScalar, s128);
727
728 // FIXME: fix moreElementsToNextPow2
730 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
732 .minScalarOrElt(1, s8)
733 .clampScalar(1, s32, s64)
734 .clampScalar(0, s32, s32)
737 [=](const LegalityQuery &Query) {
738 const LLT &Ty = Query.Types[0];
739 const LLT &SrcTy = Query.Types[1];
740 return Ty.isVector() && !SrcTy.isPointerVector() &&
741 Ty.getElementType() != SrcTy.getElementType();
742 },
743 0, 1)
744 .minScalarOrEltIf(
745 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
746 1, s32)
747 .minScalarOrEltIf(
748 [=](const LegalityQuery &Query) {
749 return Query.Types[1].isPointerVector();
750 },
751 0, s64)
753 .clampNumElements(1, v8s8, v16s8)
754 .clampNumElements(1, v4s16, v8s16)
755 .clampNumElements(1, v2s32, v4s32)
756 .clampNumElements(1, v2s64, v2s64)
757 .clampNumElements(1, v2p0, v2p0)
758 .customIf(isVector(0));
759
761 .legalFor({{s32, f32},
762 {s32, f64},
763 {v4s32, v4f32},
764 {v2s32, v2f32},
765 {v2s64, v2f64}})
766 .legalFor(HasFP16, {{s32, f16}, {v4s16, v4f16}, {v8s16, v8f16}})
768 .clampScalar(0, s32, s32)
769 .minScalarOrElt(1, MinFPScalar)
772 [=](const LegalityQuery &Query) {
773 const LLT &Ty = Query.Types[0];
774 const LLT &SrcTy = Query.Types[1];
775 return Ty.isVector() && !SrcTy.isPointerVector() &&
776 Ty.getElementType() != SrcTy.getElementType();
777 },
778 0, 1)
779 .clampNumElements(1, v4s16, v8s16)
780 .clampNumElements(1, v2s32, v4s32)
781 .clampMaxNumElements(1, s64, 2)
783 .libcallFor({{s32, s128}});
784
785 // Extensions
786 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
787 unsigned DstSize = Query.Types[0].getSizeInBits();
788
789 // Handle legal vectors using legalFor
790 if (Query.Types[0].isVector())
791 return false;
792
793 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
794 return false; // Extending to a scalar s128 needs narrowing.
795
796 const LLT &SrcTy = Query.Types[1];
797
798 // Make sure we fit in a register otherwise. Don't bother checking that
799 // the source type is below 128 bits. We shouldn't be allowing anything
800 // through which is wider than the destination in the first place.
801 unsigned SrcSize = SrcTy.getSizeInBits();
802 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
803 return false;
804
805 return true;
806 };
807 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
808 .legalIf(ExtLegalFunc)
809 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
810 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
812 .clampMaxNumElements(1, s8, 8)
813 .clampMaxNumElements(1, s16, 4)
814 .clampMaxNumElements(1, s32, 2)
815 // Tries to convert a large EXTEND into two smaller EXTENDs
816 .lowerIf([=](const LegalityQuery &Query) {
817 return (Query.Types[0].getScalarSizeInBits() >
818 Query.Types[1].getScalarSizeInBits() * 2) &&
819 Query.Types[0].isVector() &&
820 (Query.Types[1].getScalarSizeInBits() == 8 ||
821 Query.Types[1].getScalarSizeInBits() == 16);
822 })
823 .clampMinNumElements(1, s8, 8)
824 .clampMinNumElements(1, s16, 4)
826
828 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
830 .clampMaxNumElements(0, s8, 8)
831 .clampMaxNumElements(0, s16, 4)
832 .clampMaxNumElements(0, s32, 2)
834 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
835 0, s8)
836 .lowerIf([=](const LegalityQuery &Query) {
837 LLT DstTy = Query.Types[0];
838 LLT SrcTy = Query.Types[1];
839 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
840 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
841 })
842 .clampMinNumElements(0, s8, 8)
843 .clampMinNumElements(0, s16, 4)
844 .alwaysLegal();
845
846 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
847 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
848 .clampNumElements(0, v2s32, v2s32);
849
850 getActionDefinitionsBuilder(G_SEXT_INREG)
851 .legalFor({s32, s64})
852 .legalFor(PackedVectorAllTypeList)
853 .maxScalar(0, s64)
854 .clampNumElements(0, v8s8, v16s8)
855 .clampNumElements(0, v4s16, v8s16)
856 .clampNumElements(0, v2s32, v4s32)
857 .clampMaxNumElements(0, s64, 2)
858 .lower();
859
860 // FP conversions
862 .legalFor(
863 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
864 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
866 .customIf([](const LegalityQuery &Q) {
867 LLT DstTy = Q.Types[0];
868 LLT SrcTy = Q.Types[1];
869 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
870 SrcTy.getScalarSizeInBits() == 64 &&
871 DstTy.getScalarSizeInBits() == 16;
872 })
873 // Clamp based on input
874 .clampNumElements(1, v4s32, v4s32)
875 .clampNumElements(1, v2s64, v2s64)
876 .scalarize(0);
877
879 .legalFor(
880 {{f32, f16}, {f64, f16}, {f64, f32}, {v4f32, v4f16}, {v2f64, v2f32}})
881 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
884 [](const LegalityQuery &Q) {
885 LLT DstTy = Q.Types[0];
886 LLT SrcTy = Q.Types[1];
887 return SrcTy.isVector() && DstTy.isVector() &&
888 SrcTy.getScalarSizeInBits() == 16 &&
889 DstTy.getScalarSizeInBits() == 64;
890 },
891 changeElementTo(1, f32))
892 .clampNumElements(0, v4s32, v4s32)
893 .clampNumElements(0, v2s64, v2s64)
894 .scalarize(0);
895
896 // Conversions
897 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
898 .legalFor({{i32, f32},
899 {i64, f32},
900 {i32, f64},
901 {i64, f64},
902 {v2i32, v2f32},
903 {v4i32, v4f32},
904 {v2i64, v2f64}})
905 .legalFor(HasFP16,
906 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
907 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
909 // The range of a fp16 value fits into an i17, so we can lower the width
910 // to i64.
912 [=](const LegalityQuery &Query) {
913 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
914 },
915 changeTo(0, i64))
918 .minScalar(0, s32)
919 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
921 [=](const LegalityQuery &Query) {
922 return Query.Types[0].getScalarSizeInBits() <= 64 &&
923 Query.Types[0].getScalarSizeInBits() >
924 Query.Types[1].getScalarSizeInBits();
925 },
927 .widenScalarIf(
928 [=](const LegalityQuery &Query) {
929 return Query.Types[1].getScalarSizeInBits() <= 64 &&
930 Query.Types[0].getScalarSizeInBits() <
931 Query.Types[1].getScalarSizeInBits();
932 },
934 .clampNumElements(0, v4s16, v8s16)
935 .clampNumElements(0, v2s32, v4s32)
936 .clampMaxNumElements(0, s64, 2)
937 .libcallFor(
938 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
939
940 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
941 .legalFor({{i32, f32},
942 {i64, f32},
943 {i32, f64},
944 {i64, f64},
945 {v2i32, v2f32},
946 {v4i32, v4f32},
947 {v2i64, v2f64}})
948 .legalFor(
949 HasFP16,
950 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
951 // Handle types larger than i64 by scalarizing/lowering.
952 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
954 // The range of a fp16 value fits into an i17, so we can lower the width
955 // to i64.
957 [=](const LegalityQuery &Query) {
958 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
959 },
960 changeTo(0, i64))
961 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
963 .widenScalarToNextPow2(0, /*MinSize=*/32)
964 .minScalar(0, s32)
965 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
967 [=](const LegalityQuery &Query) {
968 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
969 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
970 ITySize > Query.Types[1].getScalarSizeInBits();
971 },
973 .widenScalarIf(
974 [=](const LegalityQuery &Query) {
975 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
976 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
977 Query.Types[0].getScalarSizeInBits() < FTySize;
978 },
981 .clampNumElements(0, v4s16, v8s16)
982 .clampNumElements(0, v2s32, v4s32)
983 .clampMaxNumElements(0, s64, 2);
984
985 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
986 .legalFor({{f32, i32},
987 {f64, i32},
988 {f32, i64},
989 {f64, i64},
990 {v2f32, v2i32},
991 {v4f32, v4i32},
992 {v2f64, v2i64}})
993 .legalFor(HasFP16,
994 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
995 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
999 .minScalar(1, f32)
1000 .lowerIf([](const LegalityQuery &Query) {
1001 return Query.Types[1].isVector() &&
1002 Query.Types[1].getScalarSizeInBits() == 64 &&
1003 Query.Types[0].getScalarSizeInBits() == 16;
1004 })
1005 .widenScalarIf(
1006 [=](const LegalityQuery &Query) {
1007 return Query.Types[0].getScalarType() == bf16;
1008 },
1009 changeElementTo(0, f32))
1010 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1011 .scalarizeIf(
1012 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1013 [](const LegalityQuery &Query) {
1014 return Query.Types[0].getScalarSizeInBits() == 32 &&
1015 Query.Types[1].getScalarSizeInBits() == 64;
1016 },
1017 0)
1018 .widenScalarIf(
1019 [](const LegalityQuery &Query) {
1020 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1021 Query.Types[0].getScalarSizeInBits() <
1022 Query.Types[1].getScalarSizeInBits();
1023 },
1025 .widenScalarIf(
1026 [](const LegalityQuery &Query) {
1027 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1028 Query.Types[0].getScalarSizeInBits() >
1029 Query.Types[1].getScalarSizeInBits();
1030 },
1032 .clampNumElements(0, v4s16, v8s16)
1033 .clampNumElements(0, v2s32, v4s32)
1034 .clampMaxNumElements(0, s64, 2)
1035 .libcallFor({{f16, i128},
1036 {f32, i128},
1037 {f64, i128},
1038 {f128, i128},
1039 {f128, i32},
1040 {f128, i64}});
1041
1042 // Control-flow
1045 .legalFor({s32})
1046 .clampScalar(0, s32, s32);
1047 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1048
1050 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1051 .widenScalarToNextPow2(0)
1052 .clampScalar(0, s32, s64)
1053 .clampScalar(1, s32, s32)
1056 .lowerIf(isVector(0));
1057
1058 // Pointer-handling
1059 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1060
1061 if (TM.getCodeModel() == CodeModel::Small)
1062 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1063 else
1064 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1065
1066 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1067 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1068
1069 getActionDefinitionsBuilder(G_PTRTOINT)
1070 .legalFor({{i64, p0}, {v2i64, v2p0}})
1071 .widenScalarToNextPow2(0, 64)
1072 .clampScalar(0, s64, s64)
1073 .clampMaxNumElements(0, s64, 2);
1074
1075 getActionDefinitionsBuilder(G_INTTOPTR)
1076 .unsupportedIf([&](const LegalityQuery &Query) {
1077 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1078 })
1079 .legalFor({{p0, i64}, {v2p0, v2i64}})
1080 .clampMaxNumElements(1, s64, 2);
1081
1082 // Casts for 32 and 64-bit width type are just copies.
1083 // Same for 128-bit width type, except they are on the FPR bank.
1086 // Keeping 32-bit instructions legal to prevent regression in some tests
1087 .legalForCartesianProduct({s32, v2s16, v4s8})
1088 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1089 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1090 .customIf([=](const LegalityQuery &Query) {
1091 // Handle casts from i1 vectors to scalars.
1092 LLT DstTy = Query.Types[0];
1093 LLT SrcTy = Query.Types[1];
1094 return DstTy.isScalar() && SrcTy.isVector() &&
1095 SrcTy.getScalarSizeInBits() == 1;
1096 })
1097 .lowerIf([=](const LegalityQuery &Query) {
1098 return Query.Types[0].isVector() != Query.Types[1].isVector();
1099 })
1101 .clampNumElements(0, v8s8, v16s8)
1102 .clampNumElements(0, v4s16, v8s16)
1103 .clampNumElements(0, v2s32, v4s32)
1104 .clampMaxNumElements(0, s64, 2)
1105 .lower();
1106
1107 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1108
1109 // va_list must be a pointer, but most sized types are pretty easy to handle
1110 // as the destination.
1112 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1113 .clampScalar(0, s8, s64)
1114 .widenScalarToNextPow2(0, /*Min*/ 8);
1115
1116 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1117 .lowerIf(
1118 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1119
1120 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1121
1122 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1123 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1124 .customFor(!UseOutlineAtomics, {{s128, p0}})
1125 .libcallFor(UseOutlineAtomics,
1126 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1127 .clampScalar(0, s32, s64);
1128
1129 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1130 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1131 G_ATOMICRMW_XOR})
1132 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1133 .libcallFor(UseOutlineAtomics,
1134 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1135 .clampScalar(0, s32, s64);
1136
1137 // Do not outline these atomics operations, as per comment in
1138 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1140 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1141 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1142 .clampScalar(0, s32, s64);
1143
1144 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1145
1146 // Merge/Unmerge
1147 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1148 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1149 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1151 .widenScalarToNextPow2(LitTyIdx, 8)
1152 .widenScalarToNextPow2(BigTyIdx, 32)
1153 .clampScalar(LitTyIdx, s8, s64)
1154 .clampScalar(BigTyIdx, s32, s128)
1155 .legalIf([=](const LegalityQuery &Q) {
1156 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1157 case 32:
1158 case 64:
1159 case 128:
1160 break;
1161 default:
1162 return false;
1163 }
1164 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1165 case 8:
1166 case 16:
1167 case 32:
1168 case 64:
1169 return true;
1170 default:
1171 return false;
1172 }
1173 });
1174 }
1175
1176 // TODO : nxv4s16, nxv2s16, nxv2s32
1177 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1178 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1179 {s16, nxv8s16, s64},
1180 {s32, nxv4s32, s64},
1181 {s64, nxv2s64, s64}})
1182 .unsupportedIf([=](const LegalityQuery &Query) {
1183 const LLT &EltTy = Query.Types[1].getElementType();
1184 if (Query.Types[1].isScalableVector())
1185 return false;
1186 return Query.Types[0] != EltTy;
1187 })
1188 .minScalar(2, s64)
1189 .customIf([=](const LegalityQuery &Query) {
1190 const LLT &VecTy = Query.Types[1];
1191 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1192 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1193 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1194 })
1195 .minScalarOrEltIf(
1196 [=](const LegalityQuery &Query) {
1197 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1198 // cause the total vec size to be > 128b.
1199 return Query.Types[1].isFixedVector() &&
1200 Query.Types[1].getNumElements() <= 2;
1201 },
1202 0, s64)
1203 .minScalarOrEltIf(
1204 [=](const LegalityQuery &Query) {
1205 return Query.Types[1].isFixedVector() &&
1206 Query.Types[1].getNumElements() <= 4;
1207 },
1208 0, s32)
1209 .minScalarOrEltIf(
1210 [=](const LegalityQuery &Query) {
1211 return Query.Types[1].isFixedVector() &&
1212 Query.Types[1].getNumElements() <= 8;
1213 },
1214 0, s16)
1215 .minScalarOrEltIf(
1216 [=](const LegalityQuery &Query) {
1217 return Query.Types[1].isFixedVector() &&
1218 Query.Types[1].getNumElements() <= 16;
1219 },
1220 0, s8)
1221 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1223 .clampMaxNumElements(1, s64, 2)
1224 .clampMaxNumElements(1, s32, 4)
1225 .clampMaxNumElements(1, s16, 8)
1226 .clampMaxNumElements(1, s8, 16)
1227 .clampMaxNumElements(1, p0, 2)
1229
1230 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1231 .legalIf(
1232 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1233 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1234 {nxv8s16, s32, s64},
1235 {nxv4s32, s32, s64},
1236 {nxv2s64, s64, s64}})
1239 .clampNumElements(0, v8s8, v16s8)
1240 .clampNumElements(0, v4s16, v8s16)
1241 .clampNumElements(0, v2s32, v4s32)
1242 .clampMaxNumElements(0, s64, 2)
1243 .clampMaxNumElements(0, p0, 2)
1245
1246 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1247 .legalFor({{v8s8, s8},
1248 {v16s8, s8},
1249 {v4s16, s16},
1250 {v8s16, s16},
1251 {v2s32, s32},
1252 {v4s32, s32},
1253 {v2s64, s64},
1254 {v2p0, p0}})
1255 .clampNumElements(0, v4s32, v4s32)
1256 .clampNumElements(0, v2s64, v2s64)
1257 .minScalarOrElt(0, s8)
1260 .minScalarSameAs(1, 0);
1261
1262 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1263
1264 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1265 .legalIf([=](const LegalityQuery &Query) {
1266 const LLT &DstTy = Query.Types[0];
1267 const LLT &SrcTy = Query.Types[1];
1268 // For now just support the TBL2 variant which needs the source vectors
1269 // to be the same size as the dest.
1270 if (DstTy != SrcTy)
1271 return false;
1272 return llvm::is_contained(
1273 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1274 })
1275 .moreElementsIf(
1276 [](const LegalityQuery &Query) {
1277 return Query.Types[0].getNumElements() >
1278 Query.Types[1].getNumElements();
1279 },
1280 changeTo(1, 0))
1283 [](const LegalityQuery &Query) {
1284 return Query.Types[0].getNumElements() <
1285 Query.Types[1].getNumElements();
1286 },
1287 changeTo(0, 1))
1288 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1289 .clampNumElements(0, v8s8, v16s8)
1290 .clampNumElements(0, v4s16, v8s16)
1291 .clampNumElements(0, v4s32, v4s32)
1292 .clampNumElements(0, v2s64, v2s64)
1294 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1295 // Bitcast pointers vector to i64.
1296 const LLT DstTy = Query.Types[0];
1297 return std::pair(
1298 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1299 });
1300
1301 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1302 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1303 .customIf([=](const LegalityQuery &Query) {
1304 return Query.Types[0].isFixedVector() &&
1305 Query.Types[0].getScalarSizeInBits() < 8;
1306 })
1307 .bitcastIf(
1308 [=](const LegalityQuery &Query) {
1309 return Query.Types[0].isFixedVector() &&
1310 Query.Types[1].isFixedVector() &&
1311 Query.Types[0].getScalarSizeInBits() >= 8 &&
1312 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1313 Query.Types[0].getSizeInBits() <= 128 &&
1314 Query.Types[1].getSizeInBits() <= 64;
1315 },
1316 [=](const LegalityQuery &Query) {
1317 const LLT DstTy = Query.Types[0];
1318 const LLT SrcTy = Query.Types[1];
1319 return std::pair(
1320 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1323 SrcTy.getNumElements())));
1324 });
1325
1326 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1327 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1329 .immIdx(0); // Inform verifier imm idx 0 is handled.
1330
1331 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1332 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1333 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1334
1335 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1336
1337 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1338
1339 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1340
1341 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1342
1343 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1344
1345 if (ST.hasMOPS()) {
1346 // G_BZERO is not supported. Currently it is only emitted by
1347 // PreLegalizerCombiner for G_MEMSET with zero constant.
1349
1351 .legalForCartesianProduct({p0}, {s64}, {s64})
1352 .customForCartesianProduct({p0}, {s8}, {s64})
1353 .immIdx(0); // Inform verifier imm idx 0 is handled.
1354
1355 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1356 .legalForCartesianProduct({p0}, {p0}, {s64})
1357 .immIdx(0); // Inform verifier imm idx 0 is handled.
1358
1359 // G_MEMCPY_INLINE does not have a tailcall immediate
1360 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1361 .legalForCartesianProduct({p0}, {p0}, {s64});
1362
1363 } else {
1364 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1365 .libcall();
1366 }
1367
1368 // For fadd reductions we have pairwise operations available. We treat the
1369 // usual legal types as legal and handle the lowering to pairwise instructions
1370 // later.
1371 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1372 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1373 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1374 .minScalarOrElt(0, MinFPScalar)
1375 .clampMaxNumElements(1, s64, 2)
1376 .clampMaxNumElements(1, s32, 4)
1377 .clampMaxNumElements(1, s16, 8)
1379 .scalarize(1)
1380 .lower();
1381
1382 // For fmul reductions we need to split up into individual operations. We
1383 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1384 // smaller types, followed by scalarizing what remains.
1385 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1386 .minScalarOrElt(0, MinFPScalar)
1387 .clampMaxNumElements(1, s64, 2)
1388 .clampMaxNumElements(1, s32, 4)
1389 .clampMaxNumElements(1, s16, 8)
1390 .clampMaxNumElements(1, s32, 2)
1391 .clampMaxNumElements(1, s16, 4)
1392 .scalarize(1)
1393 .lower();
1394
1395 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1396 .scalarize(2)
1397 .lower();
1398
1399 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1400 .legalFor({{i8, v8i8},
1401 {i8, v16i8},
1402 {i16, v4i16},
1403 {i16, v8i16},
1404 {i32, v2i32},
1405 {i32, v4i32},
1406 {i64, v2i64}})
1408 .clampMaxNumElements(1, s64, 2)
1409 .clampMaxNumElements(1, s32, 4)
1410 .clampMaxNumElements(1, s16, 8)
1411 .clampMaxNumElements(1, s8, 16)
1413 .scalarize(1);
1414
1415 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1416 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1417 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1418 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1419 .minScalarOrElt(0, MinFPScalar)
1420 .clampMaxNumElements(1, s64, 2)
1421 .clampMaxNumElements(1, s32, 4)
1422 .clampMaxNumElements(1, s16, 8)
1423 .scalarize(1)
1424 .lower();
1425
1426 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1427 .clampMaxNumElements(1, s32, 2)
1428 .clampMaxNumElements(1, s16, 4)
1429 .clampMaxNumElements(1, s8, 8)
1430 .scalarize(1)
1431 .lower();
1432
1434 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1435 .legalFor({{i8, v8i8},
1436 {i8, v16i8},
1437 {i16, v4i16},
1438 {i16, v8i16},
1439 {i32, v2i32},
1440 {i32, v4i32}})
1441 .moreElementsIf(
1442 [=](const LegalityQuery &Query) {
1443 return Query.Types[1].isVector() &&
1444 Query.Types[1].getElementType() != s8 &&
1445 Query.Types[1].getNumElements() & 1;
1446 },
1448 .clampMaxNumElements(1, s64, 2)
1449 .clampMaxNumElements(1, s32, 4)
1450 .clampMaxNumElements(1, s16, 8)
1451 .clampMaxNumElements(1, s8, 16)
1452 .scalarize(1)
1453 .lower();
1454
1456 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1457 // Try to break down into smaller vectors as long as they're at least 64
1458 // bits. This lets us use vector operations for some parts of the
1459 // reduction.
1460 .fewerElementsIf(
1461 [=](const LegalityQuery &Q) {
1462 LLT SrcTy = Q.Types[1];
1463 if (SrcTy.isScalar())
1464 return false;
1465 if (!isPowerOf2_32(SrcTy.getNumElements()))
1466 return false;
1467 // We can usually perform 64b vector operations.
1468 return SrcTy.getSizeInBits() > 64;
1469 },
1470 [=](const LegalityQuery &Q) {
1471 LLT SrcTy = Q.Types[1];
1472 return std::make_pair(1, SrcTy.divide(2));
1473 })
1474 .scalarize(1)
1475 .lower();
1476
1477 // TODO: Update this to correct handling when adding AArch64/SVE support.
1478 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1479
1480 // Access to floating-point environment.
1481 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1482 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1483 .libcall();
1484
1485 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1486
1487 getActionDefinitionsBuilder(G_PREFETCH).custom();
1488
1489 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1490
1492 verify(*ST.getInstrInfo());
1493}
1494
                                          LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  // Dispatch each opcode that was marked .custom() in the rule table above to
  // its dedicated legalization routine. Returning false reports that
  // legalization failed for this instruction.
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_FSHL:
  case TargetOpcode::G_FSHR:
    return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, Helper);
  case TargetOpcode::G_DYN_STACKALLOC:
    return legalizeDynStackAlloc(MI, Helper);
  case TargetOpcode::G_PREFETCH:
    return legalizePrefetch(MI, Helper);
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToCNeg(MI);
  case TargetOpcode::G_ICMP:
    return legalizeICMP(MI, MRI, MIRBuilder);
  case TargetOpcode::G_BITCAST:
    return legalizeBitcast(MI, Helper);
  case TargetOpcode::G_CONCAT_VECTORS:
    return legalizeConcatVectors(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FPTRUNC:
    // In order to lower f16 to f64 properly, we need to use f32 as an
    // intermediary
    return legalizeFptrunc(MI, MIRBuilder, MRI);
  }

  llvm_unreachable("expected switch to return");
}
1557
1558bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1559 LegalizerHelper &Helper) const {
1560 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1561 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1562 // We're trying to handle casts from i1 vectors to scalars but reloading from
1563 // stack.
1564 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1565 SrcTy.getElementType() != LLT::scalar(1))
1566 return false;
1567
1568 Helper.createStackStoreLoad(DstReg, SrcReg);
1569 MI.eraseFromParent();
1570 return true;
1571}
1572
// Custom legalization for G_FSHL/G_FSHR. Constant-amount funnel shifts are
// canonicalized to a G_FSHR with a 64-bit normalized amount; everything else
// is lowered to plain shifts.
bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
                                               MachineIRBuilder &MIRBuilder,
                                               GISelChangeObserver &Observer,
                                               LegalizerHelper &Helper) const {
  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
         MI.getOpcode() == TargetOpcode::G_FSHR);

  // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
  // lowering
  Register ShiftNo = MI.getOperand(3).getReg();
  LLT ShiftTy = MRI.getType(ShiftNo);
  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);

  // Adjust shift amount according to Opcode (FSHL/FSHR)
  // Convert FSHL to FSHR
  LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
  APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);

  // Lower non-constant shifts and leave zero shifts to the optimizer.
  if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
    return (Helper.lowerFunnelShiftAsShifts(MI) ==

  // Reduce the constant amount modulo the operation bit width; an FSHL by N
  // is equivalent to an FSHR by (BitWidth - N).
  APInt Amount = VRegAndVal->Value.urem(BitWidth);

  Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;

  // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
  // in the range of 0 <-> BitWidth, it is legal
  if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
      VRegAndVal->Value.ult(BitWidth))
    return true;

  // Cast the ShiftNumber to a 64-bit type
  auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));

  if (MI.getOpcode() == TargetOpcode::G_FSHR) {
    // Rewrite the existing G_FSHR's amount operand in place, bracketing the
    // mutation with observer notifications.
    Observer.changingInstr(MI);
    MI.getOperand(3).setReg(Cast64.getReg(0));
    Observer.changedInstr(MI);
  }
  // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
  // instruction
  else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
    MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
                          {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
                           Cast64.getReg(0)});
    MI.eraseFromParent();
  }
  return true;
}
1625
// Custom legalization for vector G_ICMP: rewrite NE comparisons as
// EQ followed by NOT so later passes can pattern-match the compare.
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                        MachineIRBuilder &MIRBuilder) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg1 = MI.getOperand(2).getReg();
  Register SrcReg2 = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg1);

  // Check the vector types are legal: result and source must have the same
  // element size and element count, and the whole vector must fit a 64-bit
  // or 128-bit NEON register.
  if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
      DstTy.getNumElements() != SrcTy.getNumElements() ||
      (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
    return false;

  // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
  // following passes. All other predicates are already acceptable as-is.
  CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE)
    return true;
  Register CmpReg =
      MIRBuilder
          .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
          .getReg(0);
  MIRBuilder.buildNot(DstReg, CmpReg);

  MI.eraseFromParent();
  return true;
}
1655
// Custom legalization for G_ROTR: zero-extend a sub-64-bit rotate amount to
// 64 bits so that imported SelectionDAG patterns can match the rotate.
bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
                                          LegalizerHelper &Helper) const {
  // To allow for imported patterns to match, we ensure that the rotate amount
  // is 64b with an extension.
  Register AmtReg = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(AmtReg);
  (void)AmtTy; // Silence unused-variable warnings when asserts compile out.
  assert(AmtTy.isScalar() && "Expected a scalar rotate");
  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
  auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
  // Swap in the widened amount, bracketing the in-place mutation with
  // observer notifications.
  Helper.Observer.changingInstr(MI);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  Helper.Observer.changedInstr(MI);
  return true;
}
1672
// Custom legalization of G_GLOBAL_VALUE for the small code model: split the
// address computation into ADRP (page) + G_ADD_LOW (page offset).
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
  // G_ADD_LOW instructions.
  // By splitting this here, we can optimize accesses in the small code model by
  // folding in the G_ADD_LOW into the load/store offset.
  auto &GlobalOp = MI.getOperand(1);
  // Don't modify an intrinsic call.
  if (GlobalOp.isSymbol())
    return true;
  const auto* GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  // Ask the subtarget how this global must be referenced. GOT-accessed
  // globals are left untouched for instruction selection to handle.
  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  if (OpFlags & AArch64II::MO_GOT)
    return true;

  auto Offset = GlobalOp.getOffset();
  Register DstReg = MI.getOperand(0).getReg();
  // ADRP materializes the page-aligned base address of the global.
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
  // by creating a MOVK that sets bits 48-63 of the register to (global address
  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
  // prevent an incorrect tag being generated during relocation when the
  // global appears before the code section. Without the offset, a global at
  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
  // instead of `0xf`.
  // This assumes that we're in the small code model so we can assume a binary
  // size of <= 4GB, which makes the untagged PC relative offset positive. The
  // binary must also be loaded into address range [0, 2^48). Both of these
  // properties need to be ensured at runtime when using tagged addresses.
  if (OpFlags & AArch64II::MO_TAGGED) {
    assert(!Offset &&
           "Should not have folded in an offset for a tagged global!");
    ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
               .addGlobalAddress(GV, 0x100000000,
               .addImm(48);
    MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
  }

  // G_ADD_LOW adds the low bits of the address to the ADRP page base, and
  // replaces the original G_GLOBAL_VALUE.
  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, Offset,
  MI.eraseFromParent();
  return true;
}
1731
1733 MachineInstr &MI) const {
1734 MachineIRBuilder &MIB = Helper.MIRBuilder;
1735 MachineRegisterInfo &MRI = *MIB.getMRI();
1736
1737 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1738 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1739 MI.eraseFromParent();
1740 return true;
1741 };
1742 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1743 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1744 {MI.getOperand(2), MI.getOperand(3)});
1745 MI.eraseFromParent();
1746 return true;
1747 };
1748 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1749 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1750 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1751 MI.eraseFromParent();
1752 return true;
1753 };
1754
1755 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1756 switch (IntrinsicID) {
1757 case Intrinsic::vacopy: {
1758 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1759 unsigned VaListSize =
1760 (ST->isTargetDarwin() || ST->isTargetWindows())
1761 ? PtrSize
1762 : ST->isTargetILP32() ? 20 : 32;
1763
1764 MachineFunction &MF = *MI.getMF();
1766 LLT::scalar(VaListSize * 8));
1767 MIB.buildLoad(Val, MI.getOperand(2),
1770 VaListSize, Align(PtrSize)));
1771 MIB.buildStore(Val, MI.getOperand(1),
1774 VaListSize, Align(PtrSize)));
1775 MI.eraseFromParent();
1776 return true;
1777 }
1778 case Intrinsic::get_dynamic_area_offset: {
1779 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1780 MI.eraseFromParent();
1781 return true;
1782 }
1783 case Intrinsic::aarch64_mops_memset_tag: {
1784 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1785 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1786 // the instruction).
1787 auto &Value = MI.getOperand(3);
1788 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1789 Value.setReg(ExtValueReg);
1790 return true;
1791 }
1792 case Intrinsic::aarch64_prefetch: {
1793 auto &AddrVal = MI.getOperand(1);
1794
1795 int64_t IsWrite = MI.getOperand(2).getImm();
1796 int64_t Target = MI.getOperand(3).getImm();
1797 int64_t IsStream = MI.getOperand(4).getImm();
1798 int64_t IsData = MI.getOperand(5).getImm();
1799
1800 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1801 (!IsData << 3) | // IsDataCache bit
1802 (Target << 1) | // Cache level bits
1803 (unsigned)IsStream; // Stream bit
1804
1805 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1806 MI.eraseFromParent();
1807 return true;
1808 }
1809 case Intrinsic::aarch64_range_prefetch: {
1810 auto &AddrVal = MI.getOperand(1);
1811
1812 int64_t IsWrite = MI.getOperand(2).getImm();
1813 int64_t IsStream = MI.getOperand(3).getImm();
1814 unsigned PrfOp = (IsStream << 2) | IsWrite;
1815
1816 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1817 .addImm(PrfOp)
1818 .add(AddrVal)
1819 .addUse(MI.getOperand(4).getReg()); // Metadata
1820 MI.eraseFromParent();
1821 return true;
1822 }
1823 case Intrinsic::aarch64_prefetch_ir: {
1824 auto &AddrVal = MI.getOperand(1);
1825 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1826 MI.eraseFromParent();
1827 return true;
1828 }
1829 case Intrinsic::aarch64_neon_uaddv:
1830 case Intrinsic::aarch64_neon_saddv:
1831 case Intrinsic::aarch64_neon_umaxv:
1832 case Intrinsic::aarch64_neon_smaxv:
1833 case Intrinsic::aarch64_neon_uminv:
1834 case Intrinsic::aarch64_neon_sminv: {
1835 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1836 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1837 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1838
1839 auto OldDst = MI.getOperand(0).getReg();
1840 auto OldDstTy = MRI.getType(OldDst);
1841 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1842 if (OldDstTy == NewDstTy)
1843 return true;
1844
1845 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1846
1847 Helper.Observer.changingInstr(MI);
1848 MI.getOperand(0).setReg(NewDst);
1849 Helper.Observer.changedInstr(MI);
1850
1851 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1852 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1853 OldDst, NewDst);
1854
1855 return true;
1856 }
1857 case Intrinsic::aarch64_neon_uaddlp:
1858 case Intrinsic::aarch64_neon_saddlp: {
1859 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1860 ? AArch64::G_UADDLP
1861 : AArch64::G_SADDLP;
1862 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1863 MI.eraseFromParent();
1864
1865 return true;
1866 }
1867 case Intrinsic::aarch64_neon_uaddlv:
1868 case Intrinsic::aarch64_neon_saddlv: {
1869 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1870 ? AArch64::G_UADDLV
1871 : AArch64::G_SADDLV;
1872 Register DstReg = MI.getOperand(0).getReg();
1873 Register SrcReg = MI.getOperand(2).getReg();
1874 LLT DstTy = MRI.getType(DstReg);
1875
1876 LLT MidTy, ExtTy;
1877 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1878 ExtTy = LLT::integer(32);
1879 MidTy = LLT::fixed_vector(4, ExtTy);
1880 } else {
1881 ExtTy = LLT::integer(64);
1882 MidTy = LLT::fixed_vector(2, ExtTy);
1883 }
1884
1885 Register MidReg =
1886 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1887 Register ZeroReg =
1888 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1889 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1890 {MidReg, ZeroReg})
1891 .getReg(0);
1892
1893 if (DstTy.getScalarSizeInBits() < 32)
1894 MIB.buildTrunc(DstReg, ExtReg);
1895 else
1896 MIB.buildCopy(DstReg, ExtReg);
1897
1898 MI.eraseFromParent();
1899
1900 return true;
1901 }
1902 case Intrinsic::aarch64_neon_smax:
1903 return LowerBinOp(TargetOpcode::G_SMAX);
1904 case Intrinsic::aarch64_neon_smin:
1905 return LowerBinOp(TargetOpcode::G_SMIN);
1906 case Intrinsic::aarch64_neon_umax:
1907 return LowerBinOp(TargetOpcode::G_UMAX);
1908 case Intrinsic::aarch64_neon_umin:
1909 return LowerBinOp(TargetOpcode::G_UMIN);
1910 case Intrinsic::aarch64_neon_fmax:
1911 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1912 case Intrinsic::aarch64_neon_fmin:
1913 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1914 case Intrinsic::aarch64_neon_fmaxnm:
1915 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1916 case Intrinsic::aarch64_neon_fminnm:
1917 return LowerBinOp(TargetOpcode::G_FMINNUM);
1918 case Intrinsic::aarch64_neon_pmull:
1919 case Intrinsic::aarch64_neon_pmull64:
1920 return LowerBinOp(AArch64::G_PMULL);
1921 case Intrinsic::aarch64_neon_smull:
1922 return LowerBinOp(AArch64::G_SMULL);
1923 case Intrinsic::aarch64_neon_umull:
1924 return LowerBinOp(AArch64::G_UMULL);
1925 case Intrinsic::aarch64_neon_sabd:
1926 return LowerBinOp(TargetOpcode::G_ABDS);
1927 case Intrinsic::aarch64_neon_uabd:
1928 return LowerBinOp(TargetOpcode::G_ABDU);
1929 case Intrinsic::aarch64_neon_uhadd:
1930 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1931 case Intrinsic::aarch64_neon_urhadd:
1932 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1933 case Intrinsic::aarch64_neon_shadd:
1934 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1935 case Intrinsic::aarch64_neon_srhadd:
1936 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1937 case Intrinsic::aarch64_neon_sqshrn: {
1938 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1939 return true;
1940 // Create right shift instruction. Store the output register in Shr.
1941 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1942 {MRI.getType(MI.getOperand(2).getReg())},
1943 {MI.getOperand(2), MI.getOperand(3).getImm()});
1944 // Build the narrow intrinsic, taking in Shr.
1945 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1946 MI.eraseFromParent();
1947 return true;
1948 }
1949 case Intrinsic::aarch64_neon_sqshrun: {
1950 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1951 return true;
1952 // Create right shift instruction. Store the output register in Shr.
1953 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1954 {MRI.getType(MI.getOperand(2).getReg())},
1955 {MI.getOperand(2), MI.getOperand(3).getImm()});
1956 // Build the narrow intrinsic, taking in Shr.
1957 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1958 MI.eraseFromParent();
1959 return true;
1960 }
1961 case Intrinsic::aarch64_neon_sqrshrn: {
1962 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1963 return true;
1964 // Create right shift instruction. Store the output register in Shr.
1965 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1966 {MRI.getType(MI.getOperand(2).getReg())},
1967 {MI.getOperand(2), MI.getOperand(3).getImm()});
1968 // Build the narrow intrinsic, taking in Shr.
1969 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1970 MI.eraseFromParent();
1971 return true;
1972 }
1973 case Intrinsic::aarch64_neon_sqrshrun: {
1974 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1975 return true;
1976 // Create right shift instruction. Store the output register in Shr.
1977 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1978 {MRI.getType(MI.getOperand(2).getReg())},
1979 {MI.getOperand(2), MI.getOperand(3).getImm()});
1980 // Build the narrow intrinsic, taking in Shr.
1981 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1982 MI.eraseFromParent();
1983 return true;
1984 }
1985 case Intrinsic::aarch64_neon_uqrshrn: {
1986 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1987 return true;
1988 // Create right shift instruction. Store the output register in Shr.
1989 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1990 {MRI.getType(MI.getOperand(2).getReg())},
1991 {MI.getOperand(2), MI.getOperand(3).getImm()});
1992 // Build the narrow intrinsic, taking in Shr.
1993 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1994 MI.eraseFromParent();
1995 return true;
1996 }
1997 case Intrinsic::aarch64_neon_uqshrn: {
1998 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1999 return true;
2000 // Create right shift instruction. Store the output register in Shr.
2001 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2002 {MRI.getType(MI.getOperand(2).getReg())},
2003 {MI.getOperand(2), MI.getOperand(3).getImm()});
2004 // Build the narrow intrinsic, taking in Shr.
2005 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2006 MI.eraseFromParent();
2007 return true;
2008 }
2009 case Intrinsic::aarch64_neon_sqshlu: {
2010 // Check if last operand is constant vector dup
2011 auto ShiftAmount = isConstantOrConstantSplatVector(
2012 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2013 if (ShiftAmount) {
2014 // If so, create a new intrinsic with the correct shift amount
2015 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2016 {MI.getOperand(2)})
2017 .addImm(ShiftAmount->getSExtValue());
2018 MI.eraseFromParent();
2019 return true;
2020 }
2021 return false;
2022 }
2023 case Intrinsic::aarch64_neon_vsli: {
2024 MIB.buildInstr(
2025 AArch64::G_SLI, {MI.getOperand(0)},
2026 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2027 MI.eraseFromParent();
2028 break;
2029 }
2030 case Intrinsic::aarch64_neon_vsri: {
2031 MIB.buildInstr(
2032 AArch64::G_SRI, {MI.getOperand(0)},
2033 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2034 MI.eraseFromParent();
2035 break;
2036 }
2037 case Intrinsic::aarch64_neon_abs: {
2038 // Lower the intrinsic to G_ABS.
2039 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2040 MI.eraseFromParent();
2041 return true;
2042 }
2043 case Intrinsic::aarch64_neon_sqadd: {
2044 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2045 return LowerBinOp(TargetOpcode::G_SADDSAT);
2046 break;
2047 }
2048 case Intrinsic::aarch64_neon_sqsub: {
2049 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2050 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2051 break;
2052 }
2053 case Intrinsic::aarch64_neon_uqadd: {
2054 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2055 return LowerBinOp(TargetOpcode::G_UADDSAT);
2056 break;
2057 }
2058 case Intrinsic::aarch64_neon_uqsub: {
2059 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2060 return LowerBinOp(TargetOpcode::G_USUBSAT);
2061 break;
2062 }
2063 case Intrinsic::aarch64_neon_udot:
2064 return LowerTriOp(AArch64::G_UDOT);
2065 case Intrinsic::aarch64_neon_sdot:
2066 return LowerTriOp(AArch64::G_SDOT);
2067 case Intrinsic::aarch64_neon_usdot:
2068 return LowerTriOp(AArch64::G_USDOT);
2069 case Intrinsic::aarch64_neon_sqxtn:
2070 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2071 case Intrinsic::aarch64_neon_sqxtun:
2072 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2073 case Intrinsic::aarch64_neon_uqxtn:
2074 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2075 case Intrinsic::aarch64_neon_fcvtzu:
2076 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2077 case Intrinsic::aarch64_neon_fcvtzs:
2078 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2079
2080 case Intrinsic::vector_reverse:
2081 // TODO: Add support for vector_reverse
2082 return false;
2083 }
2084
2085 return true;
2086}
2087
// Custom legalization for G_SHL/G_ASHR/G_LSHR: when the shift amount is a
// known constant that fits the immediate form, rebuild it as a 64-bit
// G_CONSTANT so the imported SelectionDAG patterns can select the immediate
// variant. Always returns true: either form is legal.
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true; // Non-constant shift amounts are legal as-is.
  // Check the shift amount is in range for an immediate form.
  int64_t Amount = VRegAndVal->Value.getSExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  // Rebuild the amount as a 64-bit constant (per-element size for vector
  // shifts) and swap it into operand 2 under observer notification.
  auto ExtCst =
      MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
2112
2114 MachineRegisterInfo &MRI) {
2115 Base = Root;
2116 Offset = 0;
2117
2118 Register NewBase;
2119 int64_t NewOffset;
2120 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2121 isShiftedInt<7, 3>(NewOffset)) {
2122 Base = NewBase;
2123 Offset = NewOffset;
2124 }
2125}
2126
// FIXME: This should be removed and replaced with the generic bitcast legalize
// action.
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  // 128-bit load/store: expand to a paired 64-bit instruction — LDP/STP, or
  // LDIAPPX/STILPX when acquire/release ordering is required and FEAT_LRCPC3
  // is available.
  if (ValTy == LLT::scalar(128)) {

    AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
    bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
    bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
    bool IsRcpC3 =
        ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);

    LLT s64 = LLT::integer(64);

    unsigned Opcode;
    if (IsRcpC3) {
      Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
    } else {
      // For LSE2, loads/stores should have been converted to monotonic and had
      // a fence inserted after them.
      assert(Ordering == AtomicOrdering::Monotonic ||
             Ordering == AtomicOrdering::Unordered);
      assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");

      Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
    }

    // Bridge between the generic s128 value and the two 64-bit halves the
    // target instruction produces (load) or consumes (store).
    MachineInstrBuilder NewI;
    if (IsLoad) {
      NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
      MIRBuilder.buildMergeLikeInstr(
          ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
    } else {
      auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
      NewI = MIRBuilder.buildInstr(
          Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
    }

    if (IsRcpC3) {
      NewI.addUse(MI.getOperand(1).getReg());
    } else {
      // LDP/STP take a base register plus an 8-byte-scaled immediate; try to
      // fold an address computation into the addressing mode.
      Register Base;
      int Offset;
      matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
      NewI.addUse(Base);
      NewI.addImm(Offset / 8);
    }

    NewI.cloneMemRefs(MI);
    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
                                     *MRI.getTargetRegisterInfo(),
                                     *ST->getRegBankInfo());
    MI.eraseFromParent();
    return true;
  }

  // Only vector-of-pointer values in address space 0 are handled below.
  if (!ValTy.isPointerVector() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  // Rewrite the vector-of-pointers access as the equivalent
  // vector-of-integers access, keeping the memory operand's type in sync.
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
  auto &MMO = **MI.memoperands_begin();
  MMO.setType(NewTy);

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
  } else {
    auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
    MIRBuilder.buildBitcast(ValReg, NewLoad);
  }
  MI.eraseFromParent();
  return true;
}
2220
// Lower G_VAARG: load the current pointer out of the va_list, realign it if
// the requested alignment exceeds the natural slot alignment, load the value,
// then advance the pointer and store it back to the va_list.
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  Align Alignment(MI.getOperand(2).getImm());
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  const Align PtrAlign = Align(PtrSize);
  // Load the current head pointer from the va_list.
  auto List = MIRBuilder.buildLoad(
      PtrTy, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrTy, PtrAlign));

  MachineInstrBuilder DstPtr;
  if (Alignment > PtrAlign) {
    // Realign the list to the actual required alignment:
    // ptr = (list + align - 1) & ~(align - 1)
    auto AlignMinus1 =
        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
  } else
    DstPtr = List;

  // Load the argument value from the (possibly realigned) slot.
  LLT ValTy = MRI.getType(Dst);
  uint64_t ValSize = ValTy.getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValTy, std::max(Alignment, PtrAlign)));

  // Advance past the slot, rounded up to a multiple of the pointer size.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  // Write the advanced head pointer back to the va_list.
  MIRBuilder.buildStore(NewList, ListPtr,
                        *MF.getMachineMemOperand(MachinePointerInfo(),
                                                 MachineMemOperand::MOStore,
                                                 PtrTy, PtrAlign));

  MI.eraseFromParent();
  return true;
}
2268
2269bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2270 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2271 // Only legal if we can select immediate forms.
2272 // TODO: Lower this otherwise.
2273 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2274 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2275}
2276
bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         LegalizerHelper &Helper) const {
  // When there is no integer popcount instruction (FEAT_CSSC isn't available),
  // it can be more efficiently lowered to the following sequence that uses
  // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
  // registers are cheap.
  //  FMOV  D0, X0        // copy 64-bit int to vector, high bits zero'd
  //  CNT   V0.8B, V0.8B  // 8xbyte pop-counts
  //  ADDV  B0, V0.8B     // sum 8xbyte pop-counts
  //  UMOV  X0, V0.B[0]   // copy byte result back to integer reg
  //
  // For 128 bit vector popcounts, we lower to the following sequence:
  //  cnt.16b   v0, v0  // v8s16, v4s32, v2s64
  //  uaddlp.8h v0, v0  // v8s16, v4s32, v2s64
  //  uaddlp.4s v0, v0  //        v4s32, v2s64
  //  uaddlp.2d v0, v0  //               v2s64
  //
  // For 64 bit vector popcounts, we lower to the following sequence:
  //  cnt.8b    v0, v0  // v4s16, v2s32
  //  uaddlp.4h v0, v0  // v4s16, v2s32
  //  uaddlp.2s v0, v0  //        v2s32

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  Register Dst = MI.getOperand(0).getReg();
  Register Val = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Val);

  LLT i64 = LLT::integer(64);
  LLT i32 = LLT::integer(32);
  LLT i16 = LLT::integer(16);
  LLT i8 = LLT::integer(8);
  unsigned Size = Ty.getSizeInBits();

  assert(Ty == MRI.getType(Dst) &&
         "Expected src and dst to have the same type!");

  // With CSSC's scalar popcount, an s128 CTPOP becomes two 64-bit popcounts
  // plus an add; the result is then widened back to s128.
  if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {

    auto Split = MIRBuilder.buildUnmerge(i64, Val);
    auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
    auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
    auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);

    MIRBuilder.buildZExt(Dst, Add);
    MI.eraseFromParent();
    return true;
  }

  if (!ST->hasNEON() ||
      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
    // Use generic lowering when custom lowering is not possible.
    return Ty.isScalar() && (Size == 32 || Size == 64) &&
           Helper.lowerBitCount(MI) ==
               LegalizerHelper::LegalizeResult::Legalized;
  }

  // Pre-conditioning: widen Val up to the nearest vector type.
  // s32,s64,v4s16,v2s32 -> v8i8
  // v8s16,v4s32,v2s64 -> v16i8
  LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
  if (Ty.isScalar()) {
    assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
    // Widen s32 to s64 so the bitcast to v8i8 below is size-preserving.
    if (Size == 32) {
      Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
    }
  }
  Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);

  // Count bits in each byte-sized lane.
  auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);

  // Sum across lanes.
  // With the dot-product extension, UDOT against an all-ones vector sums each
  // group of four byte counts in a single instruction.
  if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
      Ty.getScalarSizeInBits() != 16) {
    LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
    auto Zeros = MIRBuilder.buildConstant(Dt, 0);
    auto Ones = MIRBuilder.buildConstant(VTy, 1);
    MachineInstrBuilder Sum;

    if (Ty == LLT::fixed_vector(2, i64)) {
      // UDOT yields 4xs32; one extra pairwise widening add reaches 2xs64.
      auto UDOT =
          MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
      Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
    } else if (Ty == LLT::fixed_vector(4, i32)) {
      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
    } else if (Ty == LLT::fixed_vector(2, i32)) {
      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
    } else {
      llvm_unreachable("unexpected vector shape");
    }

    // Retarget the final instruction at the original destination register.
    Sum->getOperand(0).setReg(Dst);
    MI.eraseFromParent();
    return true;
  }

  // Otherwise sum with UADDLV (scalar result) or a chain of UADDLP pairwise
  // widening adds until the element width matches the destination type.
  Register HSum = CTPOP.getReg(0);
  unsigned Opc;
  SmallVector<LLT> HAddTys;
  if (Ty.isScalar()) {
    Opc = Intrinsic::aarch64_neon_uaddlv;
    HAddTys.push_back(i32);
  } else if (Ty == LLT::fixed_vector(8, i16)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, i16));
  } else if (Ty == LLT::fixed_vector(4, i32)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, i16));
    HAddTys.push_back(LLT::fixed_vector(4, i32));
  } else if (Ty == LLT::fixed_vector(2, i64)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, i16));
    HAddTys.push_back(LLT::fixed_vector(4, i32));
    HAddTys.push_back(LLT::fixed_vector(2, i64));
  } else if (Ty == LLT::fixed_vector(4, i16)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(4, i16));
  } else if (Ty == LLT::fixed_vector(2, i32)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(4, i16));
    HAddTys.push_back(LLT::fixed_vector(2, i32));
  } else
    llvm_unreachable("unexpected vector shape");
  MachineInstrBuilder UADD;
  for (LLT HTy : HAddTys) {
    UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
    HSum = UADD.getReg(0);
  }

  // Post-conditioning.
  // The scalar UADDLV path produces an s32 partial sum; zero-extend it when
  // the original scalar was wider, else retarget the last instruction at Dst.
  if (Ty.isScalar() && (Size == 64 || Size == 128))
    MIRBuilder.buildZExt(Dst, UADD);
  else
    UADD->getOperand(0).setReg(Dst);
  MI.eraseFromParent();
  return true;
}
2415
// Expand a 128-bit G_ATOMIC_CMPXCHG into either a CASP instruction (when LSE
// is available) or a CMP_SWAP_128 pseudo (LL/SC loop), working on two 64-bit
// halves of the desired/new/old values.
bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  LLT i64 = LLT::integer(64);
  auto Addr = MI.getOperand(1).getReg();
  // Split the expected (desired) and replacement (new) values into halves.
  auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
  auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
  auto DstLo = MRI.createGenericVirtualRegister(i64);
  auto DstHi = MRI.createGenericVirtualRegister(i64);

  MachineInstrBuilder CAS;
  if (ST->hasLSE()) {
    // We have 128-bit CASP instructions taking XSeqPair registers, which are
    // s128. We need the merge/unmerge to bracket the expansion and pair up with
    // the rest of the MIR so we must reassemble the extracted registers into a
    // 128-bit known-regclass one with code like this:
    //
    //     %in1 = REG_SEQUENCE Lo, Hi    ; One for each input
    //     %out = CASP %in1, ...
    //     %OldLo = G_EXTRACT %out, 0
    //     %OldHi = G_EXTRACT %out, 64
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
    unsigned Opcode;
    switch (Ordering) {
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CASPAX;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CASPLX;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CASPALX;
      break;
    default:
      Opcode = AArch64::CASPX;
      break;
    }

    LLT s128 = LLT::scalar(128);
    auto CASDst = MRI.createGenericVirtualRegister(s128);
    auto CASDesired = MRI.createGenericVirtualRegister(s128);
    auto CASNew = MRI.createGenericVirtualRegister(s128);
    // Pack the lo/hi halves into the sube64/subo64 subregisters of the
    // sequence-pair operands.
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
        .addUse(DesiredI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(DesiredI->getOperand(1).getReg())
        .addImm(AArch64::subo64);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
        .addUse(NewI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(NewI->getOperand(1).getReg())
        .addImm(AArch64::subo64);

    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});

    MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
    MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
  } else {
    // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
    // can take arbitrary registers so it just has the normal GPR64 operands the
    // rest of AArch64 is expecting.
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
    unsigned Opcode;
    switch (Ordering) {
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CMP_SWAP_128_RELEASE;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CMP_SWAP_128;
      break;
    default:
      Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
      break;
    }

    auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
                                {Addr, DesiredI->getOperand(0),
                                 DesiredI->getOperand(1), NewI->getOperand(0),
                                 NewI->getOperand(1)});
  }

  CAS.cloneMemRefs(MI);
  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
                                   *MRI.getTargetRegisterInfo(),
                                   *ST->getRegBankInfo());

  // Reassemble the old 128-bit value from the two result halves.
  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
  MI.eraseFromParent();
  return true;
}
2512
2513bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2514 LegalizerHelper &Helper) const {
2515 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2516 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2517 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2518 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2519 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2520 MI.eraseFromParent();
2521 return true;
2522}
2523
2524bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2525 LegalizerHelper &Helper) const {
2526 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2527
2528 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2529 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2530 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2531 // the instruction).
2532 auto &Value = MI.getOperand(1);
2533 Register ExtValueReg =
2534 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2535 Value.setReg(ExtValueReg);
2536 return true;
2537 }
2538
2539 return false;
2540}
2541
// G_EXTRACT_VECTOR_ELT: constant indices and scalable vectors are kept as-is;
// a variable index on a fixed-width vector is lowered via the generic helper.
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
  const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
  auto VRegAndVal =
      getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
  // A known-constant index is directly selectable — nothing to do.
  if (VRegAndVal)
    return true;
  LLT VecTy = MRI.getType(Element->getVectorReg());
  if (VecTy.isScalableVector())
    return true;
  // Report legalization success unless the helper could not lower it.
  return Helper.lowerExtractInsertVectorElt(MI) !=
         LegalizerHelper::LegalizeResult::UnableToLegalize;
}
2555
// G_DYN_STACKALLOC: with inline-asm stack probing enabled, emit the
// PROBED_STACKALLOC_DYN pseudo so each page of the new allocation is touched;
// otherwise fall back to the generic lowering.
bool AArch64LegalizerInfo::legalizeDynStackAlloc(
    MachineInstr &MI, LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // If stack probing is not enabled for this function, use the default
  // lowering.
  if (!MF.getFunction().hasFnAttribute("probe-stack") ||
      MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
          "inline-asm") {
    Helper.lowerDynStackAlloc(MI);
    return true;
  }

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
         "Unexpected type for dynamic alloca");
  assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
         "Unexpected type for dynamic alloca");

  // Compute the new stack pointer value, then hand it to the probing pseudo.
  LLT PtrTy = MRI.getType(Dst);
  Register SPReg =
      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
  Register SPTmp =
      Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
  auto NewMI =
      MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
  MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
  // Insert the copy of the result after the probing pseudo.
  MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return true;
}
2594
2595bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2596 LegalizerHelper &Helper) const {
2597 MachineIRBuilder &MIB = Helper.MIRBuilder;
2598 auto &AddrVal = MI.getOperand(0);
2599
2600 int64_t IsWrite = MI.getOperand(1).getImm();
2601 int64_t Locality = MI.getOperand(2).getImm();
2602 int64_t IsData = MI.getOperand(3).getImm();
2603
2604 bool IsStream = Locality == 0;
2605 if (Locality != 0) {
2606 assert(Locality <= 3 && "Prefetch locality out-of-range");
2607 // The locality degree is the opposite of the cache speed.
2608 // Put the number the other way around.
2609 // The encoding starts at 0 for level 1
2610 Locality = 3 - Locality;
2611 }
2612
2613 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2614
2615 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2616 MI.eraseFromParent();
2617 return true;
2618}
2619
bool AArch64LegalizerInfo::legalizeConcatVectors(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  // Widen sub-byte element vectors to byte-sized elements before concatenating.
  // This is analogous to SDAG's integer type promotion for sub-byte types.
  auto &Concat = cast<GConcatVectors>(MI);
  Register DstReg = Concat.getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");

  // Promote to the next power-of-two element width, but at least one byte.
  unsigned WideEltSize =
      std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
  LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
  LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
  LLT WideDstTy = DstTy.changeElementSize(WideEltSize);

  // Any-extend every source vector, concatenate in the widened type, then
  // truncate the result back to the original sub-byte element type.
  SmallVector<Register> WideSrcs;
  for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
    auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
    WideSrcs.push_back(Wide.getReg(0));
  }

  auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
  MIRBuilder.buildTrunc(DstReg, WideConcat);
  MI.eraseFromParent();
  return true;
}
2647
// Lower a vector G_FPTRUNC from 64-bit elements to 16-bit elements in two
// steps: first a round-to-odd truncation to 32 bits (G_FPTRUNC_ODD), then a
// regular truncation to 16 bits. The round-to-odd intermediate avoids
// double-rounding errors in the two-step conversion.
bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
                                           MachineIRBuilder &MIRBuilder,
                                           MachineRegisterInfo &MRI) const {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
         "Expected a power of 2 elements");

  // We must mutate types here as FPTrunc may be used on a IEEE floating point
  // or a brainfloat.
  LLT v2s16 = DstTy.changeElementCount(2);
  LLT v4s16 = DstTy.changeElementCount(4);
  LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
  LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
  LLT v2s64 = SrcTy.changeElementCount(2);

  SmallVector<Register> RegsToUnmergeTo; // 2x64-bit input chunks
  SmallVector<Register> TruncOddDstRegs; // 2x32-bit round-to-odd results
  SmallVector<Register> RegsToMerge;     // 16-bit element result chunks

  unsigned ElemCount = SrcTy.getNumElements();

  // Find the biggest size chunks we can work with
  int StepSize = ElemCount % 4 ? 2 : 4;

  // If we have a power of 2 greater than 2, we need to first unmerge into
  // enough pieces
  if (ElemCount <= 2)
    RegsToUnmergeTo.push_back(Src);
  else {
    for (unsigned i = 0; i < ElemCount / 2; ++i)
      RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));

    MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
  }

  // Create all of the round-to-odd instructions and store them
  for (auto SrcReg : RegsToUnmergeTo) {
    Register Mid =
        MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
            .getReg(0);
    TruncOddDstRegs.push_back(Mid);
  }

  // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
  // truncate 2s32 to 2s16.
  unsigned Index = 0;
  for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
    if (StepSize == 4) {
      // Braced-init-list elements are sequenced left-to-right, so the two
      // Index++ reads pick up consecutive chunks in order.
      Register ConcatDst =
          MIRBuilder
              .buildInstr(
                  TargetOpcode::G_CONCAT_VECTORS,
                  {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
              .getReg(0);

      RegsToMerge.push_back(
          MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
    } else {
      RegsToMerge.push_back(
          MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
    }
  }

  // If there is only one register, replace the destination
  if (RegsToMerge.size() == 1) {
    MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
    MI.eraseFromParent();
    return true;
  }

  // Merge the rest of the instructions & replace the register
  Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
  MRI.replaceRegWith(Dst, Fin);
  MI.eraseFromParent();
  return true;
}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the MachineLegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of at least Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1506
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...