AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
82 getLegacyLegalizerInfo().computeTables();
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
95 getActionDefinitionsBuilder(
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(0)
101 .clampScalar(0, s8, s64)
104 .clampNumElements(0, v8s8, v16s8)
105 .clampNumElements(0, v4s16, v8s16)
106 .clampNumElements(0, v2s32, v4s32)
107 .clampMaxNumElements(0, s64, 2)
108 .clampMaxNumElements(0, p0, 2)
110
112 .legalFor({p0, s16, s32, s64})
113 .legalFor(PackedVectorAllTypeList)
117 .clampScalar(0, s16, s64)
118 .clampNumElements(0, v8s8, v16s8)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampMaxNumElements(0, s64, 2)
122 .clampMaxNumElements(0, p0, 2);
123
125 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
126 smallerThan(1, 0)))
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
130 .minScalar(1, s8)
131 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
132 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
133
135 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
136 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .widenScalarToNextPow2(1)
138 .clampScalar(1, s32, s128)
140 .minScalar(0, s16)
141 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
142 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
144
145 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, s32, s64)
150 .clampMaxNumElements(0, s8, 16)
151 .clampMaxNumElements(0, s16, 8)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampNumElements(0, v2s64, v2s64)
154 .minScalarOrEltIf(
155 [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 0, s32)
159 .minScalarOrEltIf(
160 [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 0, s16)
164 .minScalarOrEltIf(
165 [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 0, s8)
169 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
171
173 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
175 .clampScalar(0, s32, s64)
176 .clampMaxNumElements(0, s8, 16)
177 .clampMaxNumElements(0, s16, 8)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
180 .minScalarOrEltIf(
181 [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 0, s32)
185 .minScalarOrEltIf(
186 [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 0, s16)
190 .minScalarOrEltIf(
191 [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 0, s8)
195 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
197
198 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
199 .customIf([=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor({
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(0)
218 .clampScalar(1, s32, s64)
219 .clampScalar(0, s32, s64)
220 .clampNumElements(0, v8s8, v16s8)
221 .clampNumElements(0, v4s16, v8s16)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64)
225 .minScalarSameAs(1, 0)
229
231 .legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
233 .clampNumElements(0, v2p0, v2p0);
234
235 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
236
237 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
238 .legalFor({s32, s64})
239 .libcallFor({s128})
240 .clampScalar(0, s32, s64)
242 .scalarize(0);
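 // Illustration (not part of the upstream rule set): an s16 division such as
 //   %q:_(s16) = G_SDIV %a:_(s16), %b:_(s16)
 // is widened to s32 by the clampScalar above, while an s128 division hits the
 // libcallFor clause and is expected to become a runtime call (assumed to be
 // the usual compiler-rt helpers, e.g. __divti3 / __udivti3).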
243
244 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor({s128})
248 .minScalarOrElt(0, s32)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
251 .scalarize(0);
252
253 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(0, /*Min = */ 32)
255 .clampScalar(0, s32, s64)
256 .lower();
257
258 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
259 .legalFor({s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
266 .clampNumElements(0, v8s8, v16s8)
267 .clampNumElements(0, v4s16, v8s16)
268 .clampNumElements(0, v2s32, v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
273 .legalFor(HasCSSC, {s32, s64})
274 .legalFor(PackedVectorAllTypeList)
275 .customIf([=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
283 .widenScalarIf(
284 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
287 .clampNumElements(0, v4s16, v8s16)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
291 .lower();
292
293 getActionDefinitionsBuilder(
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
298 getActionDefinitionsBuilder(
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
302 .clampScalar(1, s32, s64)
304
305 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
310 .legalFor({{s32, s64}, {s64, s64}})
311 .customIf([=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
316
317 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
318 .customFor({{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
322 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(0, s32, s128)
333 .minScalarEltSameAsIf(always, 1, 0)
334 .maxScalarEltSameAsIf(always, 1, 0)
335 .clampNumElements(0, v8s8, v16s8)
336 .clampNumElements(0, v4s16, v8s16)
337 .clampNumElements(0, v2s32, v4s32)
338 .clampNumElements(0, v2s64, v2s64)
341
343 .legalFor({{s32, s32},
344 {s64, s64},
345 {v8s8, v8s8},
346 {v16s8, v16s8},
347 {v4s16, v4s16},
348 {v8s16, v8s16},
349 {v2s32, v2s32},
350 {v4s32, v4s32}})
351 .widenScalarToNextPow2(1, /*Min=*/32)
352 .clampScalar(1, s32, s64)
353 .clampNumElements(0, v8s8, v16s8)
354 .clampNumElements(0, v4s16, v8s16)
355 .clampNumElements(0, v2s32, v4s32)
358 .scalarSameSizeAs(0, 1);
359
360 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
361
363 .lowerIf(isVector(0))
364 .widenScalarToNextPow2(1, /*Min=*/32)
365 .clampScalar(1, s32, s64)
366 .scalarSameSizeAs(0, 1)
367 .legalFor(HasCSSC, {s32, s64})
368 .customFor(!HasCSSC, {s32, s64});
369
370 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
371
372 getActionDefinitionsBuilder(G_BITREVERSE)
373 .legalFor({s32, s64, v8s8, v16s8})
374 .widenScalarToNextPow2(0, /*Min = */ 32)
376 .clampScalar(0, s32, s64)
377 .clampNumElements(0, v8s8, v16s8)
378 .clampNumElements(0, v4s16, v8s16)
379 .clampNumElements(0, v2s32, v4s32)
380 .clampNumElements(0, v2s64, v2s64)
383 .lower();
384
386 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
388 .clampScalar(0, s32, s64)
389 .clampNumElements(0, v4s16, v8s16)
390 .clampNumElements(0, v2s32, v4s32)
391 .clampNumElements(0, v2s64, v2s64)
393
394 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
395 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
396 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
397 .clampNumElements(0, v8s8, v16s8)
398 .clampNumElements(0, v4s16, v8s16)
399 .clampNumElements(0, v2s32, v4s32)
400 .clampMaxNumElements(0, s64, 2)
403 .lower();
404
405 getActionDefinitionsBuilder(
406 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
407 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
408 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
409 .legalFor({s32, s64, v2s32, v4s32, v2s64})
410 .legalFor(HasFP16, {s16, v4s16, v8s16})
411 .libcallFor({s128})
412 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
413 .minScalarOrElt(0, MinFPScalar)
414 .clampNumElements(0, v4s16, v8s16)
415 .clampNumElements(0, v2s32, v4s32)
416 .clampNumElements(0, v2s64, v2s64)
418
419 getActionDefinitionsBuilder({G_FABS, G_FNEG})
420 .legalFor({s32, s64, v2s32, v4s32, v2s64})
421 .legalFor(HasFP16, {s16, v4s16, v8s16})
422 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
424 .clampNumElements(0, v4s16, v8s16)
425 .clampNumElements(0, v2s32, v4s32)
426 .clampNumElements(0, v2s64, v2s64)
428 .lowerFor({s16, v4s16, v8s16});
429
431 .libcallFor({s32, s64, s128})
432 .minScalar(0, s32)
433 .scalarize(0);
434
435 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
436 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
437 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
438 G_FSINH, G_FTANH, G_FMODF})
439 // We need a call for these, so we always need to scalarize.
440 .scalarize(0)
441 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
442 .minScalar(0, s32)
443 .libcallFor({s32, s64, s128});
444 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
445 .scalarize(0)
446 .minScalar(0, s32)
447 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
448
449 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
450 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
451 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
452 .minScalar(1, s32)
453 .libcallFor({{s64, s128}})
454 .lower();
455 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
456 .legalFor({{s64, s32}, {s64, s64}})
457 .legalFor(HasFP16, {{s64, s16}})
458 .minScalar(0, s64)
459 .minScalar(1, s32)
460 .libcallFor({{s64, s128}})
461 .lower();
462
463 // TODO: Custom legalization for mismatched types.
464 getActionDefinitionsBuilder(G_FCOPYSIGN)
466 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
467 [=](const LegalityQuery &Query) {
468 const LLT Ty = Query.Types[0];
469 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
470 })
471 .lower();
472
474
475 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
476 auto &Actions = getActionDefinitionsBuilder(Op);
477
478 if (Op == G_SEXTLOAD)
480
481 // Atomics have zero extending behavior.
482 Actions
483 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
484 {s32, p0, s16, 8},
485 {s32, p0, s32, 8},
486 {s64, p0, s8, 2},
487 {s64, p0, s16, 2},
488 {s64, p0, s32, 4},
489 {s64, p0, s64, 8},
490 {p0, p0, s64, 8},
491 {v2s32, p0, s64, 8}})
492 .widenScalarToNextPow2(0)
493 .clampScalar(0, s32, s64)
494 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
495 // how to do that yet.
496 .unsupportedIfMemSizeNotPow2()
497 // Lower anything left over into G_*EXT and G_LOAD
498 .lower();
499 }
500
501 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
502 const LLT &ValTy = Query.Types[0];
503 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
504 };
505
507 .customIf([=](const LegalityQuery &Query) {
508 return HasRCPC3 && Query.Types[0] == s128 &&
509 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
510 })
511 .customIf([=](const LegalityQuery &Query) {
512 return Query.Types[0] == s128 &&
513 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
514 })
515 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
516 {s16, p0, s16, 8},
517 {s32, p0, s32, 8},
518 {s64, p0, s64, 8},
519 {p0, p0, s64, 8},
520 {s128, p0, s128, 8},
521 {v8s8, p0, s64, 8},
522 {v16s8, p0, s128, 8},
523 {v4s16, p0, s64, 8},
524 {v8s16, p0, s128, 8},
525 {v2s32, p0, s64, 8},
526 {v4s32, p0, s128, 8},
527 {v2s64, p0, s128, 8}})
528 // These extends are also legal
529 .legalForTypesWithMemDesc(
530 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
531 .legalForTypesWithMemDesc({
532 // SVE vscale x 128 bit base sizes
533 {nxv16s8, p0, nxv16s8, 8},
534 {nxv8s16, p0, nxv8s16, 8},
535 {nxv4s32, p0, nxv4s32, 8},
536 {nxv2s64, p0, nxv2s64, 8},
537 })
538 .widenScalarToNextPow2(0, /* MinSize = */ 8)
539 .clampMaxNumElements(0, s8, 16)
540 .clampMaxNumElements(0, s16, 8)
541 .clampMaxNumElements(0, s32, 4)
542 .clampMaxNumElements(0, s64, 2)
543 .clampMaxNumElements(0, p0, 2)
545 .clampScalar(0, s8, s64)
547 [=](const LegalityQuery &Query) {
548 // Clamp extending load results to 32-bits.
549 return Query.Types[0].isScalar() &&
550 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
551 Query.Types[0].getSizeInBits() > 32;
552 },
553 changeTo(0, s32))
554 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
555 .bitcastIf(typeInSet(0, {v4s8}),
556 [=](const LegalityQuery &Query) {
557 const LLT VecTy = Query.Types[0];
558 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
559 })
560 .customIf(IsPtrVecPred)
561 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
562 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
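 // Worked example for the bitcastIf clause above (illustration only): a G_LOAD
 // producing v4s8 has no native entry, so the load is rewritten to produce a
 // scalar of the same width (s32) and the result is bitcast back to v4s8.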
563
565 .customIf([=](const LegalityQuery &Query) {
566 return HasRCPC3 && Query.Types[0] == s128 &&
567 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
568 })
569 .customIf([=](const LegalityQuery &Query) {
570 return Query.Types[0] == s128 &&
571 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
572 })
573 .widenScalarIf(
574 all(scalarNarrowerThan(0, 32),
576 changeTo(0, s32))
577 .legalForTypesWithMemDesc(
578 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
579 {s32, p0, s8, 8}, // truncstorei8 from s32
580 {s64, p0, s8, 8}, // truncstorei8 from s64
581 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
582 {s64, p0, s16, 8}, // truncstorei16 from s64
583 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
584 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
585 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
586 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
587 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
588 .legalForTypesWithMemDesc({
589 // SVE vscale x 128 bit base sizes
590 // TODO: Add nxv2p0. Consider bitcastIf.
591 // See #92130
592 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
593 {nxv16s8, p0, nxv16s8, 8},
594 {nxv8s16, p0, nxv8s16, 8},
595 {nxv4s32, p0, nxv4s32, 8},
596 {nxv2s64, p0, nxv2s64, 8},
597 })
598 .clampScalar(0, s8, s64)
599 .minScalarOrElt(0, s8)
600 .lowerIf([=](const LegalityQuery &Query) {
601 return Query.Types[0].isScalar() &&
602 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
603 })
604 // Maximum: sN * k = 128
605 .clampMaxNumElements(0, s8, 16)
606 .clampMaxNumElements(0, s16, 8)
607 .clampMaxNumElements(0, s32, 4)
608 .clampMaxNumElements(0, s64, 2)
609 .clampMaxNumElements(0, p0, 2)
611 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
612 .bitcastIf(all(typeInSet(0, {v4s8}),
613 LegalityPredicate([=](const LegalityQuery &Query) {
614 return Query.Types[0].getSizeInBits() ==
615 Query.MMODescrs[0].MemoryTy.getSizeInBits();
616 })),
617 [=](const LegalityQuery &Query) {
618 const LLT VecTy = Query.Types[0];
619 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
620 })
621 .customIf(IsPtrVecPred)
622 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
623 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
624 .lower();
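 // Illustrative example drawn from the memory-descriptor table above: a
 // truncating store of the low 16 bits of an s64 value, roughly
 //   G_STORE %x:_(s64), %p:_(p0) :: (store (s16)),
 // matches the {s64, p0, s16, 8} entry and stays legal; scalar stores whose
 // value type differs from the memory type in a way not listed are lowered by
 // the lowerIf clause instead.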
625
626 getActionDefinitionsBuilder(G_INDEXED_STORE)
627 // Idx 0 == Ptr, Idx 1 == Val
628 // TODO: we can implement legalizations but as of now these are
629 // generated in a very specific way.
631 {p0, s8, s8, 8},
632 {p0, s16, s16, 8},
633 {p0, s32, s8, 8},
634 {p0, s32, s16, 8},
635 {p0, s32, s32, 8},
636 {p0, s64, s64, 8},
637 {p0, p0, p0, 8},
638 {p0, v8s8, v8s8, 8},
639 {p0, v16s8, v16s8, 8},
640 {p0, v4s16, v4s16, 8},
641 {p0, v8s16, v8s16, 8},
642 {p0, v2s32, v2s32, 8},
643 {p0, v4s32, v4s32, 8},
644 {p0, v2s64, v2s64, 8},
645 {p0, v2p0, v2p0, 8},
646 {p0, s128, s128, 8},
647 })
648 .unsupported();
649
650 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
651 LLT LdTy = Query.Types[0];
652 LLT PtrTy = Query.Types[1];
653 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
654 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
655 return false;
656 if (PtrTy != p0)
657 return false;
658 return true;
659 };
660 getActionDefinitionsBuilder(G_INDEXED_LOAD)
663 .legalIf(IndexedLoadBasicPred)
664 .unsupported();
665 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
666 .unsupportedIf(
668 .legalIf(all(typeInSet(0, {s16, s32, s64}),
669 LegalityPredicate([=](const LegalityQuery &Q) {
670 LLT LdTy = Q.Types[0];
671 LLT PtrTy = Q.Types[1];
672 LLT MemTy = Q.MMODescrs[0].MemoryTy;
673 if (PtrTy != p0)
674 return false;
675 if (LdTy == s16)
676 return MemTy == s8;
677 if (LdTy == s32)
678 return MemTy == s8 || MemTy == s16;
679 if (LdTy == s64)
680 return MemTy == s8 || MemTy == s16 || MemTy == s32;
681 return false;
682 })))
683 .unsupported();
684
685 // Constants
687 .legalFor({p0, s8, s16, s32, s64})
688 .widenScalarToNextPow2(0)
689 .clampScalar(0, s8, s64);
690 getActionDefinitionsBuilder(G_FCONSTANT)
691 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
692 .legalFor({s16, s32, s64, s128})
693 .clampScalar(0, MinFPScalar, s128);
694
695 // FIXME: fix moreElementsToNextPow2
697 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
699 .clampScalar(1, s32, s64)
700 .clampScalar(0, s32, s32)
703 [=](const LegalityQuery &Query) {
704 const LLT &Ty = Query.Types[0];
705 const LLT &SrcTy = Query.Types[1];
706 return Ty.isVector() && !SrcTy.isPointerVector() &&
707 Ty.getElementType() != SrcTy.getElementType();
708 },
709 0, 1)
710 .minScalarOrEltIf(
711 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
712 1, s32)
713 .minScalarOrEltIf(
714 [=](const LegalityQuery &Query) {
715 return Query.Types[1].isPointerVector();
716 },
717 0, s64)
719 .clampNumElements(1, v8s8, v16s8)
720 .clampNumElements(1, v4s16, v8s16)
721 .clampNumElements(1, v2s32, v4s32)
722 .clampNumElements(1, v2s64, v2s64)
723 .clampNumElements(1, v2p0, v2p0)
724 .customIf(isVector(0));
725
727 .legalFor({{s32, s32},
728 {s32, s64},
729 {v4s32, v4s32},
730 {v2s32, v2s32},
731 {v2s64, v2s64}})
732 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
734 .clampScalar(0, s32, s32)
735 .minScalarOrElt(1, MinFPScalar)
738 [=](const LegalityQuery &Query) {
739 const LLT &Ty = Query.Types[0];
740 const LLT &SrcTy = Query.Types[1];
741 return Ty.isVector() && !SrcTy.isPointerVector() &&
742 Ty.getElementType() != SrcTy.getElementType();
743 },
744 0, 1)
745 .clampNumElements(1, v4s16, v8s16)
746 .clampNumElements(1, v2s32, v4s32)
747 .clampMaxNumElements(1, s64, 2)
749 .libcallFor({{s32, s128}});
750
751 // Extensions
752 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
753 unsigned DstSize = Query.Types[0].getSizeInBits();
754
755 // Handle legal vectors using legalFor
756 if (Query.Types[0].isVector())
757 return false;
758
759 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
760 return false; // Extending to a scalar s128 needs narrowing.
761
762 const LLT &SrcTy = Query.Types[1];
763
764 // Make sure we fit in a register otherwise. Don't bother checking that
765 // the source type is below 128 bits. We shouldn't be allowing anything
766 // through which is wider than the destination in the first place.
767 unsigned SrcSize = SrcTy.getSizeInBits();
768 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
769 return false;
770
771 return true;
772 };
773 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
774 .legalIf(ExtLegalFunc)
775 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
776 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
778 .clampMaxNumElements(1, s8, 8)
779 .clampMaxNumElements(1, s16, 4)
780 .clampMaxNumElements(1, s32, 2)
781 // Tries to convert a large EXTEND into two smaller EXTENDs
782 .lowerIf([=](const LegalityQuery &Query) {
783 return (Query.Types[0].getScalarSizeInBits() >
784 Query.Types[1].getScalarSizeInBits() * 2) &&
785 Query.Types[0].isVector() &&
786 (Query.Types[1].getScalarSizeInBits() == 8 ||
787 Query.Types[1].getScalarSizeInBits() == 16);
788 })
789 .clampMinNumElements(1, s8, 8)
790 .clampMinNumElements(1, s16, 4)
792
794 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
796 .clampMaxNumElements(0, s8, 8)
797 .clampMaxNumElements(0, s16, 4)
798 .clampMaxNumElements(0, s32, 2)
800 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
801 0, s8)
802 .lowerIf([=](const LegalityQuery &Query) {
803 LLT DstTy = Query.Types[0];
804 LLT SrcTy = Query.Types[1];
805 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
806 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
807 })
808 .clampMinNumElements(0, s8, 8)
809 .clampMinNumElements(0, s16, 4)
810 .alwaysLegal();
811
812 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
813 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
814
815 getActionDefinitionsBuilder(G_SEXT_INREG)
816 .legalFor({s32, s64})
817 .legalFor(PackedVectorAllTypeList)
818 .maxScalar(0, s64)
819 .clampNumElements(0, v8s8, v16s8)
820 .clampNumElements(0, v4s16, v8s16)
821 .clampNumElements(0, v2s32, v4s32)
822 .clampMaxNumElements(0, s64, 2)
823 .lower();
824
825 // FP conversions
827 .legalFor(
828 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
829 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
831 .customIf([](const LegalityQuery &Q) {
832 LLT DstTy = Q.Types[0];
833 LLT SrcTy = Q.Types[1];
834 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
835 SrcTy.getScalarSizeInBits() == 64 &&
836 DstTy.getScalarSizeInBits() == 16;
837 })
838 // Clamp based on input
839 .clampNumElements(1, v4s32, v4s32)
840 .clampNumElements(1, v2s64, v2s64)
841 .scalarize(0);
842
844 .legalFor(
845 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
846 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
849 [](const LegalityQuery &Q) {
850 LLT DstTy = Q.Types[0];
851 LLT SrcTy = Q.Types[1];
852 return SrcTy.isVector() && DstTy.isVector() &&
853 SrcTy.getScalarSizeInBits() == 16 &&
854 DstTy.getScalarSizeInBits() == 64;
855 },
856 changeElementTo(1, s32))
857 .clampNumElements(0, v4s32, v4s32)
858 .clampNumElements(0, v2s64, v2s64)
859 .scalarize(0);
860
861 // Conversions
862 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
863 .legalFor({{s32, s32},
864 {s64, s32},
865 {s32, s64},
866 {s64, s64},
867 {v2s32, v2s32},
868 {v4s32, v4s32},
869 {v2s64, v2s64}})
870 .legalFor(HasFP16,
871 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
872 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
874 // The range of a fp16 value fits into an i17, so we can lower the width
875 // to i64.
877 [=](const LegalityQuery &Query) {
878 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
879 },
880 changeTo(0, s64))
883 .minScalar(0, s32)
884 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
886 [=](const LegalityQuery &Query) {
887 return Query.Types[0].getScalarSizeInBits() <= 64 &&
888 Query.Types[0].getScalarSizeInBits() >
889 Query.Types[1].getScalarSizeInBits();
890 },
892 .widenScalarIf(
893 [=](const LegalityQuery &Query) {
894 return Query.Types[1].getScalarSizeInBits() <= 64 &&
895 Query.Types[0].getScalarSizeInBits() <
896 Query.Types[1].getScalarSizeInBits();
897 },
899 .clampNumElements(0, v4s16, v8s16)
900 .clampNumElements(0, v2s32, v4s32)
901 .clampMaxNumElements(0, s64, 2)
902 .libcallFor(
903 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
904
905 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
906 .legalFor({{s32, s32},
907 {s64, s32},
908 {s32, s64},
909 {s64, s64},
910 {v2s32, v2s32},
911 {v4s32, v4s32},
912 {v2s64, v2s64}})
913 .legalFor(
914 HasFP16,
915 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
916 // Handle types larger than i64 by scalarizing/lowering.
917 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
919 // The range of a fp16 value fits into an i17, so we can lower the width
920 // to i64.
922 [=](const LegalityQuery &Query) {
923 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
924 },
925 changeTo(0, s64))
926 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
928 .widenScalarToNextPow2(0, /*MinSize=*/32)
929 .minScalar(0, s32)
930 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
932 [=](const LegalityQuery &Query) {
933 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
934 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
935 ITySize > Query.Types[1].getScalarSizeInBits();
936 },
938 .widenScalarIf(
939 [=](const LegalityQuery &Query) {
940 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
941 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
942 Query.Types[0].getScalarSizeInBits() < FTySize;
943 },
946 .clampNumElements(0, v4s16, v8s16)
947 .clampNumElements(0, v2s32, v4s32)
948 .clampMaxNumElements(0, s64, 2);
949
950 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
951 .legalFor({{s32, s32},
952 {s64, s32},
953 {s32, s64},
954 {s64, s64},
955 {v2s32, v2s32},
956 {v4s32, v4s32},
957 {v2s64, v2s64}})
958 .legalFor(HasFP16,
959 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
960 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
964 .minScalar(1, s32)
965 .lowerIf([](const LegalityQuery &Query) {
966 return Query.Types[1].isVector() &&
967 Query.Types[1].getScalarSizeInBits() == 64 &&
968 Query.Types[0].getScalarSizeInBits() == 16;
969 })
970 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
972 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
973 [](const LegalityQuery &Query) {
974 return Query.Types[0].getScalarSizeInBits() == 32 &&
975 Query.Types[1].getScalarSizeInBits() == 64;
976 },
977 0)
978 .widenScalarIf(
979 [](const LegalityQuery &Query) {
980 return Query.Types[1].getScalarSizeInBits() <= 64 &&
981 Query.Types[0].getScalarSizeInBits() <
982 Query.Types[1].getScalarSizeInBits();
983 },
985 .widenScalarIf(
986 [](const LegalityQuery &Query) {
987 return Query.Types[0].getScalarSizeInBits() <= 64 &&
988 Query.Types[0].getScalarSizeInBits() >
989 Query.Types[1].getScalarSizeInBits();
990 },
992 .clampNumElements(0, v4s16, v8s16)
993 .clampNumElements(0, v2s32, v4s32)
994 .clampMaxNumElements(0, s64, 2)
995 .libcallFor({{s16, s128},
996 {s32, s128},
997 {s64, s128},
998 {s128, s128},
999 {s128, s32},
1000 {s128, s64}});
1001
1002 // Control-flow
1005 .legalFor({s32})
1006 .clampScalar(0, s32, s32);
1007 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1008
1010 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1011 .widenScalarToNextPow2(0)
1012 .clampScalar(0, s32, s64)
1013 .clampScalar(1, s32, s32)
1016 .lowerIf(isVector(0));
1017
1018 // Pointer-handling
1019 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1020
1021 if (TM.getCodeModel() == CodeModel::Small)
1022 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1023 else
1024 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1025
1026 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1027 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1028
1029 getActionDefinitionsBuilder(G_PTRTOINT)
1030 .legalFor({{s64, p0}, {v2s64, v2p0}})
1031 .widenScalarToNextPow2(0, 64)
1032 .clampScalar(0, s64, s64)
1033 .clampMaxNumElements(0, s64, 2);
1034
1035 getActionDefinitionsBuilder(G_INTTOPTR)
1036 .unsupportedIf([&](const LegalityQuery &Query) {
1037 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1038 })
1039 .legalFor({{p0, s64}, {v2p0, v2s64}})
1040 .clampMaxNumElements(1, s64, 2);
1041
1042 // Casts between 32 and 64-bit wide types are just copies.
1043 // The same holds for 128-bit wide types, except those live on the FPR bank.
1045 // Keeping 32-bit instructions legal to prevent regression in some tests
1046 .legalForCartesianProduct({s32, v2s16, v4s8})
1047 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1048 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1049 .customIf([=](const LegalityQuery &Query) {
1050 // Handle casts from i1 vectors to scalars.
1051 LLT DstTy = Query.Types[0];
1052 LLT SrcTy = Query.Types[1];
1053 return DstTy.isScalar() && SrcTy.isVector() &&
1054 SrcTy.getScalarSizeInBits() == 1;
1055 })
1056 .lowerIf([=](const LegalityQuery &Query) {
1057 return Query.Types[0].isVector() != Query.Types[1].isVector();
1058 })
1060 .clampNumElements(0, v8s8, v16s8)
1061 .clampNumElements(0, v4s16, v8s16)
1062 .clampNumElements(0, v2s32, v4s32)
1063 .lower();
1064
1065 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1066
1067 // va_list must be a pointer, but most sized types are pretty easy to handle
1068 // as the destination.
1070 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1071 .clampScalar(0, s8, s64)
1072 .widenScalarToNextPow2(0, /*Min*/ 8);
1073
1074 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1075 .lowerIf(
1076 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1077
1078 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1079
1080 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1081 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1082 .customFor(!UseOutlineAtomics, {{s128, p0}})
1083 .libcallFor(UseOutlineAtomics,
1084 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1085 .clampScalar(0, s32, s64);
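 // Note (assumption about the runtime, for illustration): when
 // UseOutlineAtomics is true the libcallFor clause routes G_ATOMIC_CMPXCHG to
 // the outline-atomics helpers (the __aarch64_cas* family) instead of
 // generating inline LSE or LL/SC sequences.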
1086
1087 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1088 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1089 G_ATOMICRMW_XOR})
1090 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1091 .libcallFor(UseOutlineAtomics,
1092 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1093 .clampScalar(0, s32, s64);
1094
1095 // Do not outline these atomic operations, as per the comment in
1096 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1097 getActionDefinitionsBuilder(
1098 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1099 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1100 .clampScalar(0, s32, s64);
1101
1102 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1103
1104 // Merge/Unmerge
1105 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1106 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1107 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1109 .widenScalarToNextPow2(LitTyIdx, 8)
1110 .widenScalarToNextPow2(BigTyIdx, 32)
1111 .clampScalar(LitTyIdx, s8, s64)
1112 .clampScalar(BigTyIdx, s32, s128)
1113 .legalIf([=](const LegalityQuery &Q) {
1114 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1115 case 32:
1116 case 64:
1117 case 128:
1118 break;
1119 default:
1120 return false;
1121 }
1122 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1123 case 8:
1124 case 16:
1125 case 32:
1126 case 64:
1127 return true;
1128 default:
1129 return false;
1130 }
1131 });
1132 }
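 // Example of the legalIf predicate above (illustration only): unmerging an
 // s64 into two s32 halves,
 //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64),
 // is legal because 64 is an accepted "big" size and 32 an accepted "little"
 // size; an s128 split into two s64 pieces is accepted for the same reason.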
1133
1134 // TODO : nxv4s16, nxv2s16, nxv2s32
1135 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1136 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1137 {s16, nxv8s16, s64},
1138 {s32, nxv4s32, s64},
1139 {s64, nxv2s64, s64}})
1140 .unsupportedIf([=](const LegalityQuery &Query) {
1141 const LLT &EltTy = Query.Types[1].getElementType();
1142 if (Query.Types[1].isScalableVector())
1143 return false;
1144 return Query.Types[0] != EltTy;
1145 })
1146 .minScalar(2, s64)
1147 .customIf([=](const LegalityQuery &Query) {
1148 const LLT &VecTy = Query.Types[1];
1149 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1150 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1151 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1152 })
1153 .minScalarOrEltIf(
1154 [=](const LegalityQuery &Query) {
1155 // We want to promote <M x s1> to <M x s64> if that wouldn't
1156 // cause the total vec size to be > 128b.
1157 return Query.Types[1].isFixedVector() &&
1158 Query.Types[1].getNumElements() <= 2;
1159 },
1160 0, s64)
1161 .minScalarOrEltIf(
1162 [=](const LegalityQuery &Query) {
1163 return Query.Types[1].isFixedVector() &&
1164 Query.Types[1].getNumElements() <= 4;
1165 },
1166 0, s32)
1167 .minScalarOrEltIf(
1168 [=](const LegalityQuery &Query) {
1169 return Query.Types[1].isFixedVector() &&
1170 Query.Types[1].getNumElements() <= 8;
1171 },
1172 0, s16)
1173 .minScalarOrEltIf(
1174 [=](const LegalityQuery &Query) {
1175 return Query.Types[1].isFixedVector() &&
1176 Query.Types[1].getNumElements() <= 16;
1177 },
1178 0, s8)
1179 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1181 .clampMaxNumElements(1, s64, 2)
1182 .clampMaxNumElements(1, s32, 4)
1183 .clampMaxNumElements(1, s16, 8)
1184 .clampMaxNumElements(1, s8, 16)
1185 .clampMaxNumElements(1, p0, 2)
1187
1188 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1189 .legalIf(
1190 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1191 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1192 {nxv8s16, s32, s64},
1193 {nxv4s32, s32, s64},
1194 {nxv2s64, s64, s64}})
1197 .clampNumElements(0, v8s8, v16s8)
1198 .clampNumElements(0, v4s16, v8s16)
1199 .clampNumElements(0, v2s32, v4s32)
1200 .clampMaxNumElements(0, s64, 2)
1201 .clampMaxNumElements(0, p0, 2)
1203
1204 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1205 .legalFor({{v8s8, s8},
1206 {v16s8, s8},
1207 {v4s16, s16},
1208 {v8s16, s16},
1209 {v2s32, s32},
1210 {v4s32, s32},
1211 {v2s64, s64},
1212 {v2p0, p0}})
1213 .clampNumElements(0, v4s32, v4s32)
1214 .clampNumElements(0, v2s64, v2s64)
1215 .minScalarOrElt(0, s8)
1218 .minScalarSameAs(1, 0);
1219
1220 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1221
1222 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1223 .legalIf([=](const LegalityQuery &Query) {
1224 const LLT &DstTy = Query.Types[0];
1225 const LLT &SrcTy = Query.Types[1];
1226 // For now just support the TBL2 variant which needs the source vectors
1227 // to be the same size as the dest.
1228 if (DstTy != SrcTy)
1229 return false;
1230 return llvm::is_contained(
1231 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1232 })
1233 .moreElementsIf(
1234 [](const LegalityQuery &Query) {
1235 return Query.Types[0].getNumElements() >
1236 Query.Types[1].getNumElements();
1237 },
1238 changeTo(1, 0))
1241 [](const LegalityQuery &Query) {
1242 return Query.Types[0].getNumElements() <
1243 Query.Types[1].getNumElements();
1244 },
1245 changeTo(0, 1))
1246 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1247 .clampNumElements(0, v8s8, v16s8)
1248 .clampNumElements(0, v4s16, v8s16)
1249 .clampNumElements(0, v4s32, v4s32)
1250 .clampNumElements(0, v2s64, v2s64)
1252 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1253 // Bitcast pointer vectors to vectors of i64.
1254 const LLT DstTy = Query.Types[0];
1255 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1256 });
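 // Illustration of the trailing bitcastIf: a shuffle of pointer vectors such
 // as v2p0 has no direct entry, so it is bitcast to a v2s64 shuffle
 // (LLT::vector(<2 x ...>, 64)) and the result is bitcast back afterwards.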
1257
1258 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1259 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1260 .bitcastIf(
1261 [=](const LegalityQuery &Query) {
1262 return Query.Types[0].isFixedVector() &&
1263 Query.Types[1].isFixedVector() &&
1264 Query.Types[0].getSizeInBits() <= 128 &&
1265 Query.Types[1].getSizeInBits() <= 64;
1266 },
1267 [=](const LegalityQuery &Query) {
1268 const LLT DstTy = Query.Types[0];
1269 const LLT SrcTy = Query.Types[1];
1270 return std::pair(
1271 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1274 SrcTy.getNumElements())));
1275 });
1276
1277 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1278 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1280 .immIdx(0); // Inform verifier imm idx 0 is handled.
1281
1282 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1283 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1284 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1285
1286 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1287
1288 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1289
1290 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1291
1292 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1293
1294 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1295
1296 if (ST.hasMOPS()) {
1297 // G_BZERO is not supported. Currently it is only emitted by
1298 // PreLegalizerCombiner for G_MEMSET with zero constant.
1300
1302 .legalForCartesianProduct({p0}, {s64}, {s64})
1303 .customForCartesianProduct({p0}, {s8}, {s64})
1304 .immIdx(0); // Inform verifier imm idx 0 is handled.
1305
1306 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1307 .legalForCartesianProduct({p0}, {p0}, {s64})
1308 .immIdx(0); // Inform verifier imm idx 0 is handled.
1309
1310 // G_MEMCPY_INLINE does not have a tailcall immediate
1311 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1312 .legalForCartesianProduct({p0}, {p0}, {s64});
1313
1314 } else {
1315 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1316 .libcall();
1317 }
1318
1319 // For fadd reductions we have pairwise operations available. We treat the
1320 // usual legal types as legal and handle the lowering to pairwise instructions
1321 // later.
1322 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1323 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1324 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1325 .minScalarOrElt(0, MinFPScalar)
1326 .clampMaxNumElements(1, s64, 2)
1327 .clampMaxNumElements(1, s32, 4)
1328 .clampMaxNumElements(1, s16, 8)
1330 .scalarize(1)
1331 .lower();
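 // Example (a statement of intent, not of final instruction selection): a
 // G_VECREDUCE_FADD of v4s32 is kept legal here and is expected to be expanded
 // into pairwise additions (e.g. FADDP) later, as the comment above describes.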
1332
1333 // For fmul reductions we need to split up into individual operations. We
1334 // clamp to 128-bit vectors, then to 64-bit vectors, to produce a cascade of
1335 // smaller types, followed by scalarizing what remains.
1336 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1337 .minScalarOrElt(0, MinFPScalar)
1338 .clampMaxNumElements(1, s64, 2)
1339 .clampMaxNumElements(1, s32, 4)
1340 .clampMaxNumElements(1, s16, 8)
1341 .clampMaxNumElements(1, s32, 2)
1342 .clampMaxNumElements(1, s16, 4)
1343 .scalarize(1)
1344 .lower();
1345
1346 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1347 .scalarize(2)
1348 .lower();
1349
1350 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1351 .legalFor({{s8, v8s8},
1352 {s8, v16s8},
1353 {s16, v4s16},
1354 {s16, v8s16},
1355 {s32, v2s32},
1356 {s32, v4s32},
1357 {s64, v2s64}})
1359 .clampMaxNumElements(1, s64, 2)
1360 .clampMaxNumElements(1, s32, 4)
1361 .clampMaxNumElements(1, s16, 8)
1362 .clampMaxNumElements(1, s8, 16)
1364 .scalarize(1);
1365
1366 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1367 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1368 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1369 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1370 .minScalarOrElt(0, MinFPScalar)
1371 .clampMaxNumElements(1, s64, 2)
1372 .clampMaxNumElements(1, s32, 4)
1373 .clampMaxNumElements(1, s16, 8)
1374 .scalarize(1)
1375 .lower();
1376
1377 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1378 .clampMaxNumElements(1, s32, 2)
1379 .clampMaxNumElements(1, s16, 4)
1380 .clampMaxNumElements(1, s8, 8)
1381 .scalarize(1)
1382 .lower();
1383
1384 getActionDefinitionsBuilder(
1385 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1386 .legalFor({{s8, v8s8},
1387 {s8, v16s8},
1388 {s16, v4s16},
1389 {s16, v8s16},
1390 {s32, v2s32},
1391 {s32, v4s32}})
1392 .moreElementsIf(
1393 [=](const LegalityQuery &Query) {
1394 return Query.Types[1].isVector() &&
1395 Query.Types[1].getElementType() != s8 &&
1396 Query.Types[1].getNumElements() & 1;
1397 },
1399 .clampMaxNumElements(1, s64, 2)
1400 .clampMaxNumElements(1, s32, 4)
1401 .clampMaxNumElements(1, s16, 8)
1402 .clampMaxNumElements(1, s8, 16)
1403 .scalarize(1)
1404 .lower();
1405
1406 getActionDefinitionsBuilder(
1407 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1408 // Try to break down into smaller vectors as long as they're at least 64
1409 // bits. This lets us use vector operations for some parts of the
1410 // reduction.
1411 .fewerElementsIf(
1412 [=](const LegalityQuery &Q) {
1413 LLT SrcTy = Q.Types[1];
1414 if (SrcTy.isScalar())
1415 return false;
1416 if (!isPowerOf2_32(SrcTy.getNumElements()))
1417 return false;
1418 // We can usually perform 64b vector operations.
1419 return SrcTy.getSizeInBits() > 64;
1420 },
1421 [=](const LegalityQuery &Q) {
1422 LLT SrcTy = Q.Types[1];
1423 return std::make_pair(1, SrcTy.divide(2));
1424 })
1425 .scalarize(1)
1426 .lower();
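 // Worked illustration of the fewerElementsIf rule above: a v4s32 OR-reduction
 // (128 bits) is split once via SrcTy.divide(2) into a v2s32 reduction; the
 // remaining 64-bit case no longer matches the predicate and is scalarized and
 // lowered into plain G_OR operations.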
1427
1428 // TODO: Update this to correct handling when adding AArch64/SVE support.
1429 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1430
1431 // Access to floating-point environment.
1432 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1433 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1434 .libcall();
1435
1436 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1437
1438 getActionDefinitionsBuilder(G_PREFETCH).custom();
1439
1440 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1441
1442 getLegacyLegalizerInfo().computeTables();
1443 verify(*ST.getInstrInfo());
1444}
1445
1446bool AArch64LegalizerInfo::legalizeCustom(
1447 LegalizerHelper &Helper, MachineInstr &MI,
1448 LostDebugLocObserver &LocObserver) const {
1449 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1450 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1451 GISelChangeObserver &Observer = Helper.Observer;
1452 switch (MI.getOpcode()) {
1453 default:
1454 // No idea what to do.
1455 return false;
1456 case TargetOpcode::G_VAARG:
1457 return legalizeVaArg(MI, MRI, MIRBuilder);
1458 case TargetOpcode::G_LOAD:
1459 case TargetOpcode::G_STORE:
1460 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1461 case TargetOpcode::G_SHL:
1462 case TargetOpcode::G_ASHR:
1463 case TargetOpcode::G_LSHR:
1464 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1465 case TargetOpcode::G_GLOBAL_VALUE:
1466 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1467 case TargetOpcode::G_SBFX:
1468 case TargetOpcode::G_UBFX:
1469 return legalizeBitfieldExtract(MI, MRI, Helper);
1470 case TargetOpcode::G_FSHL:
1471 case TargetOpcode::G_FSHR:
1472 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1473 case TargetOpcode::G_ROTR:
1474 return legalizeRotate(MI, MRI, Helper);
1475 case TargetOpcode::G_CTPOP:
1476 return legalizeCTPOP(MI, MRI, Helper);
1477 case TargetOpcode::G_ATOMIC_CMPXCHG:
1478 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1479 case TargetOpcode::G_CTTZ:
1480 return legalizeCTTZ(MI, Helper);
1481 case TargetOpcode::G_BZERO:
1482 case TargetOpcode::G_MEMCPY:
1483 case TargetOpcode::G_MEMMOVE:
1484 case TargetOpcode::G_MEMSET:
1485 return legalizeMemOps(MI, Helper);
1486 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1487 return legalizeExtractVectorElt(MI, MRI, Helper);
1488 case TargetOpcode::G_DYN_STACKALLOC:
1489 return legalizeDynStackAlloc(MI, Helper);
1490 case TargetOpcode::G_PREFETCH:
1491 return legalizePrefetch(MI, Helper);
1492 case TargetOpcode::G_ABS:
1493 return Helper.lowerAbsToCNeg(MI);
1494 case TargetOpcode::G_ICMP:
1495 return legalizeICMP(MI, MRI, MIRBuilder);
1496 case TargetOpcode::G_BITCAST:
1497 return legalizeBitcast(MI, Helper);
1498 case TargetOpcode::G_FPTRUNC:
1499 // In order to lower f64 to f16 properly, we need to use f32 as an
1500 // intermediary.
1501 return legalizeFptrunc(MI, MIRBuilder, MRI);
1502 }
1503
1504 llvm_unreachable("expected switch to return");
1505}
1506
1507bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1508 LegalizerHelper &Helper) const {
1509 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1510 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1511 // We're trying to handle casts from i1 vectors to scalars by storing the
1512 // vector to the stack and reloading the scalar.
1513 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1514 SrcTy.getElementType() != LLT::scalar(1))
1515 return false;
1516
1517 Helper.createStackStoreLoad(DstReg, SrcReg);
1518 MI.eraseFromParent();
1519 return true;
1520}
1521
1522bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1523 MachineRegisterInfo &MRI,
1524 MachineIRBuilder &MIRBuilder,
1525 GISelChangeObserver &Observer,
1526 LegalizerHelper &Helper) const {
1527 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1528 MI.getOpcode() == TargetOpcode::G_FSHR);
1529
1530 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1531 // lowering
1532 Register ShiftNo = MI.getOperand(3).getReg();
1533 LLT ShiftTy = MRI.getType(ShiftNo);
1534 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1535
1536 // Adjust shift amount according to Opcode (FSHL/FSHR)
1537 // Convert FSHL to FSHR
1538 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1539 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1540
1541 // Lower non-constant shifts and leave zero shifts to the optimizer.
1542 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1543 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1544 LegalizerHelper::LegalizeResult::Legalized);
1545
1546 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1547
1548 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1549
1550 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1551 // the range [0, BitWidth), it is already legal.
1552 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1553 VRegAndVal->Value.ult(BitWidth))
1554 return true;
1555
1556 // Cast the ShiftNumber to a 64-bit type
1557 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1558
1559 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1560 Observer.changingInstr(MI);
1561 MI.getOperand(3).setReg(Cast64.getReg(0));
1562 Observer.changedInstr(MI);
1563 }
1564 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1565 // instruction
1566 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1567 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1568 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1569 Cast64.getReg(0)});
1570 MI.eraseFromParent();
1571 }
1572 return true;
1573}
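// Sanity example for the conversion above (illustration only): for a 32-bit
// G_FSHL with a constant shift of 3, Amount becomes 32 - 3 = 29 and the
// instruction is rebuilt as a G_FSHR by 29, which is equivalent because
// fshl(a, b, c) == fshr(a, b, BitWidth - c) for any non-zero c modulo BitWidth.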
1574
1575bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1576 MachineRegisterInfo &MRI,
1577 MachineIRBuilder &MIRBuilder) const {
1578 Register DstReg = MI.getOperand(0).getReg();
1579 Register SrcReg1 = MI.getOperand(2).getReg();
1580 Register SrcReg2 = MI.getOperand(3).getReg();
1581 LLT DstTy = MRI.getType(DstReg);
1582 LLT SrcTy = MRI.getType(SrcReg1);
1583
1584 // Check the vector types are legal
1585 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1586 DstTy.getNumElements() != SrcTy.getNumElements() ||
1587 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1588 return false;
1589
1590 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1591 // following passes
1592 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1593 if (Pred != CmpInst::ICMP_NE)
1594 return true;
1595 Register CmpReg =
1596 MIRBuilder
1597 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1598 .getReg(0);
1599 MIRBuilder.buildNot(DstReg, CmpReg);
1600
1601 MI.eraseFromParent();
1602 return true;
1603}
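// Example of the NE -> EQ rewrite above (illustration only): a vector
//   %c:_(v4s32) = G_ICMP intpred(ne), %a, %b
// becomes an EQ compare followed by buildNot (a G_XOR with all-ones), which
// the earlier comment notes is easier for later pattern matching.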
1604
1605bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1606 MachineRegisterInfo &MRI,
1607 LegalizerHelper &Helper) const {
1608 // To allow for imported patterns to match, we ensure that the rotate amount
1609 // is 64b with an extension.
1610 Register AmtReg = MI.getOperand(2).getReg();
1611 LLT AmtTy = MRI.getType(AmtReg);
1612 (void)AmtTy;
1613 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1614 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1615 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1616 Helper.Observer.changingInstr(MI);
1617 MI.getOperand(2).setReg(NewAmt.getReg(0));
1618 Helper.Observer.changedInstr(MI);
1619 return true;
1620}
1621
1622bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1623 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1624 GISelChangeObserver &Observer) const {
1625 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1626 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1627 // G_ADD_LOW instructions.
1628 // By splitting this here, we can optimize accesses in the small code model by
1629 // folding in the G_ADD_LOW into the load/store offset.
1630 auto &GlobalOp = MI.getOperand(1);
1631 // Don't modify an intrinsic call.
1632 if (GlobalOp.isSymbol())
1633 return true;
1634 const auto* GV = GlobalOp.getGlobal();
1635 if (GV->isThreadLocal())
1636 return true; // Don't want to modify TLS vars.
1637
1638 auto &TM = ST->getTargetLowering()->getTargetMachine();
1639 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1640
1641 if (OpFlags & AArch64II::MO_GOT)
1642 return true;
1643
1644 auto Offset = GlobalOp.getOffset();
1645 Register DstReg = MI.getOperand(0).getReg();
1646 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1647 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1648 // Set the regclass on the dest reg too.
1649 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1650
1651 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1652 // by creating a MOVK that sets bits 48-63 of the register to (global address
1653 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1654 // prevent an incorrect tag being generated during relocation when the
1655 // global appears before the code section. Without the offset, a global at
1656 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1657 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1658 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1659 // instead of `0xf`.
1660 // This assumes that we're in the small code model so we can assume a binary
1661 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1662 // binary must also be loaded into address range [0, 2^48). Both of these
1663 // properties need to be ensured at runtime when using tagged addresses.
1664 if (OpFlags & AArch64II::MO_TAGGED) {
1665 assert(!Offset &&
1666 "Should not have folded in an offset for a tagged global!");
1667 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1668 .addGlobalAddress(GV, 0x100000000,
1670 .addImm(48);
1671 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1672 }
1673
1674 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1675 .addGlobalAddress(GV, Offset,
1677 MI.eraseFromParent();
1678 return true;
1679}
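// Illustrative result of the split above (MIR shown roughly, not verbatim):
// in the small code model a G_GLOBAL_VALUE @g becomes
//   %page:_(p0) = ADRP @g             ; page address
//   %addr:_(p0) = G_ADD_LOW %page, @g ; :lo12: offset
// so later combines can fold the low part into load/store addressing modes.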
1680
1681bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1682 MachineInstr &MI) const {
1683 MachineIRBuilder &MIB = Helper.MIRBuilder;
1684 MachineRegisterInfo &MRI = *MIB.getMRI();
1685
1686 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1687 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1688 MI.eraseFromParent();
1689 return true;
1690 };
1691 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1692 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1693 {MI.getOperand(2), MI.getOperand(3)});
1694 MI.eraseFromParent();
1695 return true;
1696 };
1697 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1698 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1699 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1700 MI.eraseFromParent();
1701 return true;
1702 };
1703
1704 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1705 switch (IntrinsicID) {
1706 case Intrinsic::vacopy: {
1707 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1708 unsigned VaListSize =
1709 (ST->isTargetDarwin() || ST->isTargetWindows())
1710 ? PtrSize
1711 : ST->isTargetILP32() ? 20 : 32;
1712
1713 MachineFunction &MF = *MI.getMF();
1715 LLT::scalar(VaListSize * 8));
1716 MIB.buildLoad(Val, MI.getOperand(2),
1719 VaListSize, Align(PtrSize)));
1720 MIB.buildStore(Val, MI.getOperand(1),
1723 VaListSize, Align(PtrSize)));
1724 MI.eraseFromParent();
1725 return true;
1726 }
1727 case Intrinsic::get_dynamic_area_offset: {
1728 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1729 MI.eraseFromParent();
1730 return true;
1731 }
1732 case Intrinsic::aarch64_mops_memset_tag: {
1733 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1734 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1735 // the instruction).
1736 auto &Value = MI.getOperand(3);
1737 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1738 Value.setReg(ExtValueReg);
1739 return true;
1740 }
1741 case Intrinsic::aarch64_prefetch: {
1742 auto &AddrVal = MI.getOperand(1);
1743
1744 int64_t IsWrite = MI.getOperand(2).getImm();
1745 int64_t Target = MI.getOperand(3).getImm();
1746 int64_t IsStream = MI.getOperand(4).getImm();
1747 int64_t IsData = MI.getOperand(5).getImm();
1748
1749 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1750 (!IsData << 3) | // IsDataCache bit
1751 (Target << 1) | // Cache level bits
1752 (unsigned)IsStream; // Stream bit
1753
1754 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1755 MI.eraseFromParent();
1756 return true;
1757 }
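 // Worked encoding example for PrfOp above (illustration only): a data
 // prefetch for store to L1 with the "keep" policy (IsWrite=1, Target=0,
 // IsStream=0, IsData=1) packs to (1<<4)|(0<<3)|(0<<1)|0 = 0b10000, which
 // corresponds to the PSTL1KEEP hint.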
1758 case Intrinsic::aarch64_neon_uaddv:
1759 case Intrinsic::aarch64_neon_saddv:
1760 case Intrinsic::aarch64_neon_umaxv:
1761 case Intrinsic::aarch64_neon_smaxv:
1762 case Intrinsic::aarch64_neon_uminv:
1763 case Intrinsic::aarch64_neon_sminv: {
1764 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1765 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1766 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1767
1768 auto OldDst = MI.getOperand(0).getReg();
1769 auto OldDstTy = MRI.getType(OldDst);
1770 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1771 if (OldDstTy == NewDstTy)
1772 return true;
1773
1774 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1775
1776 Helper.Observer.changingInstr(MI);
1777 MI.getOperand(0).setReg(NewDst);
1778 Helper.Observer.changedInstr(MI);
1779
1780 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1781 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1782 OldDst, NewDst);
1783
1784 return true;
1785 }
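// Illustrative note (not in the upstream source): these across-lanes reduction
// intrinsics return i32/i64 in IR even when the vector elements are narrower.
// The code above retypes the destination to the element type (e.g. s8 for
// uaddv of <8 x s8>) and re-extends to the original width with G_SEXT/G_ZEXT
// immediately after, so the reduction itself is selected at the element width.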
1786 case Intrinsic::aarch64_neon_uaddlp:
1787 case Intrinsic::aarch64_neon_saddlp: {
1788 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1789 ? AArch64::G_UADDLP
1790 : AArch64::G_SADDLP;
1791 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1792 MI.eraseFromParent();
1793
1794 return true;
1795 }
1796 case Intrinsic::aarch64_neon_uaddlv:
1797 case Intrinsic::aarch64_neon_saddlv: {
1798 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1799 ? AArch64::G_UADDLV
1800 : AArch64::G_SADDLV;
1801 Register DstReg = MI.getOperand(0).getReg();
1802 Register SrcReg = MI.getOperand(2).getReg();
1803 LLT DstTy = MRI.getType(DstReg);
1804
1805 LLT MidTy, ExtTy;
1806 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1807 MidTy = LLT::fixed_vector(4, 32);
1808 ExtTy = LLT::scalar(32);
1809 } else {
1810 MidTy = LLT::fixed_vector(2, 64);
1811 ExtTy = LLT::scalar(64);
1812 }
1813
1814 Register MidReg =
1815 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1816 Register ZeroReg =
1817 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1818 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1819 {MidReg, ZeroReg})
1820 .getReg(0);
1821
1822 if (DstTy.getScalarSizeInBits() < 32)
1823 MIB.buildTrunc(DstReg, ExtReg);
1824 else
1825 MIB.buildCopy(DstReg, ExtReg);
1826
1827 MI.eraseFromParent();
1828
1829 return true;
1830 }
1831 case Intrinsic::aarch64_neon_smax:
1832 return LowerBinOp(TargetOpcode::G_SMAX);
1833 case Intrinsic::aarch64_neon_smin:
1834 return LowerBinOp(TargetOpcode::G_SMIN);
1835 case Intrinsic::aarch64_neon_umax:
1836 return LowerBinOp(TargetOpcode::G_UMAX);
1837 case Intrinsic::aarch64_neon_umin:
1838 return LowerBinOp(TargetOpcode::G_UMIN);
1839 case Intrinsic::aarch64_neon_fmax:
1840 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1841 case Intrinsic::aarch64_neon_fmin:
1842 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1843 case Intrinsic::aarch64_neon_fmaxnm:
1844 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1845 case Intrinsic::aarch64_neon_fminnm:
1846 return LowerBinOp(TargetOpcode::G_FMINNUM);
1847 case Intrinsic::aarch64_neon_pmull:
1848 case Intrinsic::aarch64_neon_pmull64:
1849 return LowerBinOp(AArch64::G_PMULL);
1850 case Intrinsic::aarch64_neon_smull:
1851 return LowerBinOp(AArch64::G_SMULL);
1852 case Intrinsic::aarch64_neon_umull:
1853 return LowerBinOp(AArch64::G_UMULL);
1854 case Intrinsic::aarch64_neon_sabd:
1855 return LowerBinOp(TargetOpcode::G_ABDS);
1856 case Intrinsic::aarch64_neon_uabd:
1857 return LowerBinOp(TargetOpcode::G_ABDU);
1858 case Intrinsic::aarch64_neon_uhadd:
1859 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1860 case Intrinsic::aarch64_neon_urhadd:
1861 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1862 case Intrinsic::aarch64_neon_shadd:
1863 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1864 case Intrinsic::aarch64_neon_srhadd:
1865 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1866 case Intrinsic::aarch64_neon_sqshrn: {
1867 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1868 return false;
1869 // Create right shift instruction. Store the output register in Shr.
1870 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1871 {MRI.getType(MI.getOperand(2).getReg())},
1872 {MI.getOperand(2), MI.getOperand(3).getImm()});
1873 // Build the narrow intrinsic, taking in Shr.
1874 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1875 MI.eraseFromParent();
1876 return true;
1877 }
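// Illustrative note (not in the upstream source): sqshrn #n becomes an
// arithmetic shift right by n (G_VASHR) feeding a signed saturating narrow
// (G_TRUNC_SSAT_S). The cases below follow the same pattern, swapping in the
// rounding shifts (G_SRSHR_I/G_URSHR_I), the logical shift (G_VLSHR), and the
// signed-to-unsigned / unsigned saturating narrows (G_TRUNC_SSAT_U /
// G_TRUNC_USAT_U) as appropriate for each intrinsic.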
1878 case Intrinsic::aarch64_neon_sqshrun: {
1879 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1880 return false;
1881 // Create right shift instruction. Store the output register in Shr.
1882 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1883 {MRI.getType(MI.getOperand(2).getReg())},
1884 {MI.getOperand(2), MI.getOperand(3).getImm()});
1885 // Build the narrow intrinsic, taking in Shr.
1886 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1887 MI.eraseFromParent();
1888 return true;
1889 }
1890 case Intrinsic::aarch64_neon_sqrshrn: {
1891 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1892 return false;
1893 // Create right shift instruction. Store the output register in Shr.
1894 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1895 {MRI.getType(MI.getOperand(2).getReg())},
1896 {MI.getOperand(2), MI.getOperand(3).getImm()});
1897 // Build the narrow intrinsic, taking in Shr.
1898 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1899 MI.eraseFromParent();
1900 return true;
1901 }
1902 case Intrinsic::aarch64_neon_sqrshrun: {
1903 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1904 return false;
1905 // Create right shift instruction. Store the output register in Shr.
1906 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1907 {MRI.getType(MI.getOperand(2).getReg())},
1908 {MI.getOperand(2), MI.getOperand(3).getImm()});
1909 // Build the narrow intrinsic, taking in Shr.
1910 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1911 MI.eraseFromParent();
1912 return true;
1913 }
1914 case Intrinsic::aarch64_neon_uqrshrn: {
1915 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1916 return false;
1917 // Create right shift instruction. Store the output register in Shr.
1918 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1919 {MRI.getType(MI.getOperand(2).getReg())},
1920 {MI.getOperand(2), MI.getOperand(3).getImm()});
1921 // Build the narrow intrinsic, taking in Shr.
1922 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1923 MI.eraseFromParent();
1924 return true;
1925 }
1926 case Intrinsic::aarch64_neon_uqshrn: {
1927 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1928 return false;
1929 // Create right shift instruction. Store the output register in Shr.
1930 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
1931 {MRI.getType(MI.getOperand(2).getReg())},
1932 {MI.getOperand(2), MI.getOperand(3).getImm()});
1933 // Build the narrow intrinsic, taking in Shr.
1934 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1935 MI.eraseFromParent();
1936 return true;
1937 }
1938 case Intrinsic::aarch64_neon_sqshlu: {
1939 // Check if last operand is constant vector dup
1940 auto ShiftAmount = isConstantOrConstantSplatVector(
1941 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
1942 if (ShiftAmount) {
1943 // If so, create a new intrinsic with the correct shift amount
1944 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
1945 {MI.getOperand(2)})
1946 .addImm(ShiftAmount->getSExtValue());
1947 MI.eraseFromParent();
1948 return true;
1949 }
1950 return false;
1951 }
1952 case Intrinsic::aarch64_neon_abs: {
1953 // Lower the intrinsic to G_ABS.
1954 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1955 MI.eraseFromParent();
1956 return true;
1957 }
1958 case Intrinsic::aarch64_neon_sqadd: {
1959 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1960 return LowerBinOp(TargetOpcode::G_SADDSAT);
1961 break;
1962 }
1963 case Intrinsic::aarch64_neon_sqsub: {
1964 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1965 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1966 break;
1967 }
1968 case Intrinsic::aarch64_neon_uqadd: {
1969 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1970 return LowerBinOp(TargetOpcode::G_UADDSAT);
1971 break;
1972 }
1973 case Intrinsic::aarch64_neon_uqsub: {
1974 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1975 return LowerBinOp(TargetOpcode::G_USUBSAT);
1976 break;
1977 }
1978 case Intrinsic::aarch64_neon_udot:
1979 return LowerTriOp(AArch64::G_UDOT);
1980 case Intrinsic::aarch64_neon_sdot:
1981 return LowerTriOp(AArch64::G_SDOT);
1982 case Intrinsic::aarch64_neon_usdot:
1983 return LowerTriOp(AArch64::G_USDOT);
1984 case Intrinsic::aarch64_neon_sqxtn:
1985 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
1986 case Intrinsic::aarch64_neon_sqxtun:
1987 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
1988 case Intrinsic::aarch64_neon_uqxtn:
1989 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
1990
1991 case Intrinsic::vector_reverse:
1992 // TODO: Add support for vector_reverse
1993 return false;
1994 }
1995
1996 return true;
1997}
1998
1999bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2000 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2001 GISelChangeObserver &Observer) const {
2002 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2003 MI.getOpcode() == TargetOpcode::G_LSHR ||
2004 MI.getOpcode() == TargetOpcode::G_SHL);
2005 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2006 // imported patterns can select it later. Either way, it will be legal.
2007 Register AmtReg = MI.getOperand(2).getReg();
2008 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2009 if (!VRegAndVal)
2010 return true;
2011 // Check the shift amount is in range for an immediate form.
2012 int64_t Amount = VRegAndVal->Value.getSExtValue();
2013 if (Amount > 31)
2014 return true; // This will have to remain a register variant.
2015 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
2016 Observer.changingInstr(MI);
2017 MI.getOperand(2).setReg(ExtCst.getReg(0));
2018 Observer.changedInstr(MI);
2019 return true;
2020}
2021
2022 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2023 MachineRegisterInfo &MRI) {
2024 Base = Root;
2025 Offset = 0;
2026
2027 Register NewBase;
2028 int64_t NewOffset;
2029 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2030 isShiftedInt<7, 3>(NewOffset)) {
2031 Base = NewBase;
2032 Offset = NewOffset;
2033 }
2034}
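// Illustrative note (not in the upstream source): isShiftedInt<7, 3> accepts
// offsets that are a multiple of 8 whose scaled value fits in a signed 7-bit
// immediate, i.e. [-512, 504] in steps of 8 -- exactly the imm7 range of
// LDP/STP. The caller divides the matched offset by 8 before emitting it
// (NewI.addImm(Offset / 8)).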
2035
2036// FIXME: This should be removed and replaced with the generic bitcast legalize
2037// action.
2038bool AArch64LegalizerInfo::legalizeLoadStore(
2039 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2040 GISelChangeObserver &Observer) const {
2041 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2042 MI.getOpcode() == TargetOpcode::G_LOAD);
2043 // Here we just try to handle vector loads/stores where our value type might
2044 // have pointer elements, which the SelectionDAG importer can't handle. To
2045 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2046 // the value to use s64 types.
2047
2048 // Custom legalization requires that the instruction, if not deleted, be
2049 // fully legalized. In order to allow further legalization of the inst, we
2050 // create a new instruction and erase the existing one.
2051
2052 Register ValReg = MI.getOperand(0).getReg();
2053 const LLT ValTy = MRI.getType(ValReg);
2054
2055 if (ValTy == LLT::scalar(128)) {
2056
2057 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2058 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2059 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2060 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2061 bool IsRcpC3 =
2062 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2063
2064 LLT s64 = LLT::scalar(64);
2065
2066 unsigned Opcode;
2067 if (IsRcpC3) {
2068 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2069 } else {
2070 // For LSE2, loads/stores should have been converted to monotonic and had
2071 // a fence inserted after them.
2072 assert(Ordering == AtomicOrdering::Monotonic ||
2073 Ordering == AtomicOrdering::Unordered);
2074 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2075
2076 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2077 }
2078
2079 MachineInstrBuilder NewI;
2080 if (IsLoad) {
2081 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2082 MIRBuilder.buildMergeLikeInstr(
2083 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2084 } else {
2085 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2086 NewI = MIRBuilder.buildInstr(
2087 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2088 }
2089
2090 if (IsRcpC3) {
2091 NewI.addUse(MI.getOperand(1).getReg());
2092 } else {
2093 Register Base;
2094 int Offset;
2095 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2096 NewI.addUse(Base);
2097 NewI.addImm(Offset / 8);
2098 }
2099
2100 NewI.cloneMemRefs(MI);
2101 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2102 *MRI.getTargetRegisterInfo(),
2103 *ST->getRegBankInfo());
2104 MI.eraseFromParent();
2105 return true;
2106 }
2107
2108 if (!ValTy.isPointerVector() ||
2109 ValTy.getElementType().getAddressSpace() != 0) {
2110 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2111 return false;
2112 }
2113
2114 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2115 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
2116 auto &MMO = **MI.memoperands_begin();
2117 MMO.setType(NewTy);
2118
2119 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2120 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2121 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2122 } else {
2123 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2124 MIRBuilder.buildBitcast(ValReg, NewLoad);
2125 }
2126 MI.eraseFromParent();
2127 return true;
2128}
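// Illustrative example (not in the upstream source) of the pointer-vector
// bitcast path at the end of this function:
//   G_STORE %val(<2 x p0>), %addr(p0)
// becomes
//   %cast:_(<2 x s64>) = G_BITCAST %val(<2 x p0>)
//   G_STORE %cast(<2 x s64>), %addr(p0)
// so the existing v2s64 store patterns can select it.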
2129
2130bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2131 MachineRegisterInfo &MRI,
2132 MachineIRBuilder &MIRBuilder) const {
2133 MachineFunction &MF = MIRBuilder.getMF();
2134 Align Alignment(MI.getOperand(2).getImm());
2135 Register Dst = MI.getOperand(0).getReg();
2136 Register ListPtr = MI.getOperand(1).getReg();
2137
2138 LLT PtrTy = MRI.getType(ListPtr);
2139 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2140
2141 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2142 const Align PtrAlign = Align(PtrSize);
2143 auto List = MIRBuilder.buildLoad(
2144 PtrTy, ListPtr,
2145 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2146 PtrTy, PtrAlign));
2147
2148 MachineInstrBuilder DstPtr;
2149 if (Alignment > PtrAlign) {
2150 // Realign the list to the actual required alignment.
2151 auto AlignMinus1 =
2152 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2153 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2154 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2155 } else
2156 DstPtr = List;
2157
2158 LLT ValTy = MRI.getType(Dst);
2159 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2160 MIRBuilder.buildLoad(
2161 Dst, DstPtr,
2162 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2163 ValTy, std::max(Alignment, PtrAlign)));
2164
2165 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2166
2167 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2168
2169 MIRBuilder.buildStore(NewList, ListPtr,
2170 *MF.getMachineMemOperand(MachinePointerInfo(),
2171 MachineMemOperand::MOStore,
2172 PtrTy, PtrAlign));
2173
2174 MI.eraseFromParent();
2175 return true;
2176}
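// Worked example (not in the upstream source): for va_arg(ap, int) on LP64,
// PtrSize = 8, ValSize = 4 and Alignment <= 8, so the code loads the current
// pointer from the va_list, loads a 4-byte value from it, and stores back the
// pointer advanced by alignTo(4, 8) = 8, i.e. the next pointer-aligned slot.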
2177
2178bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2179 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2180 // Only legal if we can select immediate forms.
2181 // TODO: Lower this otherwise.
2182 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2183 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2184}
2185
2186bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2187 MachineRegisterInfo &MRI,
2188 LegalizerHelper &Helper) const {
2189 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2190 // it can be more efficiently lowered to the following sequence that uses
2191 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2192 // registers are cheap.
2193 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2194 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2195 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2196 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2197 //
2198 // For 128 bit vector popcounts, we lower to the following sequence:
2199 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2200 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2201 // uaddlp.4s v0, v0 // v4s32, v2s64
2202 // uaddlp.2d v0, v0 // v2s64
2203 //
2204 // For 64 bit vector popcounts, we lower to the following sequence:
2205 // cnt.8b v0, v0 // v4s16, v2s32
2206 // uaddlp.4h v0, v0 // v4s16, v2s32
2207 // uaddlp.2s v0, v0 // v2s32
2208
2209 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2210 Register Dst = MI.getOperand(0).getReg();
2211 Register Val = MI.getOperand(1).getReg();
2212 LLT Ty = MRI.getType(Val);
2213 unsigned Size = Ty.getSizeInBits();
2214
2215 assert(Ty == MRI.getType(Dst) &&
2216 "Expected src and dst to have the same type!");
2217
2218 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2219 LLT s64 = LLT::scalar(64);
2220
2221 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2222 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2223 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2224 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2225
2226 MIRBuilder.buildZExt(Dst, Add);
2227 MI.eraseFromParent();
2228 return true;
2229 }
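// Illustrative note (not in the upstream source): with FEAT_CSSC the scalar CNT
// instruction handles 32/64-bit popcounts directly, so an s128 popcount is
// split as popcount(x) = popcount(lo64(x)) + popcount(hi64(x)); the sum is at
// most 128, so it fits in the s64 that is then zero-extended back to s128.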
2230
2231 if (!ST->hasNEON() ||
2232 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2233 // Use generic lowering when custom lowering is not possible.
2234 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2235 Helper.lowerBitCount(MI) ==
2236 LegalizerHelper::LegalizeResult::Legalized;
2237 }
2238
2239 // Pre-conditioning: widen Val up to the nearest vector type.
2240 // s32,s64,v4s16,v2s32 -> v8i8
2241 // v8s16,v4s32,v2s64 -> v16i8
2242 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2243 if (Ty.isScalar()) {
2244 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2245 if (Size == 32) {
2246 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2247 }
2248 }
2249 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2250
2251 // Count bits in each byte-sized lane.
2252 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2253
2254 // Sum across lanes.
2255
2256 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2257 Ty.getScalarSizeInBits() != 16) {
2258 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2259 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2260 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2261 MachineInstrBuilder Sum;
2262
2263 if (Ty == LLT::fixed_vector(2, 64)) {
2264 auto UDOT =
2265 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2266 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2267 } else if (Ty == LLT::fixed_vector(4, 32)) {
2268 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2269 } else if (Ty == LLT::fixed_vector(2, 32)) {
2270 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2271 } else {
2272 llvm_unreachable("unexpected vector shape");
2273 }
2274
2275 Sum->getOperand(0).setReg(Dst);
2276 MI.eraseFromParent();
2277 return true;
2278 }
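// Illustrative note (not in the upstream source): G_UDOT against an all-ones
// operand sums each group of four byte-counts into a 32-bit lane, replacing
// two levels of uaddlp. For a v2s64 result one extra G_UADDLP then pairs the
// 32-bit partial sums into 64-bit lanes.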
2279
2280 Register HSum = CTPOP.getReg(0);
2281 unsigned Opc;
2282 SmallVector<LLT> HAddTys;
2283 if (Ty.isScalar()) {
2284 Opc = Intrinsic::aarch64_neon_uaddlv;
2285 HAddTys.push_back(LLT::scalar(32));
2286 } else if (Ty == LLT::fixed_vector(8, 16)) {
2287 Opc = Intrinsic::aarch64_neon_uaddlp;
2288 HAddTys.push_back(LLT::fixed_vector(8, 16));
2289 } else if (Ty == LLT::fixed_vector(4, 32)) {
2290 Opc = Intrinsic::aarch64_neon_uaddlp;
2291 HAddTys.push_back(LLT::fixed_vector(8, 16));
2292 HAddTys.push_back(LLT::fixed_vector(4, 32));
2293 } else if (Ty == LLT::fixed_vector(2, 64)) {
2294 Opc = Intrinsic::aarch64_neon_uaddlp;
2295 HAddTys.push_back(LLT::fixed_vector(8, 16));
2296 HAddTys.push_back(LLT::fixed_vector(4, 32));
2297 HAddTys.push_back(LLT::fixed_vector(2, 64));
2298 } else if (Ty == LLT::fixed_vector(4, 16)) {
2299 Opc = Intrinsic::aarch64_neon_uaddlp;
2300 HAddTys.push_back(LLT::fixed_vector(4, 16));
2301 } else if (Ty == LLT::fixed_vector(2, 32)) {
2302 Opc = Intrinsic::aarch64_neon_uaddlp;
2303 HAddTys.push_back(LLT::fixed_vector(4, 16));
2304 HAddTys.push_back(LLT::fixed_vector(2, 32));
2305 } else
2306 llvm_unreachable("unexpected vector shape");
2307 MachineInstrBuilder UADD;
2308 for (LLT HTy : HAddTys) {
2309 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2310 HSum = UADD.getReg(0);
2311 }
2312
2313 // Post-conditioning.
2314 if (Ty.isScalar() && (Size == 64 || Size == 128))
2315 MIRBuilder.buildZExt(Dst, UADD);
2316 else
2317 UADD->getOperand(0).setReg(Dst);
2318 MI.eraseFromParent();
2319 return true;
2320}
2321
2322bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2323 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2324 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2325 LLT s64 = LLT::scalar(64);
2326 auto Addr = MI.getOperand(1).getReg();
2327 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2328 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2329 auto DstLo = MRI.createGenericVirtualRegister(s64);
2330 auto DstHi = MRI.createGenericVirtualRegister(s64);
2331
2332 MachineInstrBuilder CAS;
2333 if (ST->hasLSE()) {
2334 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2335 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2336 // the rest of the MIR so we must reassemble the extracted registers into a
2337 // 128-bit known-regclass one with code like this:
2338 //
2339 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2340 // %out = CASP %in1, ...
2341 // %OldLo = G_EXTRACT %out, 0
2342 // %OldHi = G_EXTRACT %out, 64
2343 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2344 unsigned Opcode;
2345 switch (Ordering) {
2346 case AtomicOrdering::Acquire:
2347 Opcode = AArch64::CASPAX;
2348 break;
2349 case AtomicOrdering::Release:
2350 Opcode = AArch64::CASPLX;
2351 break;
2352 case AtomicOrdering::AcquireRelease:
2353 case AtomicOrdering::SequentiallyConsistent:
2354 Opcode = AArch64::CASPALX;
2355 break;
2356 default:
2357 Opcode = AArch64::CASPX;
2358 break;
2359 }
2360
2361 LLT s128 = LLT::scalar(128);
2362 auto CASDst = MRI.createGenericVirtualRegister(s128);
2363 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2364 auto CASNew = MRI.createGenericVirtualRegister(s128);
2365 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2366 .addUse(DesiredI->getOperand(0).getReg())
2367 .addImm(AArch64::sube64)
2368 .addUse(DesiredI->getOperand(1).getReg())
2369 .addImm(AArch64::subo64);
2370 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2371 .addUse(NewI->getOperand(0).getReg())
2372 .addImm(AArch64::sube64)
2373 .addUse(NewI->getOperand(1).getReg())
2374 .addImm(AArch64::subo64);
2375
2376 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2377
2378 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2379 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2380 } else {
2381 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2382 // can take arbitrary registers so it just has the normal GPR64 operands the
2383 // rest of AArch64 is expecting.
2384 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2385 unsigned Opcode;
2386 switch (Ordering) {
2387 case AtomicOrdering::Acquire:
2388 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2389 break;
2390 case AtomicOrdering::Release:
2391 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2392 break;
2393 case AtomicOrdering::AcquireRelease:
2394 case AtomicOrdering::SequentiallyConsistent:
2395 Opcode = AArch64::CMP_SWAP_128;
2396 break;
2397 default:
2398 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2399 break;
2400 }
2401
2402 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2403 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2404 {Addr, DesiredI->getOperand(0),
2405 DesiredI->getOperand(1), NewI->getOperand(0),
2406 NewI->getOperand(1)});
2407 }
2408
2409 CAS.cloneMemRefs(MI);
2410 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2411 *MRI.getTargetRegisterInfo(),
2412 *ST->getRegBankInfo());
2413
2414 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2415 MI.eraseFromParent();
2416 return true;
2417}
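// Illustrative note (not in the upstream source): with LSE a single CASP
// instruction performs the 128-bit compare-and-swap on an even/odd register
// pair, which is why the inputs are packed with REG_SEQUENCE (sube64/subo64)
// and the result unpacked with G_EXTRACT. Without LSE, the CMP_SWAP_128*
// pseudos are expanded later into an LDXP/STXP retry loop.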
2418
2419bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2420 LegalizerHelper &Helper) const {
2421 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2422 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2423 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2424 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2425 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2426 MI.eraseFromParent();
2427 return true;
2428}
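// Illustrative note (not in the upstream source): AArch64 has RBIT and CLZ but
// no direct count-trailing-zeros, so cttz(x) is computed as
// ctlz(bitreverse(x)); the reversed value has its leading set bit where x had
// its trailing set bit, and the x == 0 case agrees too (both give the width).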
2429
2430bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2431 LegalizerHelper &Helper) const {
2432 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2433
2434 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2435 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2436 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2437 // the instruction).
2438 auto &Value = MI.getOperand(1);
2439 Register ExtValueReg =
2440 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2441 Value.setReg(ExtValueReg);
2442 return true;
2443 }
2444
2445 return false;
2446}
2447
2448bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2449 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2450 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2451 auto VRegAndVal =
2452 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2453 if (VRegAndVal)
2454 return true;
2455 LLT VecTy = MRI.getType(Element->getVectorReg());
2456 if (VecTy.isScalableVector())
2457 return true;
2458 return Helper.lowerExtractInsertVectorElt(MI) !=
2459 LegalizerHelper::LegalizeResult::UnableToLegalize;
2460}
2461
2462bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2463 MachineInstr &MI, LegalizerHelper &Helper) const {
2464 MachineFunction &MF = *MI.getParent()->getParent();
2465 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2466 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2467
2468 // If stack probing is not enabled for this function, use the default
2469 // lowering.
2470 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2471 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2472 "inline-asm") {
2473 Helper.lowerDynStackAlloc(MI);
2474 return true;
2475 }
2476
2477 Register Dst = MI.getOperand(0).getReg();
2478 Register AllocSize = MI.getOperand(1).getReg();
2479 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2480
2481 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2482 "Unexpected type for dynamic alloca");
2483 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2484 "Unexpected type for dynamic alloca");
2485
2486 LLT PtrTy = MRI.getType(Dst);
2487 Register SPReg =
2488 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2489 Register SPTmp =
2490 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2491 auto NewMI =
2492 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2493 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2494 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2495 MIRBuilder.buildCopy(Dst, SPTmp);
2496
2497 MI.eraseFromParent();
2498 return true;
2499}
2500
2501bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2502 LegalizerHelper &Helper) const {
2503 MachineIRBuilder &MIB = Helper.MIRBuilder;
2504 auto &AddrVal = MI.getOperand(0);
2505
2506 int64_t IsWrite = MI.getOperand(1).getImm();
2507 int64_t Locality = MI.getOperand(2).getImm();
2508 int64_t IsData = MI.getOperand(3).getImm();
2509
2510 bool IsStream = Locality == 0;
2511 if (Locality != 0) {
2512 assert(Locality <= 3 && "Prefetch locality out-of-range");
2513 // The locality degree is the opposite of the cache speed.
2514 // Put the number the other way around.
2515 // The encoding starts at 0 for level 1
2516 Locality = 3 - Locality;
2517 }
2518
2519 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2520
2521 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2522 MI.eraseFromParent();
2523 return true;
2524}
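// Worked example (not in the upstream source) of the locality inversion above,
// for llvm.prefetch(p, /*rw=*/0, locality, /*data=*/1):
//   locality 3 -> Locality = 0 (L1), IsStream = 0 -> PrfOp = 0 (PLDL1KEEP)
//   locality 1 -> Locality = 2 (L3), IsStream = 0 -> PrfOp = 4 (PLDL3KEEP)
//   locality 0 -> Locality = 0,      IsStream = 1 -> PrfOp = 1 (PLDL1STRM)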
2525
2526bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2527 MachineIRBuilder &MIRBuilder,
2528 MachineRegisterInfo &MRI) const {
2529 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2530 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2531 "Expected a power of 2 elements");
2532
2533 LLT s16 = LLT::scalar(16);
2534 LLT s32 = LLT::scalar(32);
2535 LLT s64 = LLT::scalar(64);
2536 LLT v2s16 = LLT::fixed_vector(2, s16);
2537 LLT v4s16 = LLT::fixed_vector(4, s16);
2538 LLT v2s32 = LLT::fixed_vector(2, s32);
2539 LLT v4s32 = LLT::fixed_vector(4, s32);
2540 LLT v2s64 = LLT::fixed_vector(2, s64);
2541
2542 SmallVector<Register> RegsToUnmergeTo;
2543 SmallVector<Register> TruncOddDstRegs;
2544 SmallVector<Register> RegsToMerge;
2545
2546 unsigned ElemCount = SrcTy.getNumElements();
2547
2548 // Find the biggest size chunks we can work with
2549 int StepSize = ElemCount % 4 ? 2 : 4;
2550
2551 // If we have a power of 2 greater than 2, we need to first unmerge into
2552 // enough pieces
2553 if (ElemCount <= 2)
2554 RegsToUnmergeTo.push_back(Src);
2555 else {
2556 for (unsigned i = 0; i < ElemCount / 2; ++i)
2557 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2558
2559 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2560 }
2561
2562 // Create all of the round-to-odd instructions and store them
2563 for (auto SrcReg : RegsToUnmergeTo) {
2564 Register Mid =
2565 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2566 .getReg(0);
2567 TruncOddDstRegs.push_back(Mid);
2568 }
2569
2570 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2571 // truncate 2s32 to 2s16.
2572 unsigned Index = 0;
2573 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2574 if (StepSize == 4) {
2575 Register ConcatDst =
2576 MIRBuilder
2577 .buildMergeLikeInstr(
2578 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2579 .getReg(0);
2580
2581 RegsToMerge.push_back(
2582 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2583 } else {
2584 RegsToMerge.push_back(
2585 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2586 }
2587 }
2588
2589 // If there is only one register, replace the destination
2590 if (RegsToMerge.size() == 1) {
2591 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2592 MI.eraseFromParent();
2593 return true;
2594 }
2595
2596 // Merge the rest of the instructions & replace the register
2597 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2598 MRI.replaceRegWith(Dst, Fin);
2599 MI.eraseFromParent();
2600 return true;
2601}
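// Illustrative note (not in the upstream source): truncating f64 to f16 via a
// plain f64->f32->f16 chain can round twice and differ from a single f64->f16
// rounding. The G_FPTRUNC_ODD step (FCVTXN, round-to-odd) produces an f32 that
// preserves enough information for the following G_FPTRUNC to f16 to give the
// correctly rounded result, which is why the lowering goes
// v2s64 -> v2s32 (round-to-odd) -> v2s16/v4s16.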