LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(0)
101 .clampScalar(0, s8, s64)
104 .clampNumElements(0, v8s8, v16s8)
105 .clampNumElements(0, v4s16, v8s16)
106 .clampNumElements(0, v2s32, v4s32)
107 .clampMaxNumElements(0, s64, 2)
108 .clampMaxNumElements(0, p0, 2)
110
112 .legalFor({p0, s16, s32, s64})
113 .legalFor(PackedVectorAllTypeList)
117 .clampScalar(0, s16, s64)
118 .clampNumElements(0, v8s8, v16s8)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampMaxNumElements(0, s64, 2)
122 .clampMaxNumElements(0, p0, 2);
123
125 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
126 smallerThan(1, 0)))
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
130 .minScalar(1, s8)
131 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
132 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
133
135 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
136 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .widenScalarToNextPow2(1)
138 .clampScalar(1, s32, s128)
140 .minScalar(0, s16)
141 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
142 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
144
145 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, s32, s64)
150 .clampMaxNumElements(0, s8, 16)
151 .clampMaxNumElements(0, s16, 8)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampNumElements(0, v2s64, v2s64)
155 [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 0, s32)
159 .minScalarOrEltIf(
160 [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 0, s16)
164 .minScalarOrEltIf(
165 [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 0, s8)
169 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
171
173 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
175 .clampScalar(0, s32, s64)
176 .clampMaxNumElements(0, s8, 16)
177 .clampMaxNumElements(0, s16, 8)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 0, s32)
185 .minScalarOrEltIf(
186 [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 0, s16)
190 .minScalarOrEltIf(
191 [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 0, s8)
195 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
197
198 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
199 .customIf([=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor({
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(0)
218 .clampScalar(1, s32, s64)
219 .clampScalar(0, s32, s64)
220 .clampNumElements(0, v8s8, v16s8)
221 .clampNumElements(0, v4s16, v8s16)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64)
225 .minScalarSameAs(1, 0)
229
231 .legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
233 .clampNumElements(0, v2p0, v2p0);
234
235 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
236
237 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
238 .legalFor({s32, s64})
239 .libcallFor({s128})
240 .clampScalar(0, s32, s64)
242 .scalarize(0);
243
244 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor({s128})
248 .minScalarOrElt(0, s32)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
251 .scalarize(0);
252
253 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(0, /*Min = */ 32)
255 .clampScalar(0, s32, s64)
256 .lower();
257
258 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
259 .legalFor({s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
266 .clampNumElements(0, v8s8, v16s8)
267 .clampNumElements(0, v4s16, v8s16)
268 .clampNumElements(0, v2s32, v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
273 .legalFor(HasCSSC, {s32, s64})
274 .legalFor(PackedVectorAllTypeList)
275 .customIf([=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
283 .widenScalarIf(
284 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
287 .clampNumElements(0, v4s16, v8s16)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
291 .lower();
292
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
302 .clampScalar(1, s32, s64)
304
305 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
310 .legalFor({{s32, s64}, {s64, s64}})
311 .customIf([=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
316
317 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
318 .customFor({{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
322 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(0, s32, s128)
333 .minScalarEltSameAsIf(always, 1, 0)
334 .maxScalarEltSameAsIf(always, 1, 0)
335 .clampNumElements(0, v8s8, v16s8)
336 .clampNumElements(0, v4s16, v8s16)
337 .clampNumElements(0, v2s32, v4s32)
338 .clampNumElements(0, v2s64, v2s64)
341
342 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
343 .legalFor({{s32, s32},
344 {s64, s64},
345 {v8s8, v8s8},
346 {v16s8, v16s8},
347 {v4s16, v4s16},
348 {v8s16, v8s16},
349 {v2s32, v2s32},
350 {v4s32, v4s32}})
351 .widenScalarToNextPow2(1, /*Min=*/32)
352 .clampScalar(1, s32, s64)
353 .clampNumElements(0, v8s8, v16s8)
354 .clampNumElements(0, v4s16, v8s16)
355 .clampNumElements(0, v2s32, v4s32)
358 .scalarSameSizeAs(0, 1);
359
360 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
361
363 .lowerIf(isVector(0))
364 .widenScalarToNextPow2(1, /*Min=*/32)
365 .clampScalar(1, s32, s64)
366 .scalarSameSizeAs(0, 1)
367 .legalFor(HasCSSC, {s32, s64})
368 .customFor(!HasCSSC, {s32, s64});
369
370 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
371
372 getActionDefinitionsBuilder(G_BITREVERSE)
373 .legalFor({s32, s64, v8s8, v16s8})
374 .widenScalarToNextPow2(0, /*Min = */ 32)
376 .clampScalar(0, s32, s64)
377 .clampNumElements(0, v8s8, v16s8)
378 .clampNumElements(0, v4s16, v8s16)
379 .clampNumElements(0, v2s32, v4s32)
380 .clampNumElements(0, v2s64, v2s64)
383 .lower();
384
386 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
388 .clampScalar(0, s32, s64)
389 .clampNumElements(0, v4s16, v8s16)
390 .clampNumElements(0, v2s32, v4s32)
391 .clampNumElements(0, v2s64, v2s64)
393
394 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
395 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
396 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
397 .clampNumElements(0, v8s8, v16s8)
398 .clampNumElements(0, v4s16, v8s16)
399 .clampNumElements(0, v2s32, v4s32)
400 .clampMaxNumElements(0, s64, 2)
403 .lower();
404
406 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
407 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
408 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
409 .legalFor({s32, s64, v2s32, v4s32, v2s64})
410 .legalFor(HasFP16, {s16, v4s16, v8s16})
411 .libcallFor({s128})
412 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
413 .minScalarOrElt(0, MinFPScalar)
414 .clampNumElements(0, v4s16, v8s16)
415 .clampNumElements(0, v2s32, v4s32)
416 .clampNumElements(0, v2s64, v2s64)
418
419 getActionDefinitionsBuilder({G_FABS, G_FNEG})
420 .legalFor({s32, s64, v2s32, v4s32, v2s64})
421 .legalFor(HasFP16, {s16, v4s16, v8s16})
422 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
424 .clampNumElements(0, v4s16, v8s16)
425 .clampNumElements(0, v2s32, v4s32)
426 .clampNumElements(0, v2s64, v2s64)
428 .lowerFor({s16, v4s16, v8s16});
429
431 .libcallFor({s32, s64, s128})
432 .minScalar(0, s32)
433 .scalarize(0);
434
435 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
436 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
437 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
438 G_FSINH, G_FTANH, G_FMODF})
439 // We need a call for these, so we always need to scalarize.
440 .scalarize(0)
441 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
442 .minScalar(0, s32)
443 .libcallFor({s32, s64, s128});
444 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
445 .scalarize(0)
446 .minScalar(0, s32)
447 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
448
449 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
450 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
451 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
452 .minScalar(1, s32)
453 .libcallFor({{s64, s128}})
454 .lower();
455 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
456 .legalFor({{s64, s32}, {s64, s64}})
457 .legalFor(HasFP16, {{s64, s16}})
458 .minScalar(0, s64)
459 .minScalar(1, s32)
460 .libcallFor({{s64, s128}})
461 .lower();
462
463 // TODO: Custom legalization for mismatched types.
464 getActionDefinitionsBuilder(G_FCOPYSIGN)
466 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
467 [=](const LegalityQuery &Query) {
468 const LLT Ty = Query.Types[0];
469 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
470 })
471 .lower();
472
474
475 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
476 auto &Actions = getActionDefinitionsBuilder(Op);
477
478 if (Op == G_SEXTLOAD)
480
481 // Atomics have zero extending behavior.
482 Actions
483 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
484 {s32, p0, s16, 8},
485 {s32, p0, s32, 8},
486 {s64, p0, s8, 2},
487 {s64, p0, s16, 2},
488 {s64, p0, s32, 4},
489 {s64, p0, s64, 8},
490 {p0, p0, s64, 8},
491 {v2s32, p0, s64, 8}})
492 .widenScalarToNextPow2(0)
493 .clampScalar(0, s32, s64)
494 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
495 // how to do that yet.
496 .unsupportedIfMemSizeNotPow2()
497 // Lower anything left over into G_*EXT and G_LOAD
498 .lower();
499 }
500
501 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
502 const LLT &ValTy = Query.Types[0];
503 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
504 };
505
507 .customIf([=](const LegalityQuery &Query) {
508 return HasRCPC3 && Query.Types[0] == s128 &&
509 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
510 })
511 .customIf([=](const LegalityQuery &Query) {
512 return Query.Types[0] == s128 &&
513 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
514 })
515 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
516 {s16, p0, s16, 8},
517 {s32, p0, s32, 8},
518 {s64, p0, s64, 8},
519 {p0, p0, s64, 8},
520 {s128, p0, s128, 8},
521 {v8s8, p0, s64, 8},
522 {v16s8, p0, s128, 8},
523 {v4s16, p0, s64, 8},
524 {v8s16, p0, s128, 8},
525 {v2s32, p0, s64, 8},
526 {v4s32, p0, s128, 8},
527 {v2s64, p0, s128, 8}})
528 // These extends are also legal
529 .legalForTypesWithMemDesc(
530 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
531 .legalForTypesWithMemDesc({
532 // SVE vscale x 128 bit base sizes
533 {nxv16s8, p0, nxv16s8, 8},
534 {nxv8s16, p0, nxv8s16, 8},
535 {nxv4s32, p0, nxv4s32, 8},
536 {nxv2s64, p0, nxv2s64, 8},
537 })
538 .widenScalarToNextPow2(0, /* MinSize = */ 8)
539 .clampMaxNumElements(0, s8, 16)
540 .clampMaxNumElements(0, s16, 8)
541 .clampMaxNumElements(0, s32, 4)
542 .clampMaxNumElements(0, s64, 2)
543 .clampMaxNumElements(0, p0, 2)
545 .clampScalar(0, s8, s64)
547 [=](const LegalityQuery &Query) {
548 // Clamp extending load results to 32-bits.
549 return Query.Types[0].isScalar() &&
550 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
551 Query.Types[0].getSizeInBits() > 32;
552 },
553 changeTo(0, s32))
554 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
555 .bitcastIf(typeInSet(0, {v4s8}),
556 [=](const LegalityQuery &Query) {
557 const LLT VecTy = Query.Types[0];
558 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
559 })
560 .customIf(IsPtrVecPred)
561 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
562 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
563
565 .customIf([=](const LegalityQuery &Query) {
566 return HasRCPC3 && Query.Types[0] == s128 &&
567 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
568 })
569 .customIf([=](const LegalityQuery &Query) {
570 return Query.Types[0] == s128 &&
571 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
572 })
573 .widenScalarIf(
574 all(scalarNarrowerThan(0, 32),
576 changeTo(0, s32))
578 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
579 {s32, p0, s8, 8}, // truncstorei8 from s32
580 {s64, p0, s8, 8}, // truncstorei8 from s64
581 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
582 {s64, p0, s16, 8}, // truncstorei16 from s64
583 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
584 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
585 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
586 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
587 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
588 .legalForTypesWithMemDesc({
589 // SVE vscale x 128 bit base sizes
590 // TODO: Add nxv2p0. Consider bitcastIf.
591 // See #92130
592 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
593 {nxv16s8, p0, nxv16s8, 8},
594 {nxv8s16, p0, nxv8s16, 8},
595 {nxv4s32, p0, nxv4s32, 8},
596 {nxv2s64, p0, nxv2s64, 8},
597 })
598 .clampScalar(0, s8, s64)
599 .minScalarOrElt(0, s8)
600 .lowerIf([=](const LegalityQuery &Query) {
601 return Query.Types[0].isScalar() &&
602 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
603 })
604 // Maximum: sN * k = 128
605 .clampMaxNumElements(0, s8, 16)
606 .clampMaxNumElements(0, s16, 8)
607 .clampMaxNumElements(0, s32, 4)
608 .clampMaxNumElements(0, s64, 2)
609 .clampMaxNumElements(0, p0, 2)
611 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
612 .bitcastIf(all(typeInSet(0, {v4s8}),
613 LegalityPredicate([=](const LegalityQuery &Query) {
614 return Query.Types[0].getSizeInBits() ==
615 Query.MMODescrs[0].MemoryTy.getSizeInBits();
616 })),
617 [=](const LegalityQuery &Query) {
618 const LLT VecTy = Query.Types[0];
619 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
620 })
621 .customIf(IsPtrVecPred)
622 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
623 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
624 .lower();
625
626 getActionDefinitionsBuilder(G_INDEXED_STORE)
627 // Idx 0 == Ptr, Idx 1 == Val
628 // TODO: we can implement legalizations but as of now these are
629 // generated in a very specific way.
631 {p0, s8, s8, 8},
632 {p0, s16, s16, 8},
633 {p0, s32, s8, 8},
634 {p0, s32, s16, 8},
635 {p0, s32, s32, 8},
636 {p0, s64, s64, 8},
637 {p0, p0, p0, 8},
638 {p0, v8s8, v8s8, 8},
639 {p0, v16s8, v16s8, 8},
640 {p0, v4s16, v4s16, 8},
641 {p0, v8s16, v8s16, 8},
642 {p0, v2s32, v2s32, 8},
643 {p0, v4s32, v4s32, 8},
644 {p0, v2s64, v2s64, 8},
645 {p0, v2p0, v2p0, 8},
646 {p0, s128, s128, 8},
647 })
648 .unsupported();
649
650 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
651 LLT LdTy = Query.Types[0];
652 LLT PtrTy = Query.Types[1];
653 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
654 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
655 return false;
656 if (PtrTy != p0)
657 return false;
658 return true;
659 };
660 getActionDefinitionsBuilder(G_INDEXED_LOAD)
663 .legalIf(IndexedLoadBasicPred)
664 .unsupported();
665 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
666 .unsupportedIf(
668 .legalIf(all(typeInSet(0, {s16, s32, s64}),
669 LegalityPredicate([=](const LegalityQuery &Q) {
670 LLT LdTy = Q.Types[0];
671 LLT PtrTy = Q.Types[1];
672 LLT MemTy = Q.MMODescrs[0].MemoryTy;
673 if (PtrTy != p0)
674 return false;
675 if (LdTy == s16)
676 return MemTy == s8;
677 if (LdTy == s32)
678 return MemTy == s8 || MemTy == s16;
679 if (LdTy == s64)
680 return MemTy == s8 || MemTy == s16 || MemTy == s32;
681 return false;
682 })))
683 .unsupported();
684
685 // Constants
687 .legalFor({p0, s8, s16, s32, s64})
688 .widenScalarToNextPow2(0)
689 .clampScalar(0, s8, s64);
690 getActionDefinitionsBuilder(G_FCONSTANT)
691 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
692 .legalFor({s16, s32, s64, s128})
693 .clampScalar(0, MinFPScalar, s128);
694
695 // FIXME: fix moreElementsToNextPow2
697 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
699 .clampScalar(1, s32, s64)
700 .clampScalar(0, s32, s32)
703 [=](const LegalityQuery &Query) {
704 const LLT &Ty = Query.Types[0];
705 const LLT &SrcTy = Query.Types[1];
706 return Ty.isVector() && !SrcTy.isPointerVector() &&
707 Ty.getElementType() != SrcTy.getElementType();
708 },
709 0, 1)
710 .minScalarOrEltIf(
711 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
712 1, s32)
713 .minScalarOrEltIf(
714 [=](const LegalityQuery &Query) {
715 return Query.Types[1].isPointerVector();
716 },
717 0, s64)
719 .clampNumElements(1, v8s8, v16s8)
720 .clampNumElements(1, v4s16, v8s16)
721 .clampNumElements(1, v2s32, v4s32)
722 .clampNumElements(1, v2s64, v2s64)
723 .clampNumElements(1, v2p0, v2p0)
724 .customIf(isVector(0));
725
727 .legalFor({{s32, s32},
728 {s32, s64},
729 {v4s32, v4s32},
730 {v2s32, v2s32},
731 {v2s64, v2s64}})
732 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
734 .clampScalar(0, s32, s32)
735 .minScalarOrElt(1, MinFPScalar)
738 [=](const LegalityQuery &Query) {
739 const LLT &Ty = Query.Types[0];
740 const LLT &SrcTy = Query.Types[1];
741 return Ty.isVector() && !SrcTy.isPointerVector() &&
742 Ty.getElementType() != SrcTy.getElementType();
743 },
744 0, 1)
745 .clampNumElements(1, v4s16, v8s16)
746 .clampNumElements(1, v2s32, v4s32)
747 .clampMaxNumElements(1, s64, 2)
749 .libcallFor({{s32, s128}});
750
751 // Extensions
752 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
753 unsigned DstSize = Query.Types[0].getSizeInBits();
754
755 // Handle legal vectors using legalFor
756 if (Query.Types[0].isVector())
757 return false;
758
759 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
760 return false; // Extending to a scalar s128 needs narrowing.
761
762 const LLT &SrcTy = Query.Types[1];
763
764 // Make sure we fit in a register otherwise. Don't bother checking that
765 // the source type is below 128 bits. We shouldn't be allowing anything
766 // through which is wider than the destination in the first place.
767 unsigned SrcSize = SrcTy.getSizeInBits();
768 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
769 return false;
770
771 return true;
772 };
773 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
774 .legalIf(ExtLegalFunc)
775 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
776 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
778 .clampMaxNumElements(1, s8, 8)
779 .clampMaxNumElements(1, s16, 4)
780 .clampMaxNumElements(1, s32, 2)
781 // Tries to convert a large EXTEND into two smaller EXTENDs
782 .lowerIf([=](const LegalityQuery &Query) {
783 return (Query.Types[0].getScalarSizeInBits() >
784 Query.Types[1].getScalarSizeInBits() * 2) &&
785 Query.Types[0].isVector() &&
786 (Query.Types[1].getScalarSizeInBits() == 8 ||
787 Query.Types[1].getScalarSizeInBits() == 16);
788 })
789 .clampMinNumElements(1, s8, 8)
790 .clampMinNumElements(1, s16, 4)
792
794 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
796 .clampMaxNumElements(0, s8, 8)
797 .clampMaxNumElements(0, s16, 4)
798 .clampMaxNumElements(0, s32, 2)
800 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
801 0, s8)
802 .lowerIf([=](const LegalityQuery &Query) {
803 LLT DstTy = Query.Types[0];
804 LLT SrcTy = Query.Types[1];
805 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
806 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
807 })
808 .clampMinNumElements(0, s8, 8)
809 .clampMinNumElements(0, s16, 4)
810 .alwaysLegal();
811
812 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
813 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
814 .clampNumElements(0, v2s32, v2s32);
815
816 getActionDefinitionsBuilder(G_SEXT_INREG)
817 .legalFor({s32, s64})
818 .legalFor(PackedVectorAllTypeList)
819 .maxScalar(0, s64)
820 .clampNumElements(0, v8s8, v16s8)
821 .clampNumElements(0, v4s16, v8s16)
822 .clampNumElements(0, v2s32, v4s32)
823 .clampMaxNumElements(0, s64, 2)
824 .lower();
825
826 // FP conversions
828 .legalFor(
829 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
830 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
832 .customIf([](const LegalityQuery &Q) {
833 LLT DstTy = Q.Types[0];
834 LLT SrcTy = Q.Types[1];
835 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
836 SrcTy.getScalarSizeInBits() == 64 &&
837 DstTy.getScalarSizeInBits() == 16;
838 })
839 // Clamp based on input
840 .clampNumElements(1, v4s32, v4s32)
841 .clampNumElements(1, v2s64, v2s64)
842 .scalarize(0);
843
845 .legalFor(
846 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
847 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
850 [](const LegalityQuery &Q) {
851 LLT DstTy = Q.Types[0];
852 LLT SrcTy = Q.Types[1];
853 return SrcTy.isVector() && DstTy.isVector() &&
854 SrcTy.getScalarSizeInBits() == 16 &&
855 DstTy.getScalarSizeInBits() == 64;
856 },
857 changeElementTo(1, s32))
858 .clampNumElements(0, v4s32, v4s32)
859 .clampNumElements(0, v2s64, v2s64)
860 .scalarize(0);
861
862 // Conversions
863 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
864 .legalFor({{s32, s32},
865 {s64, s32},
866 {s32, s64},
867 {s64, s64},
868 {v2s32, v2s32},
869 {v4s32, v4s32},
870 {v2s64, v2s64}})
871 .legalFor(HasFP16,
872 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
873 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
875 // The range of a fp16 value fits into an i17, so we can lower the width
876 // to i64.
878 [=](const LegalityQuery &Query) {
879 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
880 },
881 changeTo(0, s64))
884 .minScalar(0, s32)
885 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
887 [=](const LegalityQuery &Query) {
888 return Query.Types[0].getScalarSizeInBits() <= 64 &&
889 Query.Types[0].getScalarSizeInBits() >
890 Query.Types[1].getScalarSizeInBits();
891 },
893 .widenScalarIf(
894 [=](const LegalityQuery &Query) {
895 return Query.Types[1].getScalarSizeInBits() <= 64 &&
896 Query.Types[0].getScalarSizeInBits() <
897 Query.Types[1].getScalarSizeInBits();
898 },
900 .clampNumElements(0, v4s16, v8s16)
901 .clampNumElements(0, v2s32, v4s32)
902 .clampMaxNumElements(0, s64, 2)
903 .libcallFor(
904 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
905
906 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
907 .legalFor({{s32, s32},
908 {s64, s32},
909 {s32, s64},
910 {s64, s64},
911 {v2s32, v2s32},
912 {v4s32, v4s32},
913 {v2s64, v2s64}})
914 .legalFor(
915 HasFP16,
916 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
917 // Handle types larger than i64 by scalarizing/lowering.
918 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
920 // The range of a fp16 value fits into an i17, so we can lower the width
921 // to i64.
923 [=](const LegalityQuery &Query) {
924 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
925 },
926 changeTo(0, s64))
927 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
929 .widenScalarToNextPow2(0, /*MinSize=*/32)
930 .minScalar(0, s32)
931 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
933 [=](const LegalityQuery &Query) {
934 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
935 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
936 ITySize > Query.Types[1].getScalarSizeInBits();
937 },
939 .widenScalarIf(
940 [=](const LegalityQuery &Query) {
941 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
942 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
943 Query.Types[0].getScalarSizeInBits() < FTySize;
944 },
947 .clampNumElements(0, v4s16, v8s16)
948 .clampNumElements(0, v2s32, v4s32)
949 .clampMaxNumElements(0, s64, 2);
950
951 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
952 .legalFor({{s32, s32},
953 {s64, s32},
954 {s32, s64},
955 {s64, s64},
956 {v2s32, v2s32},
957 {v4s32, v4s32},
958 {v2s64, v2s64}})
959 .legalFor(HasFP16,
960 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
961 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
965 .minScalar(1, s32)
966 .lowerIf([](const LegalityQuery &Query) {
967 return Query.Types[1].isVector() &&
968 Query.Types[1].getScalarSizeInBits() == 64 &&
969 Query.Types[0].getScalarSizeInBits() == 16;
970 })
971 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
973 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
974 [](const LegalityQuery &Query) {
975 return Query.Types[0].getScalarSizeInBits() == 32 &&
976 Query.Types[1].getScalarSizeInBits() == 64;
977 },
978 0)
979 .widenScalarIf(
980 [](const LegalityQuery &Query) {
981 return Query.Types[1].getScalarSizeInBits() <= 64 &&
982 Query.Types[0].getScalarSizeInBits() <
983 Query.Types[1].getScalarSizeInBits();
984 },
986 .widenScalarIf(
987 [](const LegalityQuery &Query) {
988 return Query.Types[0].getScalarSizeInBits() <= 64 &&
989 Query.Types[0].getScalarSizeInBits() >
990 Query.Types[1].getScalarSizeInBits();
991 },
993 .clampNumElements(0, v4s16, v8s16)
994 .clampNumElements(0, v2s32, v4s32)
995 .clampMaxNumElements(0, s64, 2)
996 .libcallFor({{s16, s128},
997 {s32, s128},
998 {s64, s128},
999 {s128, s128},
1000 {s128, s32},
1001 {s128, s64}});
1002
1003 // Control-flow
1006 .legalFor({s32})
1007 .clampScalar(0, s32, s32);
1008 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1009
1011 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1012 .widenScalarToNextPow2(0)
1013 .clampScalar(0, s32, s64)
1014 .clampScalar(1, s32, s32)
1017 .lowerIf(isVector(0));
1018
1019 // Pointer-handling
1020 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1021
1022 if (TM.getCodeModel() == CodeModel::Small)
1023 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1024 else
1025 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1026
1027 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1028 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1029
1030 getActionDefinitionsBuilder(G_PTRTOINT)
1031 .legalFor({{s64, p0}, {v2s64, v2p0}})
1032 .widenScalarToNextPow2(0, 64)
1033 .clampScalar(0, s64, s64)
1034 .clampMaxNumElements(0, s64, 2);
1035
1036 getActionDefinitionsBuilder(G_INTTOPTR)
1037 .unsupportedIf([&](const LegalityQuery &Query) {
1038 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1039 })
1040 .legalFor({{p0, s64}, {v2p0, v2s64}})
1041 .clampMaxNumElements(1, s64, 2);
1042
1043 // Casts for 32 and 64-bit width type are just copies.
1044 // Same for 128-bit width type, except they are on the FPR bank.
1046 // Keeping 32-bit instructions legal to prevent regression in some tests
1047 .legalForCartesianProduct({s32, v2s16, v4s8})
1048 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1049 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1050 .customIf([=](const LegalityQuery &Query) {
1051 // Handle casts from i1 vectors to scalars.
1052 LLT DstTy = Query.Types[0];
1053 LLT SrcTy = Query.Types[1];
1054 return DstTy.isScalar() && SrcTy.isVector() &&
1055 SrcTy.getScalarSizeInBits() == 1;
1056 })
1057 .lowerIf([=](const LegalityQuery &Query) {
1058 return Query.Types[0].isVector() != Query.Types[1].isVector();
1059 })
1061 .clampNumElements(0, v8s8, v16s8)
1062 .clampNumElements(0, v4s16, v8s16)
1063 .clampNumElements(0, v2s32, v4s32)
1064 .lower();
1065
1066 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1067
1068 // va_list must be a pointer, but most sized types are pretty easy to handle
1069 // as the destination.
1071 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1072 .clampScalar(0, s8, s64)
1073 .widenScalarToNextPow2(0, /*Min*/ 8);
1074
1075 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1076 .lowerIf(
1077 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1078
1079 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1080
1081 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1082 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1083 .customFor(!UseOutlineAtomics, {{s128, p0}})
1084 .libcallFor(UseOutlineAtomics,
1085 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1086 .clampScalar(0, s32, s64);
1087
1088 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1089 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1090 G_ATOMICRMW_XOR})
1091 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1092 .libcallFor(UseOutlineAtomics,
1093 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1094 .clampScalar(0, s32, s64);
1095
1096 // Do not outline these atomics operations, as per comment in
1097 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1099 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1100 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1101 .clampScalar(0, s32, s64);
1102
1103 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1104
1105 // Merge/Unmerge
1106 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1107 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1108 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1110 .widenScalarToNextPow2(LitTyIdx, 8)
1111 .widenScalarToNextPow2(BigTyIdx, 32)
1112 .clampScalar(LitTyIdx, s8, s64)
1113 .clampScalar(BigTyIdx, s32, s128)
1114 .legalIf([=](const LegalityQuery &Q) {
1115 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1116 case 32:
1117 case 64:
1118 case 128:
1119 break;
1120 default:
1121 return false;
1122 }
1123 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1124 case 8:
1125 case 16:
1126 case 32:
1127 case 64:
1128 return true;
1129 default:
1130 return false;
1131 }
1132 });
1133 }
1134
1135 // TODO : nxv4s16, nxv2s16, nxv2s32
1136 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1137 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1138 {s16, nxv8s16, s64},
1139 {s32, nxv4s32, s64},
1140 {s64, nxv2s64, s64}})
1141 .unsupportedIf([=](const LegalityQuery &Query) {
1142 const LLT &EltTy = Query.Types[1].getElementType();
1143 if (Query.Types[1].isScalableVector())
1144 return false;
1145 return Query.Types[0] != EltTy;
1146 })
1147 .minScalar(2, s64)
1148 .customIf([=](const LegalityQuery &Query) {
1149 const LLT &VecTy = Query.Types[1];
1150 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1151 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1152 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1153 })
1154 .minScalarOrEltIf(
1155 [=](const LegalityQuery &Query) {
1156 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1157 // cause the total vec size to be > 128b.
1158 return Query.Types[1].isFixedVector() &&
1159 Query.Types[1].getNumElements() <= 2;
1160 },
1161 0, s64)
1162 .minScalarOrEltIf(
1163 [=](const LegalityQuery &Query) {
1164 return Query.Types[1].isFixedVector() &&
1165 Query.Types[1].getNumElements() <= 4;
1166 },
1167 0, s32)
1168 .minScalarOrEltIf(
1169 [=](const LegalityQuery &Query) {
1170 return Query.Types[1].isFixedVector() &&
1171 Query.Types[1].getNumElements() <= 8;
1172 },
1173 0, s16)
1174 .minScalarOrEltIf(
1175 [=](const LegalityQuery &Query) {
1176 return Query.Types[1].isFixedVector() &&
1177 Query.Types[1].getNumElements() <= 16;
1178 },
1179 0, s8)
1180 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1182 .clampMaxNumElements(1, s64, 2)
1183 .clampMaxNumElements(1, s32, 4)
1184 .clampMaxNumElements(1, s16, 8)
1185 .clampMaxNumElements(1, s8, 16)
1186 .clampMaxNumElements(1, p0, 2)
1188
1189 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1190 .legalIf(
1191 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1192 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1193 {nxv8s16, s32, s64},
1194 {nxv4s32, s32, s64},
1195 {nxv2s64, s64, s64}})
1198 .clampNumElements(0, v8s8, v16s8)
1199 .clampNumElements(0, v4s16, v8s16)
1200 .clampNumElements(0, v2s32, v4s32)
1201 .clampMaxNumElements(0, s64, 2)
1202 .clampMaxNumElements(0, p0, 2)
1204
1205 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1206 .legalFor({{v8s8, s8},
1207 {v16s8, s8},
1208 {v4s16, s16},
1209 {v8s16, s16},
1210 {v2s32, s32},
1211 {v4s32, s32},
1212 {v2s64, s64},
1213 {v2p0, p0}})
1214 .clampNumElements(0, v4s32, v4s32)
1215 .clampNumElements(0, v2s64, v2s64)
1216 .minScalarOrElt(0, s8)
1219 .minScalarSameAs(1, 0);
1220
1221 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1222
1223 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1224 .legalIf([=](const LegalityQuery &Query) {
1225 const LLT &DstTy = Query.Types[0];
1226 const LLT &SrcTy = Query.Types[1];
1227 // For now just support the TBL2 variant which needs the source vectors
1228 // to be the same size as the dest.
1229 if (DstTy != SrcTy)
1230 return false;
1231 return llvm::is_contained(
1232 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1233 })
1234 .moreElementsIf(
1235 [](const LegalityQuery &Query) {
1236 return Query.Types[0].getNumElements() >
1237 Query.Types[1].getNumElements();
1238 },
1239 changeTo(1, 0))
1242 [](const LegalityQuery &Query) {
1243 return Query.Types[0].getNumElements() <
1244 Query.Types[1].getNumElements();
1245 },
1246 changeTo(0, 1))
1247 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1248 .clampNumElements(0, v8s8, v16s8)
1249 .clampNumElements(0, v4s16, v8s16)
1250 .clampNumElements(0, v4s32, v4s32)
1251 .clampNumElements(0, v2s64, v2s64)
1253 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1254 // Bitcast pointers vector to i64.
1255 const LLT DstTy = Query.Types[0];
1256 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1257 });
1258
1259 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1260 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1261 .bitcastIf(
1262 [=](const LegalityQuery &Query) {
1263 return Query.Types[0].isFixedVector() &&
1264 Query.Types[1].isFixedVector() &&
1265 Query.Types[0].getScalarSizeInBits() >= 8 &&
1266 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1267 Query.Types[0].getSizeInBits() <= 128 &&
1268 Query.Types[1].getSizeInBits() <= 64;
1269 },
1270 [=](const LegalityQuery &Query) {
1271 const LLT DstTy = Query.Types[0];
1272 const LLT SrcTy = Query.Types[1];
1273 return std::pair(
1274 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1277 SrcTy.getNumElements())));
1278 });
1279
1280 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1281 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1283 .immIdx(0); // Inform verifier imm idx 0 is handled.
1284
1285 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1286 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1287 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1288
1289 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1290
1291 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1292
1293 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1294
1295 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1296
1297 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1298
1299 if (ST.hasMOPS()) {
1300 // G_BZERO is not supported. Currently it is only emitted by
1301 // PreLegalizerCombiner for G_MEMSET with zero constant.
1303
1305 .legalForCartesianProduct({p0}, {s64}, {s64})
1306 .customForCartesianProduct({p0}, {s8}, {s64})
1307 .immIdx(0); // Inform verifier imm idx 0 is handled.
1308
1309 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1310 .legalForCartesianProduct({p0}, {p0}, {s64})
1311 .immIdx(0); // Inform verifier imm idx 0 is handled.
1312
1313 // G_MEMCPY_INLINE does not have a tailcall immediate
1314 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1315 .legalForCartesianProduct({p0}, {p0}, {s64});
1316
1317 } else {
1318 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1319 .libcall();
1320 }
1321
1322 // For fadd reductions we have pairwise operations available. We treat the
1323 // usual legal types as legal and handle the lowering to pairwise instructions
1324 // later.
1325 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1326 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1327 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1328 .minScalarOrElt(0, MinFPScalar)
1329 .clampMaxNumElements(1, s64, 2)
1330 .clampMaxNumElements(1, s32, 4)
1331 .clampMaxNumElements(1, s16, 8)
1333 .scalarize(1)
1334 .lower();
1335
1336 // For fmul reductions we need to split up into individual operations. We
1337 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1338 // smaller types, followed by scalarizing what remains.
1339 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1340 .minScalarOrElt(0, MinFPScalar)
1341 .clampMaxNumElements(1, s64, 2)
1342 .clampMaxNumElements(1, s32, 4)
1343 .clampMaxNumElements(1, s16, 8)
1344 .clampMaxNumElements(1, s32, 2)
1345 .clampMaxNumElements(1, s16, 4)
1346 .scalarize(1)
1347 .lower();
1348
1349 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1350 .scalarize(2)
1351 .lower();
1352
1353 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1354 .legalFor({{s8, v8s8},
1355 {s8, v16s8},
1356 {s16, v4s16},
1357 {s16, v8s16},
1358 {s32, v2s32},
1359 {s32, v4s32},
1360 {s64, v2s64}})
1362 .clampMaxNumElements(1, s64, 2)
1363 .clampMaxNumElements(1, s32, 4)
1364 .clampMaxNumElements(1, s16, 8)
1365 .clampMaxNumElements(1, s8, 16)
1367 .scalarize(1);
1368
1369 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1370 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1371 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1372 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1373 .minScalarOrElt(0, MinFPScalar)
1374 .clampMaxNumElements(1, s64, 2)
1375 .clampMaxNumElements(1, s32, 4)
1376 .clampMaxNumElements(1, s16, 8)
1377 .scalarize(1)
1378 .lower();
1379
1380 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1381 .clampMaxNumElements(1, s32, 2)
1382 .clampMaxNumElements(1, s16, 4)
1383 .clampMaxNumElements(1, s8, 8)
1384 .scalarize(1)
1385 .lower();
1386
1388 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1389 .legalFor({{s8, v8s8},
1390 {s8, v16s8},
1391 {s16, v4s16},
1392 {s16, v8s16},
1393 {s32, v2s32},
1394 {s32, v4s32}})
1395 .moreElementsIf(
1396 [=](const LegalityQuery &Query) {
1397 return Query.Types[1].isVector() &&
1398 Query.Types[1].getElementType() != s8 &&
1399 Query.Types[1].getNumElements() & 1;
1400 },
1402 .clampMaxNumElements(1, s64, 2)
1403 .clampMaxNumElements(1, s32, 4)
1404 .clampMaxNumElements(1, s16, 8)
1405 .clampMaxNumElements(1, s8, 16)
1406 .scalarize(1)
1407 .lower();
1408
1410 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1411 // Try to break down into smaller vectors as long as they're at least 64
1412 // bits. This lets us use vector operations for some parts of the
1413 // reduction.
1414 .fewerElementsIf(
1415 [=](const LegalityQuery &Q) {
1416 LLT SrcTy = Q.Types[1];
1417 if (SrcTy.isScalar())
1418 return false;
1419 if (!isPowerOf2_32(SrcTy.getNumElements()))
1420 return false;
1421 // We can usually perform 64b vector operations.
1422 return SrcTy.getSizeInBits() > 64;
1423 },
1424 [=](const LegalityQuery &Q) {
1425 LLT SrcTy = Q.Types[1];
1426 return std::make_pair(1, SrcTy.divide(2));
1427 })
1428 .scalarize(1)
1429 .lower();
1430
1431 // TODO: Update this to correct handling when adding AArch64/SVE support.
1432 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1433
1434 // Access to floating-point environment.
1435 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1436 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1437 .libcall();
1438
1439 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1440
1441 getActionDefinitionsBuilder(G_PREFETCH).custom();
1442
1443 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1444
1446 verify(*ST.getInstrInfo());
1447}
1448
 1451 LostDebugLocObserver &LocObserver) const {
  // Dispatcher for every opcode the rule table above marked .custom*():
  // forwards to a per-opcode helper. Returning false means "no custom
  // legalization known", which the generic legalizer treats as failure.
 1452 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
 1453 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
 1454 GISelChangeObserver &Observer = Helper.Observer;
 1455 switch (MI.getOpcode()) {
 1456 default:
 1457 // No idea what to do.
 1458 return false;
 1459 case TargetOpcode::G_VAARG:
 1460 return legalizeVaArg(MI, MRI, MIRBuilder);
 1461 case TargetOpcode::G_LOAD:
 1462 case TargetOpcode::G_STORE:
 1463 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
 1464 case TargetOpcode::G_SHL:
 1465 case TargetOpcode::G_ASHR:
 1466 case TargetOpcode::G_LSHR:
 1467 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
 1468 case TargetOpcode::G_GLOBAL_VALUE:
 1469 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
 1470 case TargetOpcode::G_SBFX:
 1471 case TargetOpcode::G_UBFX:
 1472 return legalizeBitfieldExtract(MI, MRI, Helper);
 1473 case TargetOpcode::G_FSHL:
 1474 case TargetOpcode::G_FSHR:
 1475 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
 1476 case TargetOpcode::G_ROTR:
 1477 return legalizeRotate(MI, MRI, Helper);
 1478 case TargetOpcode::G_CTPOP:
 1479 return legalizeCTPOP(MI, MRI, Helper);
 1480 case TargetOpcode::G_ATOMIC_CMPXCHG:
 1481 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
 1482 case TargetOpcode::G_CTTZ:
 1483 return legalizeCTTZ(MI, Helper);
 1484 case TargetOpcode::G_BZERO:
 1485 case TargetOpcode::G_MEMCPY:
 1486 case TargetOpcode::G_MEMMOVE:
 1487 case TargetOpcode::G_MEMSET:
 1488 return legalizeMemOps(MI, Helper);
 1489 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
 1490 return legalizeExtractVectorElt(MI, MRI, Helper);
 1491 case TargetOpcode::G_DYN_STACKALLOC:
 1492 return legalizeDynStackAlloc(MI, Helper);
 1493 case TargetOpcode::G_PREFETCH:
 1494 return legalizePrefetch(MI, Helper);
 1495 case TargetOpcode::G_ABS:
 1496 return Helper.lowerAbsToCNeg(MI);
 1497 case TargetOpcode::G_ICMP:
 1498 return legalizeICMP(MI, MRI, MIRBuilder);
 1499 case TargetOpcode::G_BITCAST:
 1500 return legalizeBitcast(MI, Helper);
 1501 case TargetOpcode::G_FPTRUNC:
 1502 // In order to lower f16 to f64 properly, we need to use f32 as an
 1503 // intermediary
 1504 return legalizeFptrunc(MI, MIRBuilder, MRI);
 1505 }
 1506
  // Every case in the switch returns; falling out of it is a programmer error.
 1507 llvm_unreachable("expected switch to return");
 1508}
1509
1510bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1511 LegalizerHelper &Helper) const {
1512 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1513 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1514 // We're trying to handle casts from i1 vectors to scalars but reloading from
1515 // stack.
1516 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1517 SrcTy.getElementType() != LLT::scalar(1))
1518 return false;
1519
1520 Helper.createStackStoreLoad(DstReg, SrcReg);
1521 MI.eraseFromParent();
1522 return true;
1523}
1524
// Custom legalization for G_FSHL/G_FSHR: normalize to G_FSHR with a 64-bit
// constant shift amount when possible, otherwise fall back to the generic
// shift-based lowering.
1525bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
 1527 MachineIRBuilder &MIRBuilder,
 1528 GISelChangeObserver &Observer,
 1529 LegalizerHelper &Helper) const {
 1530 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
 1531 MI.getOpcode() == TargetOpcode::G_FSHR);
 1532
 1533 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
 1534 // lowering
 1535 Register ShiftNo = MI.getOperand(3).getReg();
 1536 LLT ShiftTy = MRI.getType(ShiftNo);
 1537 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
 1538
 1539 // Adjust shift amount according to Opcode (FSHL/FSHR)
 1540 // Convert FSHL to FSHR
 1541 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
  // BitWidth holds the bit-size of the shifted value, represented in the
  // shift-amount type so it can be used with urem/ult below.
 1542 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
 1543
 1544 // Lower non-constant shifts and leave zero shifts to the optimizer.
 1545 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
 1546 return (Helper.lowerFunnelShiftAsShifts(MI) ==
  // NOTE(review): the right-hand side of this comparison is missing from this
  // rendering of the file (original line 1547) — verify against upstream; it
  // is expected to compare against LegalizerHelper::Legalized.
 1548
 1549 APInt Amount = VRegAndVal->Value.urem(BitWidth);
 1550
  // fshl(x, y, N) == fshr(x, y, BitWidth - N): fold the opcode difference
  // into the constant amount.
 1551 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
 1552
 1553 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
 1554 // in the range of 0 <-> BitWidth, it is legal
 1555 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
 1556 VRegAndVal->Value.ult(BitWidth))
 1557 return true;
 1558
 1559 // Cast the ShiftNumber to a 64-bit type
 1560 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
 1561
  // For G_FSHR it suffices to swap in the widened constant amount in place.
 1562 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
 1563 Observer.changingInstr(MI);
 1564 MI.getOperand(3).setReg(Cast64.getReg(0));
 1565 Observer.changedInstr(MI);
 1566 }
 1567 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
 1568 // instruction
 1569 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
 1570 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
 1571 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
 1572 Cast64.getReg(0)});
 1573 MI.eraseFromParent();
 1574 }
 1575 return true;
 1576}
1577
// Custom legalization for vector G_ICMP: rewrite "icmp ne" as "not(icmp eq)"
// so later passes can pattern-match the (CMEQ + NOT) form.
1578bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
 1580 MachineIRBuilder &MIRBuilder) const {
 1581 Register DstReg = MI.getOperand(0).getReg();
 1582 Register SrcReg1 = MI.getOperand(2).getReg();
 1583 Register SrcReg2 = MI.getOperand(3).getReg();
 1584 LLT DstTy = MRI.getType(DstReg);
 1585 LLT SrcTy = MRI.getType(SrcReg1);
 1586
 1587 // Check the vector types are legal
  // Require matching element size and element count between result and
  // operands, and a total size of exactly 64 or 128 bits (a NEON register).
 1588 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
 1589 DstTy.getNumElements() != SrcTy.getNumElements() ||
 1590 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
 1591 return false;
 1592
 1593 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
 1594 // following passes
 1595 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
 1596 if (Pred != CmpInst::ICMP_NE)
 1597 return true;
 1598 Register CmpReg =
 1599 MIRBuilder
 1600 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
 1601 .getReg(0);
 1602 MIRBuilder.buildNot(DstReg, CmpReg);
 1603
 1604 MI.eraseFromParent();
 1605 return true;
 1606}
1607
// Custom legalization for G_ROTR: widen a sub-64-bit scalar rotate amount to
// 64 bits with a zero-extension, mutating the instruction in place.
1608bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
 1610 LegalizerHelper &Helper) const {
 1611 // To allow for imported patterns to match, we ensure that the rotate amount
 1612 // is 64b with an extension.
 1613 Register AmtReg = MI.getOperand(2).getReg();
 1614 LLT AmtTy = MRI.getType(AmtReg);
  // AmtTy is only read by the asserts below; the cast silences the
  // unused-variable warning in release (NDEBUG) builds.
 1615 (void)AmtTy;
 1616 assert(AmtTy.isScalar() && "Expected a scalar rotate");
 1617 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
 1618 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
 1619 Helper.Observer.changingInstr(MI);
 1620 MI.getOperand(2).setReg(NewAmt.getReg(0));
 1621 Helper.Observer.changedInstr(MI);
 1622 return true;
 1623}
1624
// Custom legalization for G_GLOBAL_VALUE under the small code model: split
// the address computation into ADRP + G_ADD_LOW so later passes can fold the
// low part into load/store addressing. Returns true without changes for
// symbols, TLS globals, and GOT-accessed globals.
1625bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
 1627 GISelChangeObserver &Observer) const {
 1628 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
 1629 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
 1630 // G_ADD_LOW instructions.
 1631 // By splitting this here, we can optimize accesses in the small code model by
 1632 // folding in the G_ADD_LOW into the load/store offset.
 1633 auto &GlobalOp = MI.getOperand(1);
 1634 // Don't modify an intrinsic call.
 1635 if (GlobalOp.isSymbol())
 1636 return true;
 1637 const auto* GV = GlobalOp.getGlobal();
 1638 if (GV->isThreadLocal())
 1639 return true; // Don't want to modify TLS vars.
 1640
 1641 auto &TM = ST->getTargetLowering()->getTargetMachine();
 1642 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
 1643
  // GOT-accessed globals are loaded through the GOT entry; leave them alone.
 1644 if (OpFlags & AArch64II::MO_GOT)
 1645 return true;
 1646
 1647 auto Offset = GlobalOp.getOffset();
 1648 Register DstReg = MI.getOperand(0).getReg();
  // ADRP materializes the 4KB page of the global; G_ADD_LOW (below) adds the
  // page offset.
 1649 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
 1650 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
 1651 // Set the regclass on the dest reg too.
 1652 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
 1653
 1654 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
 1655 // by creating a MOVK that sets bits 48-63 of the register to (global address
 1656 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
 1657 // prevent an incorrect tag being generated during relocation when the
 1658 // global appears before the code section. Without the offset, a global at
 1659 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
 1660 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
 1661 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
 1662 // instead of `0xf`.
 1663 // This assumes that we're in the small code model so we can assume a binary
 1664 // size of <= 4GB, which makes the untagged PC relative offset positive. The
 1665 // binary must also be loaded into address range [0, 2^48). Both of these
 1666 // properties need to be ensured at runtime when using tagged addresses.
 1667 if (OpFlags & AArch64II::MO_TAGGED) {
 1668 assert(!Offset &&
 1669 "Should not have folded in an offset for a tagged global!");
 1670 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
 1671 .addGlobalAddress(GV, 0x100000000,
 1673 .addImm(48);
 1674 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
 1675 }
 1676
 1677 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
 1678 .addGlobalAddress(GV, Offset,
 1680 MI.eraseFromParent();
 1681 return true;
 1682}
1683
1685 MachineInstr &MI) const {
1686 MachineIRBuilder &MIB = Helper.MIRBuilder;
1687 MachineRegisterInfo &MRI = *MIB.getMRI();
1688
1689 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1690 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1691 MI.eraseFromParent();
1692 return true;
1693 };
1694 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1695 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1696 {MI.getOperand(2), MI.getOperand(3)});
1697 MI.eraseFromParent();
1698 return true;
1699 };
1700 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1701 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1702 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1703 MI.eraseFromParent();
1704 return true;
1705 };
1706
1707 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1708 switch (IntrinsicID) {
1709 case Intrinsic::vacopy: {
1710 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1711 unsigned VaListSize =
1712 (ST->isTargetDarwin() || ST->isTargetWindows())
1713 ? PtrSize
1714 : ST->isTargetILP32() ? 20 : 32;
1715
1716 MachineFunction &MF = *MI.getMF();
1718 LLT::scalar(VaListSize * 8));
1719 MIB.buildLoad(Val, MI.getOperand(2),
1722 VaListSize, Align(PtrSize)));
1723 MIB.buildStore(Val, MI.getOperand(1),
1726 VaListSize, Align(PtrSize)));
1727 MI.eraseFromParent();
1728 return true;
1729 }
1730 case Intrinsic::get_dynamic_area_offset: {
1731 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1732 MI.eraseFromParent();
1733 return true;
1734 }
1735 case Intrinsic::aarch64_mops_memset_tag: {
1736 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1737 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1738 // the instruction).
1739 auto &Value = MI.getOperand(3);
1740 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1741 Value.setReg(ExtValueReg);
1742 return true;
1743 }
1744 case Intrinsic::aarch64_prefetch: {
1745 auto &AddrVal = MI.getOperand(1);
1746
1747 int64_t IsWrite = MI.getOperand(2).getImm();
1748 int64_t Target = MI.getOperand(3).getImm();
1749 int64_t IsStream = MI.getOperand(4).getImm();
1750 int64_t IsData = MI.getOperand(5).getImm();
1751
1752 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1753 (!IsData << 3) | // IsDataCache bit
1754 (Target << 1) | // Cache level bits
1755 (unsigned)IsStream; // Stream bit
1756
1757 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1758 MI.eraseFromParent();
1759 return true;
1760 }
1761 case Intrinsic::aarch64_range_prefetch: {
1762 auto &AddrVal = MI.getOperand(1);
1763
1764 int64_t IsWrite = MI.getOperand(2).getImm();
1765 int64_t IsStream = MI.getOperand(3).getImm();
1766 unsigned PrfOp = (IsStream << 2) | IsWrite;
1767
1768 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1769 .addImm(PrfOp)
1770 .add(AddrVal)
1771 .addUse(MI.getOperand(4).getReg()); // Metadata
1772 MI.eraseFromParent();
1773 return true;
1774 }
1775 case Intrinsic::aarch64_prefetch_ir: {
1776 auto &AddrVal = MI.getOperand(1);
1777 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1778 MI.eraseFromParent();
1779 return true;
1780 }
1781 case Intrinsic::aarch64_neon_uaddv:
1782 case Intrinsic::aarch64_neon_saddv:
1783 case Intrinsic::aarch64_neon_umaxv:
1784 case Intrinsic::aarch64_neon_smaxv:
1785 case Intrinsic::aarch64_neon_uminv:
1786 case Intrinsic::aarch64_neon_sminv: {
1787 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1788 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1789 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1790
1791 auto OldDst = MI.getOperand(0).getReg();
1792 auto OldDstTy = MRI.getType(OldDst);
1793 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1794 if (OldDstTy == NewDstTy)
1795 return true;
1796
1797 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1798
1799 Helper.Observer.changingInstr(MI);
1800 MI.getOperand(0).setReg(NewDst);
1801 Helper.Observer.changedInstr(MI);
1802
1803 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1804 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1805 OldDst, NewDst);
1806
1807 return true;
1808 }
1809 case Intrinsic::aarch64_neon_uaddlp:
1810 case Intrinsic::aarch64_neon_saddlp: {
1811 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1812 ? AArch64::G_UADDLP
1813 : AArch64::G_SADDLP;
1814 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1815 MI.eraseFromParent();
1816
1817 return true;
1818 }
1819 case Intrinsic::aarch64_neon_uaddlv:
1820 case Intrinsic::aarch64_neon_saddlv: {
1821 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1822 ? AArch64::G_UADDLV
1823 : AArch64::G_SADDLV;
1824 Register DstReg = MI.getOperand(0).getReg();
1825 Register SrcReg = MI.getOperand(2).getReg();
1826 LLT DstTy = MRI.getType(DstReg);
1827
1828 LLT MidTy, ExtTy;
1829 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1830 MidTy = LLT::fixed_vector(4, 32);
1831 ExtTy = LLT::scalar(32);
1832 } else {
1833 MidTy = LLT::fixed_vector(2, 64);
1834 ExtTy = LLT::scalar(64);
1835 }
1836
1837 Register MidReg =
1838 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1839 Register ZeroReg =
1840 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1841 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1842 {MidReg, ZeroReg})
1843 .getReg(0);
1844
1845 if (DstTy.getScalarSizeInBits() < 32)
1846 MIB.buildTrunc(DstReg, ExtReg);
1847 else
1848 MIB.buildCopy(DstReg, ExtReg);
1849
1850 MI.eraseFromParent();
1851
1852 return true;
1853 }
1854 case Intrinsic::aarch64_neon_smax:
1855 return LowerBinOp(TargetOpcode::G_SMAX);
1856 case Intrinsic::aarch64_neon_smin:
1857 return LowerBinOp(TargetOpcode::G_SMIN);
1858 case Intrinsic::aarch64_neon_umax:
1859 return LowerBinOp(TargetOpcode::G_UMAX);
1860 case Intrinsic::aarch64_neon_umin:
1861 return LowerBinOp(TargetOpcode::G_UMIN);
1862 case Intrinsic::aarch64_neon_fmax:
1863 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1864 case Intrinsic::aarch64_neon_fmin:
1865 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1866 case Intrinsic::aarch64_neon_fmaxnm:
1867 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1868 case Intrinsic::aarch64_neon_fminnm:
1869 return LowerBinOp(TargetOpcode::G_FMINNUM);
1870 case Intrinsic::aarch64_neon_pmull:
1871 case Intrinsic::aarch64_neon_pmull64:
1872 return LowerBinOp(AArch64::G_PMULL);
1873 case Intrinsic::aarch64_neon_smull:
1874 return LowerBinOp(AArch64::G_SMULL);
1875 case Intrinsic::aarch64_neon_umull:
1876 return LowerBinOp(AArch64::G_UMULL);
1877 case Intrinsic::aarch64_neon_sabd:
1878 return LowerBinOp(TargetOpcode::G_ABDS);
1879 case Intrinsic::aarch64_neon_uabd:
1880 return LowerBinOp(TargetOpcode::G_ABDU);
1881 case Intrinsic::aarch64_neon_uhadd:
1882 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1883 case Intrinsic::aarch64_neon_urhadd:
1884 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1885 case Intrinsic::aarch64_neon_shadd:
1886 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1887 case Intrinsic::aarch64_neon_srhadd:
1888 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1889 case Intrinsic::aarch64_neon_sqshrn: {
1890 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1891 return true;
1892 // Create right shift instruction. Store the output register in Shr.
1893 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1894 {MRI.getType(MI.getOperand(2).getReg())},
1895 {MI.getOperand(2), MI.getOperand(3).getImm()});
1896 // Build the narrow intrinsic, taking in Shr.
1897 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1898 MI.eraseFromParent();
1899 return true;
1900 }
1901 case Intrinsic::aarch64_neon_sqshrun: {
1902 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1903 return true;
1904 // Create right shift instruction. Store the output register in Shr.
1905 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1906 {MRI.getType(MI.getOperand(2).getReg())},
1907 {MI.getOperand(2), MI.getOperand(3).getImm()});
1908 // Build the narrow intrinsic, taking in Shr.
1909 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1910 MI.eraseFromParent();
1911 return true;
1912 }
1913 case Intrinsic::aarch64_neon_sqrshrn: {
1914 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1915 return true;
1916 // Create right shift instruction. Store the output register in Shr.
1917 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1918 {MRI.getType(MI.getOperand(2).getReg())},
1919 {MI.getOperand(2), MI.getOperand(3).getImm()});
1920 // Build the narrow intrinsic, taking in Shr.
1921 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1922 MI.eraseFromParent();
1923 return true;
1924 }
1925 case Intrinsic::aarch64_neon_sqrshrun: {
1926 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1927 return true;
1928 // Create right shift instruction. Store the output register in Shr.
1929 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1930 {MRI.getType(MI.getOperand(2).getReg())},
1931 {MI.getOperand(2), MI.getOperand(3).getImm()});
1932 // Build the narrow intrinsic, taking in Shr.
1933 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1934 MI.eraseFromParent();
1935 return true;
1936 }
1937 case Intrinsic::aarch64_neon_uqrshrn: {
1938 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1939 return true;
1940 // Create right shift instruction. Store the output register in Shr.
1941 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1942 {MRI.getType(MI.getOperand(2).getReg())},
1943 {MI.getOperand(2), MI.getOperand(3).getImm()});
1944 // Build the narrow intrinsic, taking in Shr.
1945 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1946 MI.eraseFromParent();
1947 return true;
1948 }
1949 case Intrinsic::aarch64_neon_uqshrn: {
1950 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1951 return true;
1952 // Create right shift instruction. Store the output register in Shr.
1953 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
1954 {MRI.getType(MI.getOperand(2).getReg())},
1955 {MI.getOperand(2), MI.getOperand(3).getImm()});
1956 // Build the narrow intrinsic, taking in Shr.
1957 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1958 MI.eraseFromParent();
1959 return true;
1960 }
1961 case Intrinsic::aarch64_neon_sqshlu: {
1962 // Check if last operand is constant vector dup
1963 auto ShiftAmount = isConstantOrConstantSplatVector(
1964 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
1965 if (ShiftAmount) {
1966 // If so, create a new intrinsic with the correct shift amount
1967 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
1968 {MI.getOperand(2)})
1969 .addImm(ShiftAmount->getSExtValue());
1970 MI.eraseFromParent();
1971 return true;
1972 }
1973 return false;
1974 }
1975 case Intrinsic::aarch64_neon_vsli: {
1976 MIB.buildInstr(
1977 AArch64::G_SLI, {MI.getOperand(0)},
1978 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1979 MI.eraseFromParent();
1980 break;
1981 }
1982 case Intrinsic::aarch64_neon_vsri: {
1983 MIB.buildInstr(
1984 AArch64::G_SRI, {MI.getOperand(0)},
1985 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1986 MI.eraseFromParent();
1987 break;
1988 }
1989 case Intrinsic::aarch64_neon_abs: {
1990 // Lower the intrinsic to G_ABS.
1991 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1992 MI.eraseFromParent();
1993 return true;
1994 }
1995 case Intrinsic::aarch64_neon_sqadd: {
1996 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1997 return LowerBinOp(TargetOpcode::G_SADDSAT);
1998 break;
1999 }
2000 case Intrinsic::aarch64_neon_sqsub: {
2001 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2002 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2003 break;
2004 }
2005 case Intrinsic::aarch64_neon_uqadd: {
2006 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2007 return LowerBinOp(TargetOpcode::G_UADDSAT);
2008 break;
2009 }
2010 case Intrinsic::aarch64_neon_uqsub: {
2011 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2012 return LowerBinOp(TargetOpcode::G_USUBSAT);
2013 break;
2014 }
2015 case Intrinsic::aarch64_neon_udot:
2016 return LowerTriOp(AArch64::G_UDOT);
2017 case Intrinsic::aarch64_neon_sdot:
2018 return LowerTriOp(AArch64::G_SDOT);
2019 case Intrinsic::aarch64_neon_usdot:
2020 return LowerTriOp(AArch64::G_USDOT);
2021 case Intrinsic::aarch64_neon_sqxtn:
2022 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2023 case Intrinsic::aarch64_neon_sqxtun:
2024 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2025 case Intrinsic::aarch64_neon_uqxtn:
2026 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2027 case Intrinsic::aarch64_neon_fcvtzu:
2028 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2029 case Intrinsic::aarch64_neon_fcvtzs:
2030 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2031
2032 case Intrinsic::vector_reverse:
2033 // TODO: Add support for vector_reverse
2034 return false;
2035 }
2036
2037 return true;
2038}
2039
2040bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2042 GISelChangeObserver &Observer) const {
2043 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2044 MI.getOpcode() == TargetOpcode::G_LSHR ||
2045 MI.getOpcode() == TargetOpcode::G_SHL);
2046 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2047 // imported patterns can select it later. Either way, it will be legal.
2048 Register AmtReg = MI.getOperand(2).getReg();
2049 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2050 if (!VRegAndVal)
2051 return true;
2052 // Check the shift amount is in range for an immediate form.
2053 int64_t Amount = VRegAndVal->Value.getSExtValue();
2054 if (Amount > 31)
2055 return true; // This will have to remain a register variant.
2056 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
2057 Observer.changingInstr(MI);
2058 MI.getOperand(2).setReg(ExtCst.getReg(0));
2059 Observer.changedInstr(MI);
2060 return true;
2061}
2062
2064 MachineRegisterInfo &MRI) {
2065 Base = Root;
2066 Offset = 0;
2067
2068 Register NewBase;
2069 int64_t NewOffset;
2070 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2071 isShiftedInt<7, 3>(NewOffset)) {
2072 Base = NewBase;
2073 Offset = NewOffset;
2074 }
2075}
2076
2077// FIXME: This should be removed and replaced with the generic bitcast legalize
2078// action.
2079bool AArch64LegalizerInfo::legalizeLoadStore(
2081 GISelChangeObserver &Observer) const {
2082 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2083 MI.getOpcode() == TargetOpcode::G_LOAD);
2084 // Here we just try to handle vector loads/stores where our value type might
2085 // have pointer elements, which the SelectionDAG importer can't handle. To
2086 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2087 // the value to use s64 types.
2088
2089 // Custom legalization requires the instruction, if not deleted, must be fully
2090 // legalized. In order to allow further legalization of the inst, we create
2091 // a new instruction and erase the existing one.
2092
2093 Register ValReg = MI.getOperand(0).getReg();
2094 const LLT ValTy = MRI.getType(ValReg);
2095
2096 if (ValTy == LLT::scalar(128)) {
2097
2098 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2099 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2100 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2101 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2102 bool IsRcpC3 =
2103 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2104
2105 LLT s64 = LLT::scalar(64);
2106
2107 unsigned Opcode;
2108 if (IsRcpC3) {
2109 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2110 } else {
2111 // For LSE2, loads/stores should have been converted to monotonic and had
2112 // a fence inserted after them.
2113 assert(Ordering == AtomicOrdering::Monotonic ||
2114 Ordering == AtomicOrdering::Unordered);
2115 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2116
2117 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2118 }
2119
2120 MachineInstrBuilder NewI;
2121 if (IsLoad) {
2122 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2123 MIRBuilder.buildMergeLikeInstr(
2124 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2125 } else {
2126 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2127 NewI = MIRBuilder.buildInstr(
2128 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2129 }
2130
2131 if (IsRcpC3) {
2132 NewI.addUse(MI.getOperand(1).getReg());
2133 } else {
2134 Register Base;
2135 int Offset;
2136 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2137 NewI.addUse(Base);
2138 NewI.addImm(Offset / 8);
2139 }
2140
2141 NewI.cloneMemRefs(MI);
2142 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2143 *MRI.getTargetRegisterInfo(),
2144 *ST->getRegBankInfo());
2145 MI.eraseFromParent();
2146 return true;
2147 }
2148
2149 if (!ValTy.isPointerVector() ||
2150 ValTy.getElementType().getAddressSpace() != 0) {
2151 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2152 return false;
2153 }
2154
2155 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2156 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
2157 auto &MMO = **MI.memoperands_begin();
2158 MMO.setType(NewTy);
2159
2160 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2161 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2162 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2163 } else {
2164 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2165 MIRBuilder.buildBitcast(ValReg, NewLoad);
2166 }
2167 MI.eraseFromParent();
2168 return true;
2169}
2170
2171bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2173 MachineIRBuilder &MIRBuilder) const {
2174 MachineFunction &MF = MIRBuilder.getMF();
2175 Align Alignment(MI.getOperand(2).getImm());
2176 Register Dst = MI.getOperand(0).getReg();
2177 Register ListPtr = MI.getOperand(1).getReg();
2178
2179 LLT PtrTy = MRI.getType(ListPtr);
2180 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2181
2182 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2183 const Align PtrAlign = Align(PtrSize);
2184 auto List = MIRBuilder.buildLoad(
2185 PtrTy, ListPtr,
2186 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2187 PtrTy, PtrAlign));
2188
2189 MachineInstrBuilder DstPtr;
2190 if (Alignment > PtrAlign) {
2191 // Realign the list to the actual required alignment.
2192 auto AlignMinus1 =
2193 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2194 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2195 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2196 } else
2197 DstPtr = List;
2198
2199 LLT ValTy = MRI.getType(Dst);
2200 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2201 MIRBuilder.buildLoad(
2202 Dst, DstPtr,
2203 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2204 ValTy, std::max(Alignment, PtrAlign)));
2205
2206 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2207
2208 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2209
2210 MIRBuilder.buildStore(NewList, ListPtr,
2211 *MF.getMachineMemOperand(MachinePointerInfo(),
2213 PtrTy, PtrAlign));
2214
2215 MI.eraseFromParent();
2216 return true;
2217}
2218
2219bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2220 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2221 // Only legal if we can select immediate forms.
2222 // TODO: Lower this otherwise.
2223 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2224 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2225}
2226
2227bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2229 LegalizerHelper &Helper) const {
2230 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2231 // it can be more efficiently lowered to the following sequence that uses
2232 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2233 // registers are cheap.
2234 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2235 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2236 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2237 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2238 //
2239 // For 128 bit vector popcounts, we lower to the following sequence:
2240 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2241 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2242 // uaddlp.4s v0, v0 // v4s32, v2s64
2243 // uaddlp.2d v0, v0 // v2s64
2244 //
2245 // For 64 bit vector popcounts, we lower to the following sequence:
2246 // cnt.8b v0, v0 // v4s16, v2s32
2247 // uaddlp.4h v0, v0 // v4s16, v2s32
2248 // uaddlp.2s v0, v0 // v2s32
2249
2250 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2251 Register Dst = MI.getOperand(0).getReg();
2252 Register Val = MI.getOperand(1).getReg();
2253 LLT Ty = MRI.getType(Val);
2254 unsigned Size = Ty.getSizeInBits();
2255
2256 assert(Ty == MRI.getType(Dst) &&
2257 "Expected src and dst to have the same type!");
2258
2259 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2260 LLT s64 = LLT::scalar(64);
2261
2262 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2263 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2264 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2265 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2266
2267 MIRBuilder.buildZExt(Dst, Add);
2268 MI.eraseFromParent();
2269 return true;
2270 }
2271
2272 if (!ST->hasNEON() ||
2273 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2274 // Use generic lowering when custom lowering is not possible.
2275 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2276 Helper.lowerBitCount(MI) ==
2278 }
2279
2280 // Pre-conditioning: widen Val up to the nearest vector type.
2281 // s32,s64,v4s16,v2s32 -> v8i8
2282 // v8s16,v4s32,v2s64 -> v16i8
2283 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2284 if (Ty.isScalar()) {
2285 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2286 if (Size == 32) {
2287 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2288 }
2289 }
2290 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2291
2292 // Count bits in each byte-sized lane.
2293 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2294
2295 // Sum across lanes.
2296
2297 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2298 Ty.getScalarSizeInBits() != 16) {
2299 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2300 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2301 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2302 MachineInstrBuilder Sum;
2303
2304 if (Ty == LLT::fixed_vector(2, 64)) {
2305 auto UDOT =
2306 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2307 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2308 } else if (Ty == LLT::fixed_vector(4, 32)) {
2309 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2310 } else if (Ty == LLT::fixed_vector(2, 32)) {
2311 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2312 } else {
2313 llvm_unreachable("unexpected vector shape");
2314 }
2315
2316 Sum->getOperand(0).setReg(Dst);
2317 MI.eraseFromParent();
2318 return true;
2319 }
2320
2321 Register HSum = CTPOP.getReg(0);
2322 unsigned Opc;
2323 SmallVector<LLT> HAddTys;
2324 if (Ty.isScalar()) {
2325 Opc = Intrinsic::aarch64_neon_uaddlv;
2326 HAddTys.push_back(LLT::scalar(32));
2327 } else if (Ty == LLT::fixed_vector(8, 16)) {
2328 Opc = Intrinsic::aarch64_neon_uaddlp;
2329 HAddTys.push_back(LLT::fixed_vector(8, 16));
2330 } else if (Ty == LLT::fixed_vector(4, 32)) {
2331 Opc = Intrinsic::aarch64_neon_uaddlp;
2332 HAddTys.push_back(LLT::fixed_vector(8, 16));
2333 HAddTys.push_back(LLT::fixed_vector(4, 32));
2334 } else if (Ty == LLT::fixed_vector(2, 64)) {
2335 Opc = Intrinsic::aarch64_neon_uaddlp;
2336 HAddTys.push_back(LLT::fixed_vector(8, 16));
2337 HAddTys.push_back(LLT::fixed_vector(4, 32));
2338 HAddTys.push_back(LLT::fixed_vector(2, 64));
2339 } else if (Ty == LLT::fixed_vector(4, 16)) {
2340 Opc = Intrinsic::aarch64_neon_uaddlp;
2341 HAddTys.push_back(LLT::fixed_vector(4, 16));
2342 } else if (Ty == LLT::fixed_vector(2, 32)) {
2343 Opc = Intrinsic::aarch64_neon_uaddlp;
2344 HAddTys.push_back(LLT::fixed_vector(4, 16));
2345 HAddTys.push_back(LLT::fixed_vector(2, 32));
2346 } else
2347 llvm_unreachable("unexpected vector shape");
2349 for (LLT HTy : HAddTys) {
2350 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2351 HSum = UADD.getReg(0);
2352 }
2353
2354 // Post-conditioning.
2355 if (Ty.isScalar() && (Size == 64 || Size == 128))
2356 MIRBuilder.buildZExt(Dst, UADD);
2357 else
2358 UADD->getOperand(0).setReg(Dst);
2359 MI.eraseFromParent();
2360 return true;
2361}
2362
2363bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2364 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2365 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2366 LLT s64 = LLT::scalar(64);
2367 auto Addr = MI.getOperand(1).getReg();
2368 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2369 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2370 auto DstLo = MRI.createGenericVirtualRegister(s64);
2371 auto DstHi = MRI.createGenericVirtualRegister(s64);
2372
2373 MachineInstrBuilder CAS;
2374 if (ST->hasLSE()) {
2375 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2376 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2377 // the rest of the MIR so we must reassemble the extracted registers into a
2378 // 128-bit known-regclass one with code like this:
2379 //
2380 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2381 // %out = CASP %in1, ...
2382 // %OldLo = G_EXTRACT %out, 0
2383 // %OldHi = G_EXTRACT %out, 64
2384 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2385 unsigned Opcode;
2386 switch (Ordering) {
2388 Opcode = AArch64::CASPAX;
2389 break;
2391 Opcode = AArch64::CASPLX;
2392 break;
2395 Opcode = AArch64::CASPALX;
2396 break;
2397 default:
2398 Opcode = AArch64::CASPX;
2399 break;
2400 }
2401
2402 LLT s128 = LLT::scalar(128);
2403 auto CASDst = MRI.createGenericVirtualRegister(s128);
2404 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2405 auto CASNew = MRI.createGenericVirtualRegister(s128);
2406 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2407 .addUse(DesiredI->getOperand(0).getReg())
2408 .addImm(AArch64::sube64)
2409 .addUse(DesiredI->getOperand(1).getReg())
2410 .addImm(AArch64::subo64);
2411 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2412 .addUse(NewI->getOperand(0).getReg())
2413 .addImm(AArch64::sube64)
2414 .addUse(NewI->getOperand(1).getReg())
2415 .addImm(AArch64::subo64);
2416
2417 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2418
2419 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2420 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2421 } else {
2422 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2423 // can take arbitrary registers so it just has the normal GPR64 operands the
2424 // rest of AArch64 is expecting.
2425 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2426 unsigned Opcode;
2427 switch (Ordering) {
2429 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2430 break;
2432 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2433 break;
2436 Opcode = AArch64::CMP_SWAP_128;
2437 break;
2438 default:
2439 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2440 break;
2441 }
2442
2443 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2444 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2445 {Addr, DesiredI->getOperand(0),
2446 DesiredI->getOperand(1), NewI->getOperand(0),
2447 NewI->getOperand(1)});
2448 }
2449
2450 CAS.cloneMemRefs(MI);
2451 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2452 *MRI.getTargetRegisterInfo(),
2453 *ST->getRegBankInfo());
2454
2455 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2456 MI.eraseFromParent();
2457 return true;
2458}
2459
2460bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2461 LegalizerHelper &Helper) const {
2462 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2463 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2464 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2465 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2466 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2467 MI.eraseFromParent();
2468 return true;
2469}
2470
2471bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2472 LegalizerHelper &Helper) const {
2473 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2474
2475 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2476 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2477 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2478 // the instruction).
2479 auto &Value = MI.getOperand(1);
2480 Register ExtValueReg =
2481 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2482 Value.setReg(ExtValueReg);
2483 return true;
2484 }
2485
2486 return false;
2487}
2488
2489bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2490 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2491 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2492 auto VRegAndVal =
2494 if (VRegAndVal)
2495 return true;
2496 LLT VecTy = MRI.getType(Element->getVectorReg());
2497 if (VecTy.isScalableVector())
2498 return true;
2499 return Helper.lowerExtractInsertVectorElt(MI) !=
2501}
2502
2503bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2504 MachineInstr &MI, LegalizerHelper &Helper) const {
2505 MachineFunction &MF = *MI.getParent()->getParent();
2506 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2507 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2508
2509 // If stack probing is not enabled for this function, use the default
2510 // lowering.
2511 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2512 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2513 "inline-asm") {
2514 Helper.lowerDynStackAlloc(MI);
2515 return true;
2516 }
2517
2518 Register Dst = MI.getOperand(0).getReg();
2519 Register AllocSize = MI.getOperand(1).getReg();
2520 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2521
2522 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2523 "Unexpected type for dynamic alloca");
2524 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2525 "Unexpected type for dynamic alloca");
2526
2527 LLT PtrTy = MRI.getType(Dst);
2528 Register SPReg =
2530 Register SPTmp =
2531 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2532 auto NewMI =
2533 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2534 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2535 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2536 MIRBuilder.buildCopy(Dst, SPTmp);
2537
2538 MI.eraseFromParent();
2539 return true;
2540}
2541
2542bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2543 LegalizerHelper &Helper) const {
2544 MachineIRBuilder &MIB = Helper.MIRBuilder;
2545 auto &AddrVal = MI.getOperand(0);
2546
2547 int64_t IsWrite = MI.getOperand(1).getImm();
2548 int64_t Locality = MI.getOperand(2).getImm();
2549 int64_t IsData = MI.getOperand(3).getImm();
2550
2551 bool IsStream = Locality == 0;
2552 if (Locality != 0) {
2553 assert(Locality <= 3 && "Prefetch locality out-of-range");
2554 // The locality degree is the opposite of the cache speed.
2555 // Put the number the other way around.
2556 // The encoding starts at 0 for level 1
2557 Locality = 3 - Locality;
2558 }
2559
2560 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2561
2562 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2563 MI.eraseFromParent();
2564 return true;
2565}
2566
2567bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2568 MachineIRBuilder &MIRBuilder,
2569 MachineRegisterInfo &MRI) const {
2570 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2571 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2572 "Expected a power of 2 elements");
2573
2574 LLT s16 = LLT::scalar(16);
2575 LLT s32 = LLT::scalar(32);
2576 LLT s64 = LLT::scalar(64);
2577 LLT v2s16 = LLT::fixed_vector(2, s16);
2578 LLT v4s16 = LLT::fixed_vector(4, s16);
2579 LLT v2s32 = LLT::fixed_vector(2, s32);
2580 LLT v4s32 = LLT::fixed_vector(4, s32);
2581 LLT v2s64 = LLT::fixed_vector(2, s64);
2582
2583 SmallVector<Register> RegsToUnmergeTo;
2584 SmallVector<Register> TruncOddDstRegs;
2585 SmallVector<Register> RegsToMerge;
2586
2587 unsigned ElemCount = SrcTy.getNumElements();
2588
2589 // Find the biggest size chunks we can work with
2590 int StepSize = ElemCount % 4 ? 2 : 4;
2591
2592 // If we have a power of 2 greater than 2, we need to first unmerge into
2593 // enough pieces
2594 if (ElemCount <= 2)
2595 RegsToUnmergeTo.push_back(Src);
2596 else {
2597 for (unsigned i = 0; i < ElemCount / 2; ++i)
2598 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2599
2600 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2601 }
2602
2603 // Create all of the round-to-odd instructions and store them
2604 for (auto SrcReg : RegsToUnmergeTo) {
2605 Register Mid =
2606 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2607 .getReg(0);
2608 TruncOddDstRegs.push_back(Mid);
2609 }
2610
2611 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2612 // truncate 2s32 to 2s16.
2613 unsigned Index = 0;
2614 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2615 if (StepSize == 4) {
2616 Register ConcatDst =
2617 MIRBuilder
2619 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2620 .getReg(0);
2621
2622 RegsToMerge.push_back(
2623 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2624 } else {
2625 RegsToMerge.push_back(
2626 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2627 }
2628 }
2629
2630 // If there is only one register, replace the destination
2631 if (RegsToMerge.size() == 1) {
2632 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2633 MI.eraseFromParent();
2634 return true;
2635 }
2636
2637 // Merge the rest of the instructions & replace the register
2638 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2639 MRI.replaceRegWith(Dst, Fin);
2640 MI.eraseFromParent();
2641 return true;
2642}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1697
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element or vector.
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the given size.
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering that is at least as strong as Ordering.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition Utils.cpp:432
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...