LLVM 22.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(0)
101 .clampScalar(0, s8, s64)
104 .clampNumElements(0, v8s8, v16s8)
105 .clampNumElements(0, v4s16, v8s16)
106 .clampNumElements(0, v2s32, v4s32)
107 .clampMaxNumElements(0, s64, 2)
108 .clampMaxNumElements(0, p0, 2)
110
112 .legalFor({p0, s16, s32, s64})
113 .legalFor(PackedVectorAllTypeList)
117 .clampScalar(0, s16, s64)
118 .clampNumElements(0, v8s8, v16s8)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampMaxNumElements(0, s64, 2)
122 .clampMaxNumElements(0, p0, 2);
123
125 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
126 smallerThan(1, 0)))
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
130 .minScalar(1, s8)
131 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
132 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
133
135 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
136 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .widenScalarToNextPow2(1)
138 .clampScalar(1, s32, s128)
140 .minScalar(0, s16)
141 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
142 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
144
145 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, s32, s64)
150 .clampMaxNumElements(0, s8, 16)
151 .clampMaxNumElements(0, s16, 8)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampNumElements(0, v2s64, v2s64)
155 [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 0, s32)
159 .minScalarOrEltIf(
160 [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 0, s16)
164 .minScalarOrEltIf(
165 [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 0, s8)
169 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
171
173 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
175 .clampScalar(0, s32, s64)
176 .clampMaxNumElements(0, s8, 16)
177 .clampMaxNumElements(0, s16, 8)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 0, s32)
185 .minScalarOrEltIf(
186 [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 0, s16)
190 .minScalarOrEltIf(
191 [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 0, s8)
195 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
197
198 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
199 .customIf([=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor({
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(0)
218 .clampScalar(1, s32, s64)
219 .clampScalar(0, s32, s64)
220 .clampNumElements(0, v8s8, v16s8)
221 .clampNumElements(0, v4s16, v8s16)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64)
225 .minScalarSameAs(1, 0)
229
231 .legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
233 .clampNumElements(0, v2p0, v2p0);
234
235 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
236
237 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
238 .legalFor({s32, s64})
239 .libcallFor({s128})
240 .clampScalar(0, s32, s64)
242 .scalarize(0);
243
244 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor({s128})
248 .minScalarOrElt(0, s32)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
251 .scalarize(0);
252
253 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(0, /*Min = */ 32)
255 .clampScalar(0, s32, s64)
256 .lower();
257
258 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
259 .legalFor({s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
266 .clampNumElements(0, v8s8, v16s8)
267 .clampNumElements(0, v4s16, v8s16)
268 .clampNumElements(0, v2s32, v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
273 .legalFor(HasCSSC, {s32, s64})
274 .legalFor(PackedVectorAllTypeList)
275 .customIf([=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
283 .widenScalarIf(
284 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
287 .clampNumElements(0, v4s16, v8s16)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
291 .lower();
292
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
302 .clampScalar(1, s32, s64)
304
305 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
310 .legalFor({{s32, s64}, {s64, s64}})
311 .customIf([=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
316
317 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
318 .customFor({{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
322 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(0, s32, s128)
333 .minScalarEltSameAsIf(always, 1, 0)
334 .maxScalarEltSameAsIf(always, 1, 0)
335 .clampNumElements(0, v8s8, v16s8)
336 .clampNumElements(0, v4s16, v8s16)
337 .clampNumElements(0, v2s32, v4s32)
338 .clampNumElements(0, v2s64, v2s64)
341
343 .legalFor({{s32, s32},
344 {s64, s64},
345 {v8s8, v8s8},
346 {v16s8, v16s8},
347 {v4s16, v4s16},
348 {v8s16, v8s16},
349 {v2s32, v2s32},
350 {v4s32, v4s32}})
351 .widenScalarToNextPow2(1, /*Min=*/32)
352 .clampScalar(1, s32, s64)
353 .clampNumElements(0, v8s8, v16s8)
354 .clampNumElements(0, v4s16, v8s16)
355 .clampNumElements(0, v2s32, v4s32)
358 .scalarSameSizeAs(0, 1);
359
360 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
361
363 .lowerIf(isVector(0))
364 .widenScalarToNextPow2(1, /*Min=*/32)
365 .clampScalar(1, s32, s64)
366 .scalarSameSizeAs(0, 1)
367 .legalFor(HasCSSC, {s32, s64})
368 .customFor(!HasCSSC, {s32, s64});
369
370 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
371
372 getActionDefinitionsBuilder(G_BITREVERSE)
373 .legalFor({s32, s64, v8s8, v16s8})
374 .widenScalarToNextPow2(0, /*Min = */ 32)
376 .clampScalar(0, s32, s64)
377 .clampNumElements(0, v8s8, v16s8)
378 .clampNumElements(0, v4s16, v8s16)
379 .clampNumElements(0, v2s32, v4s32)
380 .clampNumElements(0, v2s64, v2s64)
383 .lower();
384
386 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
388 .clampScalar(0, s32, s64)
389 .clampNumElements(0, v4s16, v8s16)
390 .clampNumElements(0, v2s32, v4s32)
391 .clampNumElements(0, v2s64, v2s64)
393
394 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
395 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
396 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
397 .clampNumElements(0, v8s8, v16s8)
398 .clampNumElements(0, v4s16, v8s16)
399 .clampNumElements(0, v2s32, v4s32)
400 .clampMaxNumElements(0, s64, 2)
403 .lower();
404
406 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
407 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
408 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
409 .legalFor({s32, s64, v2s32, v4s32, v2s64})
410 .legalFor(HasFP16, {s16, v4s16, v8s16})
411 .libcallFor({s128})
412 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
413 .minScalarOrElt(0, MinFPScalar)
414 .clampNumElements(0, v4s16, v8s16)
415 .clampNumElements(0, v2s32, v4s32)
416 .clampNumElements(0, v2s64, v2s64)
418
419 getActionDefinitionsBuilder({G_FABS, G_FNEG})
420 .legalFor({s32, s64, v2s32, v4s32, v2s64})
421 .legalFor(HasFP16, {s16, v4s16, v8s16})
422 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
424 .clampNumElements(0, v4s16, v8s16)
425 .clampNumElements(0, v2s32, v4s32)
426 .clampNumElements(0, v2s64, v2s64)
428 .lowerFor({s16, v4s16, v8s16});
429
431 .libcallFor({s32, s64, s128})
432 .minScalar(0, s32)
433 .scalarize(0);
434
435 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
436 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
437 .libcallFor({{s64, s128}})
438 .minScalarOrElt(1, MinFPScalar);
439
440 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
441 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
442 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
443 G_FSINH, G_FTANH, G_FMODF})
444 // We need a call for these, so we always need to scalarize.
445 .scalarize(0)
446 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
447 .minScalar(0, s32)
448 .libcallFor({s32, s64, s128});
449 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
450 .scalarize(0)
451 .minScalar(0, s32)
452 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
453
454 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
455 .legalFor({{s64, s32}, {s64, s64}})
456 .legalFor(HasFP16, {{s64, s16}})
457 .minScalar(0, s64)
458 .minScalar(1, s32)
459 .libcallFor({{s64, s128}});
460
461 // TODO: Custom legalization for mismatched types.
462 getActionDefinitionsBuilder(G_FCOPYSIGN)
464 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
465 [=](const LegalityQuery &Query) {
466 const LLT Ty = Query.Types[0];
467 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
468 })
469 .lower();
470
472
473 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
474 auto &Actions = getActionDefinitionsBuilder(Op);
475
476 if (Op == G_SEXTLOAD)
478
479 // Atomics have zero extending behavior.
480 Actions
481 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
482 {s32, p0, s16, 8},
483 {s32, p0, s32, 8},
484 {s64, p0, s8, 2},
485 {s64, p0, s16, 2},
486 {s64, p0, s32, 4},
487 {s64, p0, s64, 8},
488 {p0, p0, s64, 8},
489 {v2s32, p0, s64, 8}})
490 .widenScalarToNextPow2(0)
491 .clampScalar(0, s32, s64)
492 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
493 // how to do that yet.
494 .unsupportedIfMemSizeNotPow2()
495 // Lower anything left over into G_*EXT and G_LOAD
496 .lower();
497 }
498
499 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
500 const LLT &ValTy = Query.Types[0];
501 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
502 };
503
505 .customIf([=](const LegalityQuery &Query) {
506 return HasRCPC3 && Query.Types[0] == s128 &&
507 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
508 })
509 .customIf([=](const LegalityQuery &Query) {
510 return Query.Types[0] == s128 &&
511 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
512 })
513 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
514 {s16, p0, s16, 8},
515 {s32, p0, s32, 8},
516 {s64, p0, s64, 8},
517 {p0, p0, s64, 8},
518 {s128, p0, s128, 8},
519 {v8s8, p0, s64, 8},
520 {v16s8, p0, s128, 8},
521 {v4s16, p0, s64, 8},
522 {v8s16, p0, s128, 8},
523 {v2s32, p0, s64, 8},
524 {v4s32, p0, s128, 8},
525 {v2s64, p0, s128, 8}})
526 // These extends are also legal
527 .legalForTypesWithMemDesc(
528 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
529 .legalForTypesWithMemDesc({
530 // SVE vscale x 128 bit base sizes
531 {nxv16s8, p0, nxv16s8, 8},
532 {nxv8s16, p0, nxv8s16, 8},
533 {nxv4s32, p0, nxv4s32, 8},
534 {nxv2s64, p0, nxv2s64, 8},
535 })
536 .widenScalarToNextPow2(0, /* MinSize = */ 8)
537 .clampMaxNumElements(0, s8, 16)
538 .clampMaxNumElements(0, s16, 8)
539 .clampMaxNumElements(0, s32, 4)
540 .clampMaxNumElements(0, s64, 2)
541 .clampMaxNumElements(0, p0, 2)
543 .clampScalar(0, s8, s64)
545 [=](const LegalityQuery &Query) {
546 // Clamp extending load results to 32-bits.
547 return Query.Types[0].isScalar() &&
548 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
549 Query.Types[0].getSizeInBits() > 32;
550 },
551 changeTo(0, s32))
552 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
553 .bitcastIf(typeInSet(0, {v4s8}),
554 [=](const LegalityQuery &Query) {
555 const LLT VecTy = Query.Types[0];
556 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
557 })
558 .customIf(IsPtrVecPred)
559 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
560 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
561
563 .customIf([=](const LegalityQuery &Query) {
564 return HasRCPC3 && Query.Types[0] == s128 &&
565 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
566 })
567 .customIf([=](const LegalityQuery &Query) {
568 return Query.Types[0] == s128 &&
569 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
570 })
571 .legalForTypesWithMemDesc(
572 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
573 {s32, p0, s8, 8}, // truncstorei8 from s32
574 {s64, p0, s8, 8}, // truncstorei8 from s64
575 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
576 {s64, p0, s16, 8}, // truncstorei16 from s64
577 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
578 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
579 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
580 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
581 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
582 .legalForTypesWithMemDesc({
583 // SVE vscale x 128 bit base sizes
584 // TODO: Add nxv2p0. Consider bitcastIf.
585 // See #92130
586 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
587 {nxv16s8, p0, nxv16s8, 8},
588 {nxv8s16, p0, nxv8s16, 8},
589 {nxv4s32, p0, nxv4s32, 8},
590 {nxv2s64, p0, nxv2s64, 8},
591 })
592 .clampScalar(0, s8, s64)
593 .minScalarOrElt(0, s8)
594 .lowerIf([=](const LegalityQuery &Query) {
595 return Query.Types[0].isScalar() &&
596 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
597 })
598 // Maximum: sN * k = 128
599 .clampMaxNumElements(0, s8, 16)
600 .clampMaxNumElements(0, s16, 8)
601 .clampMaxNumElements(0, s32, 4)
602 .clampMaxNumElements(0, s64, 2)
603 .clampMaxNumElements(0, p0, 2)
605 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
606 .bitcastIf(all(typeInSet(0, {v4s8}),
607 LegalityPredicate([=](const LegalityQuery &Query) {
608 return Query.Types[0].getSizeInBits() ==
609 Query.MMODescrs[0].MemoryTy.getSizeInBits();
610 })),
611 [=](const LegalityQuery &Query) {
612 const LLT VecTy = Query.Types[0];
613 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
614 })
615 .customIf(IsPtrVecPred)
616 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
617 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
618 .lower();
619
620 getActionDefinitionsBuilder(G_INDEXED_STORE)
621 // Idx 0 == Ptr, Idx 1 == Val
622 // TODO: we can implement legalizations but as of now these are
623 // generated in a very specific way.
625 {p0, s8, s8, 8},
626 {p0, s16, s16, 8},
627 {p0, s32, s8, 8},
628 {p0, s32, s16, 8},
629 {p0, s32, s32, 8},
630 {p0, s64, s64, 8},
631 {p0, p0, p0, 8},
632 {p0, v8s8, v8s8, 8},
633 {p0, v16s8, v16s8, 8},
634 {p0, v4s16, v4s16, 8},
635 {p0, v8s16, v8s16, 8},
636 {p0, v2s32, v2s32, 8},
637 {p0, v4s32, v4s32, 8},
638 {p0, v2s64, v2s64, 8},
639 {p0, v2p0, v2p0, 8},
640 {p0, s128, s128, 8},
641 })
642 .unsupported();
643
644 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
645 LLT LdTy = Query.Types[0];
646 LLT PtrTy = Query.Types[1];
647 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
648 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
649 return false;
650 if (PtrTy != p0)
651 return false;
652 return true;
653 };
654 getActionDefinitionsBuilder(G_INDEXED_LOAD)
657 .legalIf(IndexedLoadBasicPred)
658 .unsupported();
659 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
660 .unsupportedIf(
662 .legalIf(all(typeInSet(0, {s16, s32, s64}),
663 LegalityPredicate([=](const LegalityQuery &Q) {
664 LLT LdTy = Q.Types[0];
665 LLT PtrTy = Q.Types[1];
666 LLT MemTy = Q.MMODescrs[0].MemoryTy;
667 if (PtrTy != p0)
668 return false;
669 if (LdTy == s16)
670 return MemTy == s8;
671 if (LdTy == s32)
672 return MemTy == s8 || MemTy == s16;
673 if (LdTy == s64)
674 return MemTy == s8 || MemTy == s16 || MemTy == s32;
675 return false;
676 })))
677 .unsupported();
678
679 // Constants
681 .legalFor({p0, s8, s16, s32, s64})
682 .widenScalarToNextPow2(0)
683 .clampScalar(0, s8, s64);
684 getActionDefinitionsBuilder(G_FCONSTANT)
685 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
686 .legalFor({s16, s32, s64, s128})
687 .clampScalar(0, MinFPScalar, s128);
688
689 // FIXME: fix moreElementsToNextPow2
691 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
693 .clampScalar(1, s32, s64)
694 .clampScalar(0, s32, s32)
697 [=](const LegalityQuery &Query) {
698 const LLT &Ty = Query.Types[0];
699 const LLT &SrcTy = Query.Types[1];
700 return Ty.isVector() && !SrcTy.isPointerVector() &&
701 Ty.getElementType() != SrcTy.getElementType();
702 },
703 0, 1)
704 .minScalarOrEltIf(
705 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
706 1, s32)
707 .minScalarOrEltIf(
708 [=](const LegalityQuery &Query) {
709 return Query.Types[1].isPointerVector();
710 },
711 0, s64)
713 .clampNumElements(1, v8s8, v16s8)
714 .clampNumElements(1, v4s16, v8s16)
715 .clampNumElements(1, v2s32, v4s32)
716 .clampNumElements(1, v2s64, v2s64)
717 .clampNumElements(1, v2p0, v2p0)
718 .customIf(isVector(0));
719
721 .legalFor({{s32, s32},
722 {s32, s64},
723 {v4s32, v4s32},
724 {v2s32, v2s32},
725 {v2s64, v2s64}})
726 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
728 .clampScalar(0, s32, s32)
729 .minScalarOrElt(1, MinFPScalar)
732 [=](const LegalityQuery &Query) {
733 const LLT &Ty = Query.Types[0];
734 const LLT &SrcTy = Query.Types[1];
735 return Ty.isVector() && !SrcTy.isPointerVector() &&
736 Ty.getElementType() != SrcTy.getElementType();
737 },
738 0, 1)
739 .clampNumElements(1, v4s16, v8s16)
740 .clampNumElements(1, v2s32, v4s32)
741 .clampMaxNumElements(1, s64, 2)
743 .libcallFor({{s32, s128}});
744
745 // Extensions
746 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
747 unsigned DstSize = Query.Types[0].getSizeInBits();
748
749 // Handle legal vectors using legalFor
750 if (Query.Types[0].isVector())
751 return false;
752
753 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
754 return false; // Extending to a scalar s128 needs narrowing.
755
756 const LLT &SrcTy = Query.Types[1];
757
758 // Make sure we fit in a register otherwise. Don't bother checking that
759 // the source type is below 128 bits. We shouldn't be allowing anything
760 // through which is wider than the destination in the first place.
761 unsigned SrcSize = SrcTy.getSizeInBits();
762 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
763 return false;
764
765 return true;
766 };
767 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
768 .legalIf(ExtLegalFunc)
769 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
770 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
772 .clampMaxNumElements(1, s8, 8)
773 .clampMaxNumElements(1, s16, 4)
774 .clampMaxNumElements(1, s32, 2)
775 // Tries to convert a large EXTEND into two smaller EXTENDs
776 .lowerIf([=](const LegalityQuery &Query) {
777 return (Query.Types[0].getScalarSizeInBits() >
778 Query.Types[1].getScalarSizeInBits() * 2) &&
779 Query.Types[0].isVector() &&
780 (Query.Types[1].getScalarSizeInBits() == 8 ||
781 Query.Types[1].getScalarSizeInBits() == 16);
782 })
783 .clampMinNumElements(1, s8, 8)
784 .clampMinNumElements(1, s16, 4)
786
788 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
790 .clampMaxNumElements(0, s8, 8)
791 .clampMaxNumElements(0, s16, 4)
792 .clampMaxNumElements(0, s32, 2)
794 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
795 0, s8)
796 .lowerIf([=](const LegalityQuery &Query) {
797 LLT DstTy = Query.Types[0];
798 LLT SrcTy = Query.Types[1];
799 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
800 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
801 })
802 .clampMinNumElements(0, s8, 8)
803 .clampMinNumElements(0, s16, 4)
804 .alwaysLegal();
805
806 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
807 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
808
809 getActionDefinitionsBuilder(G_SEXT_INREG)
810 .legalFor({s32, s64})
811 .legalFor(PackedVectorAllTypeList)
812 .maxScalar(0, s64)
813 .clampNumElements(0, v8s8, v16s8)
814 .clampNumElements(0, v4s16, v8s16)
815 .clampNumElements(0, v2s32, v4s32)
816 .clampMaxNumElements(0, s64, 2)
817 .lower();
818
819 // FP conversions
821 .legalFor(
822 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
823 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
825 .customIf([](const LegalityQuery &Q) {
826 LLT DstTy = Q.Types[0];
827 LLT SrcTy = Q.Types[1];
828 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
829 SrcTy.getScalarSizeInBits() == 64 &&
830 DstTy.getScalarSizeInBits() == 16;
831 })
832 // Clamp based on input
833 .clampNumElements(1, v4s32, v4s32)
834 .clampNumElements(1, v2s64, v2s64)
835 .scalarize(0);
836
838 .legalFor(
839 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
840 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
843 [](const LegalityQuery &Q) {
844 LLT DstTy = Q.Types[0];
845 LLT SrcTy = Q.Types[1];
846 return SrcTy.isVector() && DstTy.isVector() &&
847 SrcTy.getScalarSizeInBits() == 16 &&
848 DstTy.getScalarSizeInBits() == 64;
849 },
850 changeElementTo(1, s32))
851 .clampNumElements(0, v4s32, v4s32)
852 .clampNumElements(0, v2s64, v2s64)
853 .scalarize(0);
854
855 // Conversions
856 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
857 .legalFor({{s32, s32},
858 {s64, s32},
859 {s32, s64},
860 {s64, s64},
861 {v2s32, v2s32},
862 {v4s32, v4s32},
863 {v2s64, v2s64}})
864 .legalFor(HasFP16,
865 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
866 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
868 // The range of a fp16 value fits into an i17, so we can lower the width
869 // to i64.
871 [=](const LegalityQuery &Query) {
872 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
873 },
874 changeTo(0, s64))
877 .minScalar(0, s32)
878 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
880 [=](const LegalityQuery &Query) {
881 return Query.Types[0].getScalarSizeInBits() <= 64 &&
882 Query.Types[0].getScalarSizeInBits() >
883 Query.Types[1].getScalarSizeInBits();
884 },
886 .widenScalarIf(
887 [=](const LegalityQuery &Query) {
888 return Query.Types[1].getScalarSizeInBits() <= 64 &&
889 Query.Types[0].getScalarSizeInBits() <
890 Query.Types[1].getScalarSizeInBits();
891 },
893 .clampNumElements(0, v4s16, v8s16)
894 .clampNumElements(0, v2s32, v4s32)
895 .clampMaxNumElements(0, s64, 2)
896 .libcallFor(
897 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
898
899 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
900 .legalFor({{s32, s32},
901 {s64, s32},
902 {s32, s64},
903 {s64, s64},
904 {v2s32, v2s32},
905 {v4s32, v4s32},
906 {v2s64, v2s64}})
907 .legalFor(
908 HasFP16,
909 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
910 // Handle types larger than i64 by scalarizing/lowering.
911 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
913 // The range of a fp16 value fits into an i17, so we can lower the width
914 // to i64.
916 [=](const LegalityQuery &Query) {
917 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
918 },
919 changeTo(0, s64))
920 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
922 .widenScalarToNextPow2(0, /*MinSize=*/32)
923 .minScalar(0, s32)
924 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
926 [=](const LegalityQuery &Query) {
927 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
928 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
929 ITySize > Query.Types[1].getScalarSizeInBits();
930 },
932 .widenScalarIf(
933 [=](const LegalityQuery &Query) {
934 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
935 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
936 Query.Types[0].getScalarSizeInBits() < FTySize;
937 },
940 .clampNumElements(0, v4s16, v8s16)
941 .clampNumElements(0, v2s32, v4s32)
942 .clampMaxNumElements(0, s64, 2);
943
944 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
945 .legalFor({{s32, s32},
946 {s64, s32},
947 {s32, s64},
948 {s64, s64},
949 {v2s32, v2s32},
950 {v4s32, v4s32},
951 {v2s64, v2s64}})
952 .legalFor(HasFP16,
953 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
954 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
958 .minScalar(1, s32)
959 .lowerIf([](const LegalityQuery &Query) {
960 return Query.Types[1].isVector() &&
961 Query.Types[1].getScalarSizeInBits() == 64 &&
962 Query.Types[0].getScalarSizeInBits() == 16;
963 })
964 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
966 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
967 [](const LegalityQuery &Query) {
968 return Query.Types[0].getScalarSizeInBits() == 32 &&
969 Query.Types[1].getScalarSizeInBits() == 64;
970 },
971 0)
972 .widenScalarIf(
973 [](const LegalityQuery &Query) {
974 return Query.Types[1].getScalarSizeInBits() <= 64 &&
975 Query.Types[0].getScalarSizeInBits() <
976 Query.Types[1].getScalarSizeInBits();
977 },
979 .widenScalarIf(
980 [](const LegalityQuery &Query) {
981 return Query.Types[0].getScalarSizeInBits() <= 64 &&
982 Query.Types[0].getScalarSizeInBits() >
983 Query.Types[1].getScalarSizeInBits();
984 },
986 .clampNumElements(0, v4s16, v8s16)
987 .clampNumElements(0, v2s32, v4s32)
988 .clampMaxNumElements(0, s64, 2)
989 .libcallFor({{s16, s128},
990 {s32, s128},
991 {s64, s128},
992 {s128, s128},
993 {s128, s32},
994 {s128, s64}});
995
996 // Control-flow
999 .legalFor({s32})
1000 .clampScalar(0, s32, s32);
1001 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1002
1004 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1005 .widenScalarToNextPow2(0)
1006 .clampScalar(0, s32, s64)
1007 .clampScalar(1, s32, s32)
1010 .lowerIf(isVector(0));
1011
1012 // Pointer-handling
1013 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1014
1015 if (TM.getCodeModel() == CodeModel::Small)
1016 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1017 else
1018 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1019
1020 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1021 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1022
1023 getActionDefinitionsBuilder(G_PTRTOINT)
1024 .legalFor({{s64, p0}, {v2s64, v2p0}})
1025 .widenScalarToNextPow2(0, 64)
1026 .clampScalar(0, s64, s64)
1027 .clampMaxNumElements(0, s64, 2);
1028
1029 getActionDefinitionsBuilder(G_INTTOPTR)
1030 .unsupportedIf([&](const LegalityQuery &Query) {
1031 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1032 })
1033 .legalFor({{p0, s64}, {v2p0, v2s64}})
1034 .clampMaxNumElements(1, s64, 2);
1035
1036 // Casts for 32 and 64-bit width type are just copies.
1037 // Same for 128-bit width type, except they are on the FPR bank.
1039 // Keeping 32-bit instructions legal to prevent regression in some tests
1040 .legalForCartesianProduct({s32, v2s16, v4s8})
1041 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1042 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1043 .customIf([=](const LegalityQuery &Query) {
1044 // Handle casts from i1 vectors to scalars.
1045 LLT DstTy = Query.Types[0];
1046 LLT SrcTy = Query.Types[1];
1047 return DstTy.isScalar() && SrcTy.isVector() &&
1048 SrcTy.getScalarSizeInBits() == 1;
1049 })
1050 .lowerIf([=](const LegalityQuery &Query) {
1051 return Query.Types[0].isVector() != Query.Types[1].isVector();
1052 })
1054 .clampNumElements(0, v8s8, v16s8)
1055 .clampNumElements(0, v4s16, v8s16)
1056 .clampNumElements(0, v2s32, v4s32)
1057 .lower();
1058
1059 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1060
1061 // va_list must be a pointer, but most sized types are pretty easy to handle
1062 // as the destination.
1064 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1065 .clampScalar(0, s8, s64)
1066 .widenScalarToNextPow2(0, /*Min*/ 8);
1067
1068 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1069 .lowerIf(
1070 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1071
1072 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1073
1074 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1075 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1076 .customFor(!UseOutlineAtomics, {{s128, p0}})
1077 .libcallFor(UseOutlineAtomics,
1078 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1079 .clampScalar(0, s32, s64);
1080
1081 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1082 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1083 G_ATOMICRMW_XOR})
1084 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1085 .libcallFor(UseOutlineAtomics,
1086 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1087 .clampScalar(0, s32, s64);
1088
1089 // Do not outline these atomics operations, as per comment in
1090 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1092 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1093 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1094 .clampScalar(0, s32, s64);
1095
1096 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1097
1098 // Merge/Unmerge
1099 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1100 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1101 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1103 .widenScalarToNextPow2(LitTyIdx, 8)
1104 .widenScalarToNextPow2(BigTyIdx, 32)
1105 .clampScalar(LitTyIdx, s8, s64)
1106 .clampScalar(BigTyIdx, s32, s128)
1107 .legalIf([=](const LegalityQuery &Q) {
1108 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1109 case 32:
1110 case 64:
1111 case 128:
1112 break;
1113 default:
1114 return false;
1115 }
1116 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1117 case 8:
1118 case 16:
1119 case 32:
1120 case 64:
1121 return true;
1122 default:
1123 return false;
1124 }
1125 });
1126 }
1127
1128 // TODO : nxv4s16, nxv2s16, nxv2s32
1129 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1130 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1131 {s16, nxv8s16, s64},
1132 {s32, nxv4s32, s64},
1133 {s64, nxv2s64, s64}})
1134 .unsupportedIf([=](const LegalityQuery &Query) {
1135 const LLT &EltTy = Query.Types[1].getElementType();
1136 if (Query.Types[1].isScalableVector())
1137 return false;
1138 return Query.Types[0] != EltTy;
1139 })
1140 .minScalar(2, s64)
1141 .customIf([=](const LegalityQuery &Query) {
1142 const LLT &VecTy = Query.Types[1];
1143 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1144 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1145 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1146 })
1147 .minScalarOrEltIf(
1148 [=](const LegalityQuery &Query) {
1149 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1150 // cause the total vec size to be > 128b.
1151 return Query.Types[1].isFixedVector() &&
1152 Query.Types[1].getNumElements() <= 2;
1153 },
1154 0, s64)
1155 .minScalarOrEltIf(
1156 [=](const LegalityQuery &Query) {
1157 return Query.Types[1].isFixedVector() &&
1158 Query.Types[1].getNumElements() <= 4;
1159 },
1160 0, s32)
1161 .minScalarOrEltIf(
1162 [=](const LegalityQuery &Query) {
1163 return Query.Types[1].isFixedVector() &&
1164 Query.Types[1].getNumElements() <= 8;
1165 },
1166 0, s16)
1167 .minScalarOrEltIf(
1168 [=](const LegalityQuery &Query) {
1169 return Query.Types[1].isFixedVector() &&
1170 Query.Types[1].getNumElements() <= 16;
1171 },
1172 0, s8)
1173 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1175 .clampMaxNumElements(1, s64, 2)
1176 .clampMaxNumElements(1, s32, 4)
1177 .clampMaxNumElements(1, s16, 8)
1178 .clampMaxNumElements(1, s8, 16)
1179 .clampMaxNumElements(1, p0, 2)
1181
1182 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1183 .legalIf(
1184 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1185 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1186 {nxv8s16, s32, s64},
1187 {nxv4s32, s32, s64},
1188 {nxv2s64, s64, s64}})
1191 .clampNumElements(0, v8s8, v16s8)
1192 .clampNumElements(0, v4s16, v8s16)
1193 .clampNumElements(0, v2s32, v4s32)
1194 .clampMaxNumElements(0, s64, 2)
1195 .clampMaxNumElements(0, p0, 2)
1197
1198 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1199 .legalFor({{v8s8, s8},
1200 {v16s8, s8},
1201 {v4s16, s16},
1202 {v8s16, s16},
1203 {v2s32, s32},
1204 {v4s32, s32},
1205 {v2s64, s64},
1206 {v2p0, p0}})
1207 .clampNumElements(0, v4s32, v4s32)
1208 .clampNumElements(0, v2s64, v2s64)
1209 .minScalarOrElt(0, s8)
1212 .minScalarSameAs(1, 0);
1213
1214 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1215
1216 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1217 .legalIf([=](const LegalityQuery &Query) {
1218 const LLT &DstTy = Query.Types[0];
1219 const LLT &SrcTy = Query.Types[1];
1220 // For now just support the TBL2 variant which needs the source vectors
1221 // to be the same size as the dest.
1222 if (DstTy != SrcTy)
1223 return false;
1224 return llvm::is_contained(
1225 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1226 })
1227 .moreElementsIf(
1228 [](const LegalityQuery &Query) {
1229 return Query.Types[0].getNumElements() >
1230 Query.Types[1].getNumElements();
1231 },
1232 changeTo(1, 0))
1235 [](const LegalityQuery &Query) {
1236 return Query.Types[0].getNumElements() <
1237 Query.Types[1].getNumElements();
1238 },
1239 changeTo(0, 1))
1240 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1241 .clampNumElements(0, v8s8, v16s8)
1242 .clampNumElements(0, v4s16, v8s16)
1243 .clampNumElements(0, v4s32, v4s32)
1244 .clampNumElements(0, v2s64, v2s64)
1246 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1247 // Bitcast pointers vector to i64.
1248 const LLT DstTy = Query.Types[0];
1249 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1250 });
1251
1252 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1253 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1254 .bitcastIf(
1255 [=](const LegalityQuery &Query) {
1256 return Query.Types[0].isFixedVector() &&
1257 Query.Types[1].isFixedVector() &&
1258 Query.Types[0].getSizeInBits() <= 128 &&
1259 Query.Types[1].getSizeInBits() <= 64;
1260 },
1261 [=](const LegalityQuery &Query) {
1262 const LLT DstTy = Query.Types[0];
1263 const LLT SrcTy = Query.Types[1];
1264 return std::pair(
1265 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1268 SrcTy.getNumElements())));
1269 });
1270
1271 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1272 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1274 .immIdx(0); // Inform verifier imm idx 0 is handled.
1275
1276 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1277 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1278 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1279
1280 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1281
1282 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1283
1284 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1285
1286 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1287
1288 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1289
1290 if (ST.hasMOPS()) {
1291 // G_BZERO is not supported. Currently it is only emitted by
1292 // PreLegalizerCombiner for G_MEMSET with zero constant.
1294
1296 .legalForCartesianProduct({p0}, {s64}, {s64})
1297 .customForCartesianProduct({p0}, {s8}, {s64})
1298 .immIdx(0); // Inform verifier imm idx 0 is handled.
1299
1300 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1301 .legalForCartesianProduct({p0}, {p0}, {s64})
1302 .immIdx(0); // Inform verifier imm idx 0 is handled.
1303
1304 // G_MEMCPY_INLINE does not have a tailcall immediate
1305 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1306 .legalForCartesianProduct({p0}, {p0}, {s64});
1307
1308 } else {
1309 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1310 .libcall();
1311 }
1312
1313 // For fadd reductions we have pairwise operations available. We treat the
1314 // usual legal types as legal and handle the lowering to pairwise instructions
1315 // later.
1316 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1317 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1318 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1319 .minScalarOrElt(0, MinFPScalar)
1320 .clampMaxNumElements(1, s64, 2)
1321 .clampMaxNumElements(1, s32, 4)
1322 .clampMaxNumElements(1, s16, 8)
1324 .scalarize(1)
1325 .lower();
1326
1327 // For fmul reductions we need to split up into individual operations. We
1328 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1329 // smaller types, followed by scalarizing what remains.
1330 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1331 .minScalarOrElt(0, MinFPScalar)
1332 .clampMaxNumElements(1, s64, 2)
1333 .clampMaxNumElements(1, s32, 4)
1334 .clampMaxNumElements(1, s16, 8)
1335 .clampMaxNumElements(1, s32, 2)
1336 .clampMaxNumElements(1, s16, 4)
1337 .scalarize(1)
1338 .lower();
1339
1340 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1341 .scalarize(2)
1342 .lower();
1343
1344 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1345 .legalFor({{s8, v8s8},
1346 {s8, v16s8},
1347 {s16, v4s16},
1348 {s16, v8s16},
1349 {s32, v2s32},
1350 {s32, v4s32},
1351 {s64, v2s64}})
1353 .clampMaxNumElements(1, s64, 2)
1354 .clampMaxNumElements(1, s32, 4)
1355 .clampMaxNumElements(1, s16, 8)
1356 .clampMaxNumElements(1, s8, 16)
1358 .scalarize(1);
1359
1360 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1361 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1362 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1363 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1364 .minScalarOrElt(0, MinFPScalar)
1365 .clampMaxNumElements(1, s64, 2)
1366 .clampMaxNumElements(1, s32, 4)
1367 .clampMaxNumElements(1, s16, 8)
1368 .scalarize(1)
1369 .lower();
1370
1371 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1372 .clampMaxNumElements(1, s32, 2)
1373 .clampMaxNumElements(1, s16, 4)
1374 .clampMaxNumElements(1, s8, 8)
1375 .scalarize(1)
1376 .lower();
1377
1379 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1380 .legalFor({{s8, v8s8},
1381 {s8, v16s8},
1382 {s16, v4s16},
1383 {s16, v8s16},
1384 {s32, v2s32},
1385 {s32, v4s32}})
1386 .moreElementsIf(
1387 [=](const LegalityQuery &Query) {
1388 return Query.Types[1].isVector() &&
1389 Query.Types[1].getElementType() != s8 &&
1390 Query.Types[1].getNumElements() & 1;
1391 },
1393 .clampMaxNumElements(1, s64, 2)
1394 .clampMaxNumElements(1, s32, 4)
1395 .clampMaxNumElements(1, s16, 8)
1396 .clampMaxNumElements(1, s8, 16)
1397 .scalarize(1)
1398 .lower();
1399
1401 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1402 // Try to break down into smaller vectors as long as they're at least 64
1403 // bits. This lets us use vector operations for some parts of the
1404 // reduction.
1405 .fewerElementsIf(
1406 [=](const LegalityQuery &Q) {
1407 LLT SrcTy = Q.Types[1];
1408 if (SrcTy.isScalar())
1409 return false;
1410 if (!isPowerOf2_32(SrcTy.getNumElements()))
1411 return false;
1412 // We can usually perform 64b vector operations.
1413 return SrcTy.getSizeInBits() > 64;
1414 },
1415 [=](const LegalityQuery &Q) {
1416 LLT SrcTy = Q.Types[1];
1417 return std::make_pair(1, SrcTy.divide(2));
1418 })
1419 .scalarize(1)
1420 .lower();
1421
1422 // TODO: Update this to correct handling when adding AArch64/SVE support.
1423 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1424
1425 // Access to floating-point environment.
1426 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1427 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1428 .libcall();
1429
1430 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1431
1432 getActionDefinitionsBuilder(G_PREFETCH).custom();
1433
1434 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1435
1437 verify(*ST.getInstrInfo());
1438}
1439
1442 LostDebugLocObserver &LocObserver) const {
1443 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1444 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1445 GISelChangeObserver &Observer = Helper.Observer;
1446 switch (MI.getOpcode()) {
1447 default:
1448 // No idea what to do.
1449 return false;
1450 case TargetOpcode::G_VAARG:
1451 return legalizeVaArg(MI, MRI, MIRBuilder);
1452 case TargetOpcode::G_LOAD:
1453 case TargetOpcode::G_STORE:
1454 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1455 case TargetOpcode::G_SHL:
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1459 case TargetOpcode::G_GLOBAL_VALUE:
1460 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1461 case TargetOpcode::G_SBFX:
1462 case TargetOpcode::G_UBFX:
1463 return legalizeBitfieldExtract(MI, MRI, Helper);
1464 case TargetOpcode::G_FSHL:
1465 case TargetOpcode::G_FSHR:
1466 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1467 case TargetOpcode::G_ROTR:
1468 return legalizeRotate(MI, MRI, Helper);
1469 case TargetOpcode::G_CTPOP:
1470 return legalizeCTPOP(MI, MRI, Helper);
1471 case TargetOpcode::G_ATOMIC_CMPXCHG:
1472 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1473 case TargetOpcode::G_CTTZ:
1474 return legalizeCTTZ(MI, Helper);
1475 case TargetOpcode::G_BZERO:
1476 case TargetOpcode::G_MEMCPY:
1477 case TargetOpcode::G_MEMMOVE:
1478 case TargetOpcode::G_MEMSET:
1479 return legalizeMemOps(MI, Helper);
1480 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1481 return legalizeExtractVectorElt(MI, MRI, Helper);
1482 case TargetOpcode::G_DYN_STACKALLOC:
1483 return legalizeDynStackAlloc(MI, Helper);
1484 case TargetOpcode::G_PREFETCH:
1485 return legalizePrefetch(MI, Helper);
1486 case TargetOpcode::G_ABS:
1487 return Helper.lowerAbsToCNeg(MI);
1488 case TargetOpcode::G_ICMP:
1489 return legalizeICMP(MI, MRI, MIRBuilder);
1490 case TargetOpcode::G_BITCAST:
1491 return legalizeBitcast(MI, Helper);
1492 case TargetOpcode::G_FPTRUNC:
1493 // In order to lower f16 to f64 properly, we need to use f32 as an
1494 // intermediary
1495 return legalizeFptrunc(MI, MIRBuilder, MRI);
1496 }
1497
1498 llvm_unreachable("expected switch to return");
1499}
1500
1501bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1502 LegalizerHelper &Helper) const {
1503 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1504 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1505 // We're trying to handle casts from i1 vectors to scalars but reloading from
1506 // stack.
1507 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1508 SrcTy.getElementType() != LLT::scalar(1))
1509 return false;
1510
1511 Helper.createStackStoreLoad(DstReg, SrcReg);
1512 MI.eraseFromParent();
1513 return true;
1514}
1515
1516bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1518 MachineIRBuilder &MIRBuilder,
1519 GISelChangeObserver &Observer,
1520 LegalizerHelper &Helper) const {
1521 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1522 MI.getOpcode() == TargetOpcode::G_FSHR);
1523
1524 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1525 // lowering
1526 Register ShiftNo = MI.getOperand(3).getReg();
1527 LLT ShiftTy = MRI.getType(ShiftNo);
1528 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1529
1530 // Adjust shift amount according to Opcode (FSHL/FSHR)
1531 // Convert FSHL to FSHR
1532 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1533 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1534
1535 // Lower non-constant shifts and leave zero shifts to the optimizer.
1536 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1537 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1539
1540 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1541
1542 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1543
1544 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1545 // in the range of 0 <-> BitWidth, it is legal
1546 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1547 VRegAndVal->Value.ult(BitWidth))
1548 return true;
1549
1550 // Cast the ShiftNumber to a 64-bit type
1551 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1552
1553 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1554 Observer.changingInstr(MI);
1555 MI.getOperand(3).setReg(Cast64.getReg(0));
1556 Observer.changedInstr(MI);
1557 }
1558 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1559 // instruction
1560 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1561 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1562 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1563 Cast64.getReg(0)});
1564 MI.eraseFromParent();
1565 }
1566 return true;
1567}
1568
1569bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1571 MachineIRBuilder &MIRBuilder) const {
1572 Register DstReg = MI.getOperand(0).getReg();
1573 Register SrcReg1 = MI.getOperand(2).getReg();
1574 Register SrcReg2 = MI.getOperand(3).getReg();
1575 LLT DstTy = MRI.getType(DstReg);
1576 LLT SrcTy = MRI.getType(SrcReg1);
1577
1578 // Check the vector types are legal
1579 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1580 DstTy.getNumElements() != SrcTy.getNumElements() ||
1581 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1582 return false;
1583
1584 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1585 // following passes
1586 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1587 if (Pred != CmpInst::ICMP_NE)
1588 return true;
1589 Register CmpReg =
1590 MIRBuilder
1591 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1592 .getReg(0);
1593 MIRBuilder.buildNot(DstReg, CmpReg);
1594
1595 MI.eraseFromParent();
1596 return true;
1597}
1598
1599bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1601 LegalizerHelper &Helper) const {
1602 // To allow for imported patterns to match, we ensure that the rotate amount
1603 // is 64b with an extension.
1604 Register AmtReg = MI.getOperand(2).getReg();
1605 LLT AmtTy = MRI.getType(AmtReg);
1606 (void)AmtTy;
1607 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1608 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1609 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1610 Helper.Observer.changingInstr(MI);
1611 MI.getOperand(2).setReg(NewAmt.getReg(0));
1612 Helper.Observer.changedInstr(MI);
1613 return true;
1614}
1615
1616bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1618 GISelChangeObserver &Observer) const {
1619 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1620 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1621 // G_ADD_LOW instructions.
1622 // By splitting this here, we can optimize accesses in the small code model by
1623 // folding in the G_ADD_LOW into the load/store offset.
1624 auto &GlobalOp = MI.getOperand(1);
1625 // Don't modify an intrinsic call.
1626 if (GlobalOp.isSymbol())
1627 return true;
1628 const auto* GV = GlobalOp.getGlobal();
1629 if (GV->isThreadLocal())
1630 return true; // Don't want to modify TLS vars.
1631
1632 auto &TM = ST->getTargetLowering()->getTargetMachine();
1633 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1634
1635 if (OpFlags & AArch64II::MO_GOT)
1636 return true;
1637
1638 auto Offset = GlobalOp.getOffset();
1639 Register DstReg = MI.getOperand(0).getReg();
1640 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1641 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1642 // Set the regclass on the dest reg too.
1643 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1644
1645 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1646 // by creating a MOVK that sets bits 48-63 of the register to (global address
1647 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1648 // prevent an incorrect tag being generated during relocation when the
1649 // global appears before the code section. Without the offset, a global at
1650 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1651 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1652 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1653 // instead of `0xf`.
1654 // This assumes that we're in the small code model so we can assume a binary
1655 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1656 // binary must also be loaded into address range [0, 2^48). Both of these
1657 // properties need to be ensured at runtime when using tagged addresses.
1658 if (OpFlags & AArch64II::MO_TAGGED) {
1659 assert(!Offset &&
1660 "Should not have folded in an offset for a tagged global!");
1661 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1662 .addGlobalAddress(GV, 0x100000000,
1664 .addImm(48);
1665 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1666 }
1667
1668 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1669 .addGlobalAddress(GV, Offset,
1671 MI.eraseFromParent();
1672 return true;
1673}
1674
1676 MachineInstr &MI) const {
1677 MachineIRBuilder &MIB = Helper.MIRBuilder;
1678 MachineRegisterInfo &MRI = *MIB.getMRI();
1679
1680 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1681 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1682 MI.eraseFromParent();
1683 return true;
1684 };
1685 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1686 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1687 {MI.getOperand(2), MI.getOperand(3)});
1688 MI.eraseFromParent();
1689 return true;
1690 };
1691 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1692 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1693 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1694 MI.eraseFromParent();
1695 return true;
1696 };
1697
1698 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1699 switch (IntrinsicID) {
1700 case Intrinsic::vacopy: {
1701 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1702 unsigned VaListSize =
1703 (ST->isTargetDarwin() || ST->isTargetWindows())
1704 ? PtrSize
1705 : ST->isTargetILP32() ? 20 : 32;
1706
1707 MachineFunction &MF = *MI.getMF();
1709 LLT::scalar(VaListSize * 8));
1710 MIB.buildLoad(Val, MI.getOperand(2),
1713 VaListSize, Align(PtrSize)));
1714 MIB.buildStore(Val, MI.getOperand(1),
1717 VaListSize, Align(PtrSize)));
1718 MI.eraseFromParent();
1719 return true;
1720 }
1721 case Intrinsic::get_dynamic_area_offset: {
1722 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1723 MI.eraseFromParent();
1724 return true;
1725 }
1726 case Intrinsic::aarch64_mops_memset_tag: {
1727 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1728 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1729 // the instruction).
1730 auto &Value = MI.getOperand(3);
1731 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1732 Value.setReg(ExtValueReg);
1733 return true;
1734 }
1735 case Intrinsic::aarch64_prefetch: {
1736 auto &AddrVal = MI.getOperand(1);
1737
1738 int64_t IsWrite = MI.getOperand(2).getImm();
1739 int64_t Target = MI.getOperand(3).getImm();
1740 int64_t IsStream = MI.getOperand(4).getImm();
1741 int64_t IsData = MI.getOperand(5).getImm();
1742
1743 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1744 (!IsData << 3) | // IsDataCache bit
1745 (Target << 1) | // Cache level bits
1746 (unsigned)IsStream; // Stream bit
1747
1748 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1749 MI.eraseFromParent();
1750 return true;
1751 }
1752 case Intrinsic::aarch64_neon_uaddv:
1753 case Intrinsic::aarch64_neon_saddv:
1754 case Intrinsic::aarch64_neon_umaxv:
1755 case Intrinsic::aarch64_neon_smaxv:
1756 case Intrinsic::aarch64_neon_uminv:
1757 case Intrinsic::aarch64_neon_sminv: {
1758 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1759 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1760 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1761
1762 auto OldDst = MI.getOperand(0).getReg();
1763 auto OldDstTy = MRI.getType(OldDst);
1764 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1765 if (OldDstTy == NewDstTy)
1766 return true;
1767
1768 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1769
1770 Helper.Observer.changingInstr(MI);
1771 MI.getOperand(0).setReg(NewDst);
1772 Helper.Observer.changedInstr(MI);
1773
1774 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1775 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1776 OldDst, NewDst);
1777
1778 return true;
1779 }
1780 case Intrinsic::aarch64_neon_uaddlp:
1781 case Intrinsic::aarch64_neon_saddlp: {
1782 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1783 ? AArch64::G_UADDLP
1784 : AArch64::G_SADDLP;
1785 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1786 MI.eraseFromParent();
1787
1788 return true;
1789 }
1790 case Intrinsic::aarch64_neon_uaddlv:
1791 case Intrinsic::aarch64_neon_saddlv: {
1792 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1793 ? AArch64::G_UADDLV
1794 : AArch64::G_SADDLV;
1795 Register DstReg = MI.getOperand(0).getReg();
1796 Register SrcReg = MI.getOperand(2).getReg();
1797 LLT DstTy = MRI.getType(DstReg);
1798
1799 LLT MidTy, ExtTy;
1800 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1801 MidTy = LLT::fixed_vector(4, 32);
1802 ExtTy = LLT::scalar(32);
1803 } else {
1804 MidTy = LLT::fixed_vector(2, 64);
1805 ExtTy = LLT::scalar(64);
1806 }
1807
1808 Register MidReg =
1809 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1810 Register ZeroReg =
1811 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1812 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1813 {MidReg, ZeroReg})
1814 .getReg(0);
1815
1816 if (DstTy.getScalarSizeInBits() < 32)
1817 MIB.buildTrunc(DstReg, ExtReg);
1818 else
1819 MIB.buildCopy(DstReg, ExtReg);
1820
1821 MI.eraseFromParent();
1822
1823 return true;
1824 }
1825 case Intrinsic::aarch64_neon_smax:
1826 return LowerBinOp(TargetOpcode::G_SMAX);
1827 case Intrinsic::aarch64_neon_smin:
1828 return LowerBinOp(TargetOpcode::G_SMIN);
1829 case Intrinsic::aarch64_neon_umax:
1830 return LowerBinOp(TargetOpcode::G_UMAX);
1831 case Intrinsic::aarch64_neon_umin:
1832 return LowerBinOp(TargetOpcode::G_UMIN);
1833 case Intrinsic::aarch64_neon_fmax:
1834 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1835 case Intrinsic::aarch64_neon_fmin:
1836 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1837 case Intrinsic::aarch64_neon_fmaxnm:
1838 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1839 case Intrinsic::aarch64_neon_fminnm:
1840 return LowerBinOp(TargetOpcode::G_FMINNUM);
1841 case Intrinsic::aarch64_neon_pmull:
1842 case Intrinsic::aarch64_neon_pmull64:
1843 return LowerBinOp(AArch64::G_PMULL);
1844 case Intrinsic::aarch64_neon_smull:
1845 return LowerBinOp(AArch64::G_SMULL);
1846 case Intrinsic::aarch64_neon_umull:
1847 return LowerBinOp(AArch64::G_UMULL);
1848 case Intrinsic::aarch64_neon_sabd:
1849 return LowerBinOp(TargetOpcode::G_ABDS);
1850 case Intrinsic::aarch64_neon_uabd:
1851 return LowerBinOp(TargetOpcode::G_ABDU);
1852 case Intrinsic::aarch64_neon_uhadd:
1853 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1854 case Intrinsic::aarch64_neon_urhadd:
1855 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1856 case Intrinsic::aarch64_neon_shadd:
1857 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1858 case Intrinsic::aarch64_neon_srhadd:
1859 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1860 case Intrinsic::aarch64_neon_abs: {
1861 // Lower the intrinsic to G_ABS.
1862 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1863 MI.eraseFromParent();
1864 return true;
1865 }
1866 case Intrinsic::aarch64_neon_sqadd: {
1867 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1868 return LowerBinOp(TargetOpcode::G_SADDSAT);
1869 break;
1870 }
1871 case Intrinsic::aarch64_neon_sqsub: {
1872 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1873 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1874 break;
1875 }
1876 case Intrinsic::aarch64_neon_uqadd: {
1877 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1878 return LowerBinOp(TargetOpcode::G_UADDSAT);
1879 break;
1880 }
1881 case Intrinsic::aarch64_neon_uqsub: {
1882 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1883 return LowerBinOp(TargetOpcode::G_USUBSAT);
1884 break;
1885 }
1886 case Intrinsic::aarch64_neon_udot:
1887 return LowerTriOp(AArch64::G_UDOT);
1888 case Intrinsic::aarch64_neon_sdot:
1889 return LowerTriOp(AArch64::G_SDOT);
1890 case Intrinsic::aarch64_neon_usdot:
1891 return LowerTriOp(AArch64::G_USDOT);
1892 case Intrinsic::aarch64_neon_sqxtn:
1893 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
1894 case Intrinsic::aarch64_neon_sqxtun:
1895 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
1896 case Intrinsic::aarch64_neon_uqxtn:
1897 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
1898
1899 case Intrinsic::vector_reverse:
1900 // TODO: Add support for vector_reverse
1901 return false;
1902 }
1903
1904 return true;
1905}
1906
1907bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1909 GISelChangeObserver &Observer) const {
1910 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1911 MI.getOpcode() == TargetOpcode::G_LSHR ||
1912 MI.getOpcode() == TargetOpcode::G_SHL);
1913 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1914 // imported patterns can select it later. Either way, it will be legal.
1915 Register AmtReg = MI.getOperand(2).getReg();
1916 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1917 if (!VRegAndVal)
1918 return true;
1919 // Check the shift amount is in range for an immediate form.
1920 int64_t Amount = VRegAndVal->Value.getSExtValue();
1921 if (Amount > 31)
1922 return true; // This will have to remain a register variant.
1923 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1924 Observer.changingInstr(MI);
1925 MI.getOperand(2).setReg(ExtCst.getReg(0));
1926 Observer.changedInstr(MI);
1927 return true;
1928}
1929
1932 Base = Root;
1933 Offset = 0;
1934
1935 Register NewBase;
1936 int64_t NewOffset;
1937 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1938 isShiftedInt<7, 3>(NewOffset)) {
1939 Base = NewBase;
1940 Offset = NewOffset;
1941 }
1942}
1943
1944// FIXME: This should be removed and replaced with the generic bitcast legalize
1945// action.
1946bool AArch64LegalizerInfo::legalizeLoadStore(
1948 GISelChangeObserver &Observer) const {
1949 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1950 MI.getOpcode() == TargetOpcode::G_LOAD);
1951 // Here we just try to handle vector loads/stores where our value type might
1952 // have pointer elements, which the SelectionDAG importer can't handle. To
1953 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1954 // the value to use s64 types.
1955
1956 // Custom legalization requires the instruction, if not deleted, must be fully
1957 // legalized. In order to allow further legalization of the inst, we create
1958 // a new instruction and erase the existing one.
1959
1960 Register ValReg = MI.getOperand(0).getReg();
1961 const LLT ValTy = MRI.getType(ValReg);
1962
1963 if (ValTy == LLT::scalar(128)) {
1964
1965 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1966 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1967 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1968 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1969 bool IsRcpC3 =
1970 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1971
1972 LLT s64 = LLT::scalar(64);
1973
1974 unsigned Opcode;
1975 if (IsRcpC3) {
1976 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1977 } else {
1978 // For LSE2, loads/stores should have been converted to monotonic and had
1979 // a fence inserted after them.
1980 assert(Ordering == AtomicOrdering::Monotonic ||
1981 Ordering == AtomicOrdering::Unordered);
1982 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1983
1984 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1985 }
1986
1987 MachineInstrBuilder NewI;
1988 if (IsLoad) {
1989 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1990 MIRBuilder.buildMergeLikeInstr(
1991 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1992 } else {
1993 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1994 NewI = MIRBuilder.buildInstr(
1995 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1996 }
1997
1998 if (IsRcpC3) {
1999 NewI.addUse(MI.getOperand(1).getReg());
2000 } else {
2001 Register Base;
2002 int Offset;
2003 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2004 NewI.addUse(Base);
2005 NewI.addImm(Offset / 8);
2006 }
2007
2008 NewI.cloneMemRefs(MI);
2009 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2010 *MRI.getTargetRegisterInfo(),
2011 *ST->getRegBankInfo());
2012 MI.eraseFromParent();
2013 return true;
2014 }
2015
2016 if (!ValTy.isPointerVector() ||
2017 ValTy.getElementType().getAddressSpace() != 0) {
2018 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2019 return false;
2020 }
2021
2022 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2023 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
2024 auto &MMO = **MI.memoperands_begin();
2025 MMO.setType(NewTy);
2026
2027 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2028 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2029 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2030 } else {
2031 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2032 MIRBuilder.buildBitcast(ValReg, NewLoad);
2033 }
2034 MI.eraseFromParent();
2035 return true;
2036}
2037
2038bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2040 MachineIRBuilder &MIRBuilder) const {
2041 MachineFunction &MF = MIRBuilder.getMF();
2042 Align Alignment(MI.getOperand(2).getImm());
2043 Register Dst = MI.getOperand(0).getReg();
2044 Register ListPtr = MI.getOperand(1).getReg();
2045
2046 LLT PtrTy = MRI.getType(ListPtr);
2047 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2048
2049 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2050 const Align PtrAlign = Align(PtrSize);
2051 auto List = MIRBuilder.buildLoad(
2052 PtrTy, ListPtr,
2053 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2054 PtrTy, PtrAlign));
2055
2056 MachineInstrBuilder DstPtr;
2057 if (Alignment > PtrAlign) {
2058 // Realign the list to the actual required alignment.
2059 auto AlignMinus1 =
2060 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2061 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2062 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2063 } else
2064 DstPtr = List;
2065
2066 LLT ValTy = MRI.getType(Dst);
2067 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2068 MIRBuilder.buildLoad(
2069 Dst, DstPtr,
2070 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2071 ValTy, std::max(Alignment, PtrAlign)));
2072
2073 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2074
2075 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2076
2077 MIRBuilder.buildStore(NewList, ListPtr,
2078 *MF.getMachineMemOperand(MachinePointerInfo(),
2080 PtrTy, PtrAlign));
2081
2082 MI.eraseFromParent();
2083 return true;
2084}
2085
2086bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2088 // Only legal if we can select immediate forms.
2089 // TODO: Lower this otherwise.
2090 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2091 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2092}
2093
2094bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2096 LegalizerHelper &Helper) const {
2097 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2098 // it can be more efficiently lowered to the following sequence that uses
2099 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2100 // registers are cheap.
2101 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2102 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2103 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2104 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2105 //
2106 // For 128 bit vector popcounts, we lower to the following sequence:
2107 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2108 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2109 // uaddlp.4s v0, v0 // v4s32, v2s64
2110 // uaddlp.2d v0, v0 // v2s64
2111 //
2112 // For 64 bit vector popcounts, we lower to the following sequence:
2113 // cnt.8b v0, v0 // v4s16, v2s32
2114 // uaddlp.4h v0, v0 // v4s16, v2s32
2115 // uaddlp.2s v0, v0 // v2s32
2116
2117 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2118 Register Dst = MI.getOperand(0).getReg();
2119 Register Val = MI.getOperand(1).getReg();
2120 LLT Ty = MRI.getType(Val);
2121 unsigned Size = Ty.getSizeInBits();
2122
2123 assert(Ty == MRI.getType(Dst) &&
2124 "Expected src and dst to have the same type!");
2125
2126 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2127 LLT s64 = LLT::scalar(64);
2128
2129 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2130 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2131 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2132 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2133
2134 MIRBuilder.buildZExt(Dst, Add);
2135 MI.eraseFromParent();
2136 return true;
2137 }
2138
2139 if (!ST->hasNEON() ||
2140 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2141 // Use generic lowering when custom lowering is not possible.
2142 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2143 Helper.lowerBitCount(MI) ==
2145 }
2146
2147 // Pre-conditioning: widen Val up to the nearest vector type.
2148 // s32,s64,v4s16,v2s32 -> v8i8
2149 // v8s16,v4s32,v2s64 -> v16i8
2150 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2151 if (Ty.isScalar()) {
2152 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2153 if (Size == 32) {
2154 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2155 }
2156 }
2157 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2158
2159 // Count bits in each byte-sized lane.
2160 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2161
2162 // Sum across lanes.
2163
2164 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2165 Ty.getScalarSizeInBits() != 16) {
2166 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2167 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2168 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2169 MachineInstrBuilder Sum;
2170
2171 if (Ty == LLT::fixed_vector(2, 64)) {
2172 auto UDOT =
2173 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2174 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2175 } else if (Ty == LLT::fixed_vector(4, 32)) {
2176 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2177 } else if (Ty == LLT::fixed_vector(2, 32)) {
2178 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2179 } else {
2180 llvm_unreachable("unexpected vector shape");
2181 }
2182
2183 Sum->getOperand(0).setReg(Dst);
2184 MI.eraseFromParent();
2185 return true;
2186 }
2187
2188 Register HSum = CTPOP.getReg(0);
2189 unsigned Opc;
2190 SmallVector<LLT> HAddTys;
2191 if (Ty.isScalar()) {
2192 Opc = Intrinsic::aarch64_neon_uaddlv;
2193 HAddTys.push_back(LLT::scalar(32));
2194 } else if (Ty == LLT::fixed_vector(8, 16)) {
2195 Opc = Intrinsic::aarch64_neon_uaddlp;
2196 HAddTys.push_back(LLT::fixed_vector(8, 16));
2197 } else if (Ty == LLT::fixed_vector(4, 32)) {
2198 Opc = Intrinsic::aarch64_neon_uaddlp;
2199 HAddTys.push_back(LLT::fixed_vector(8, 16));
2200 HAddTys.push_back(LLT::fixed_vector(4, 32));
2201 } else if (Ty == LLT::fixed_vector(2, 64)) {
2202 Opc = Intrinsic::aarch64_neon_uaddlp;
2203 HAddTys.push_back(LLT::fixed_vector(8, 16));
2204 HAddTys.push_back(LLT::fixed_vector(4, 32));
2205 HAddTys.push_back(LLT::fixed_vector(2, 64));
2206 } else if (Ty == LLT::fixed_vector(4, 16)) {
2207 Opc = Intrinsic::aarch64_neon_uaddlp;
2208 HAddTys.push_back(LLT::fixed_vector(4, 16));
2209 } else if (Ty == LLT::fixed_vector(2, 32)) {
2210 Opc = Intrinsic::aarch64_neon_uaddlp;
2211 HAddTys.push_back(LLT::fixed_vector(4, 16));
2212 HAddTys.push_back(LLT::fixed_vector(2, 32));
2213 } else
2214 llvm_unreachable("unexpected vector shape");
2216 for (LLT HTy : HAddTys) {
2217 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2218 HSum = UADD.getReg(0);
2219 }
2220
2221 // Post-conditioning.
2222 if (Ty.isScalar() && (Size == 64 || Size == 128))
2223 MIRBuilder.buildZExt(Dst, UADD);
2224 else
2225 UADD->getOperand(0).setReg(Dst);
2226 MI.eraseFromParent();
2227 return true;
2228}
2229
2230bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2232 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2233 LLT s64 = LLT::scalar(64);
2234 auto Addr = MI.getOperand(1).getReg();
2235 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2236 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2237 auto DstLo = MRI.createGenericVirtualRegister(s64);
2238 auto DstHi = MRI.createGenericVirtualRegister(s64);
2239
2240 MachineInstrBuilder CAS;
2241 if (ST->hasLSE()) {
2242 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2243 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2244 // the rest of the MIR so we must reassemble the extracted registers into a
2245 // 128-bit known-regclass one with code like this:
2246 //
2247 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2248 // %out = CASP %in1, ...
2249 // %OldLo = G_EXTRACT %out, 0
2250 // %OldHi = G_EXTRACT %out, 64
2251 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2252 unsigned Opcode;
2253 switch (Ordering) {
2255 Opcode = AArch64::CASPAX;
2256 break;
2258 Opcode = AArch64::CASPLX;
2259 break;
2262 Opcode = AArch64::CASPALX;
2263 break;
2264 default:
2265 Opcode = AArch64::CASPX;
2266 break;
2267 }
2268
2269 LLT s128 = LLT::scalar(128);
2270 auto CASDst = MRI.createGenericVirtualRegister(s128);
2271 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2272 auto CASNew = MRI.createGenericVirtualRegister(s128);
2273 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2274 .addUse(DesiredI->getOperand(0).getReg())
2275 .addImm(AArch64::sube64)
2276 .addUse(DesiredI->getOperand(1).getReg())
2277 .addImm(AArch64::subo64);
2278 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2279 .addUse(NewI->getOperand(0).getReg())
2280 .addImm(AArch64::sube64)
2281 .addUse(NewI->getOperand(1).getReg())
2282 .addImm(AArch64::subo64);
2283
2284 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2285
2286 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2287 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2288 } else {
2289 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2290 // can take arbitrary registers so it just has the normal GPR64 operands the
2291 // rest of AArch64 is expecting.
2292 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2293 unsigned Opcode;
2294 switch (Ordering) {
2296 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2297 break;
2299 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2300 break;
2303 Opcode = AArch64::CMP_SWAP_128;
2304 break;
2305 default:
2306 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2307 break;
2308 }
2309
2310 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2311 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2312 {Addr, DesiredI->getOperand(0),
2313 DesiredI->getOperand(1), NewI->getOperand(0),
2314 NewI->getOperand(1)});
2315 }
2316
2317 CAS.cloneMemRefs(MI);
2318 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2319 *MRI.getTargetRegisterInfo(),
2320 *ST->getRegBankInfo());
2321
2322 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2323 MI.eraseFromParent();
2324 return true;
2325}
2326
2327bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2328 LegalizerHelper &Helper) const {
2329 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2330 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2331 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2332 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2333 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2334 MI.eraseFromParent();
2335 return true;
2336}
2337
2338bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2339 LegalizerHelper &Helper) const {
2340 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2341
2342 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2343 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2344 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2345 // the instruction).
2346 auto &Value = MI.getOperand(1);
2347 Register ExtValueReg =
2348 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2349 Value.setReg(ExtValueReg);
2350 return true;
2351 }
2352
2353 return false;
2354}
2355
2356bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2358 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2359 auto VRegAndVal =
2361 if (VRegAndVal)
2362 return true;
2363 LLT VecTy = MRI.getType(Element->getVectorReg());
2364 if (VecTy.isScalableVector())
2365 return true;
2366 return Helper.lowerExtractInsertVectorElt(MI) !=
2368}
2369
2370bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2371 MachineInstr &MI, LegalizerHelper &Helper) const {
2372 MachineFunction &MF = *MI.getParent()->getParent();
2373 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2374 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2375
2376 // If stack probing is not enabled for this function, use the default
2377 // lowering.
2378 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2379 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2380 "inline-asm") {
2381 Helper.lowerDynStackAlloc(MI);
2382 return true;
2383 }
2384
2385 Register Dst = MI.getOperand(0).getReg();
2386 Register AllocSize = MI.getOperand(1).getReg();
2387 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2388
2389 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2390 "Unexpected type for dynamic alloca");
2391 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2392 "Unexpected type for dynamic alloca");
2393
2394 LLT PtrTy = MRI.getType(Dst);
2395 Register SPReg =
2397 Register SPTmp =
2398 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2399 auto NewMI =
2400 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2401 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2402 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2403 MIRBuilder.buildCopy(Dst, SPTmp);
2404
2405 MI.eraseFromParent();
2406 return true;
2407}
2408
2409bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2410 LegalizerHelper &Helper) const {
2411 MachineIRBuilder &MIB = Helper.MIRBuilder;
2412 auto &AddrVal = MI.getOperand(0);
2413
2414 int64_t IsWrite = MI.getOperand(1).getImm();
2415 int64_t Locality = MI.getOperand(2).getImm();
2416 int64_t IsData = MI.getOperand(3).getImm();
2417
2418 bool IsStream = Locality == 0;
2419 if (Locality != 0) {
2420 assert(Locality <= 3 && "Prefetch locality out-of-range");
2421 // The locality degree is the opposite of the cache speed.
2422 // Put the number the other way around.
2423 // The encoding starts at 0 for level 1
2424 Locality = 3 - Locality;
2425 }
2426
2427 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2428
2429 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2430 MI.eraseFromParent();
2431 return true;
2432}
2433
2434bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2435 MachineIRBuilder &MIRBuilder,
2436 MachineRegisterInfo &MRI) const {
2437 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2438 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2439 "Expected a power of 2 elements");
2440
2441 LLT s16 = LLT::scalar(16);
2442 LLT s32 = LLT::scalar(32);
2443 LLT s64 = LLT::scalar(64);
2444 LLT v2s16 = LLT::fixed_vector(2, s16);
2445 LLT v4s16 = LLT::fixed_vector(4, s16);
2446 LLT v2s32 = LLT::fixed_vector(2, s32);
2447 LLT v4s32 = LLT::fixed_vector(4, s32);
2448 LLT v2s64 = LLT::fixed_vector(2, s64);
2449
2450 SmallVector<Register> RegsToUnmergeTo;
2451 SmallVector<Register> TruncOddDstRegs;
2452 SmallVector<Register> RegsToMerge;
2453
2454 unsigned ElemCount = SrcTy.getNumElements();
2455
2456 // Find the biggest size chunks we can work with
2457 int StepSize = ElemCount % 4 ? 2 : 4;
2458
2459 // If we have a power of 2 greater than 2, we need to first unmerge into
2460 // enough pieces
2461 if (ElemCount <= 2)
2462 RegsToUnmergeTo.push_back(Src);
2463 else {
2464 for (unsigned i = 0; i < ElemCount / 2; ++i)
2465 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2466
2467 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2468 }
2469
2470 // Create all of the round-to-odd instructions and store them
2471 for (auto SrcReg : RegsToUnmergeTo) {
2472 Register Mid =
2473 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2474 .getReg(0);
2475 TruncOddDstRegs.push_back(Mid);
2476 }
2477
2478 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2479 // truncate 2s32 to 2s16.
2480 unsigned Index = 0;
2481 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2482 if (StepSize == 4) {
2483 Register ConcatDst =
2484 MIRBuilder
2486 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2487 .getReg(0);
2488
2489 RegsToMerge.push_back(
2490 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2491 } else {
2492 RegsToMerge.push_back(
2493 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2494 }
2495 }
2496
2497 // If there is only one register, replace the destination
2498 if (RegsToMerge.size() == 1) {
2499 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2500 MI.eraseFromParent();
2501 return true;
2502 }
2503
2504 // Merge the rest of the instructions & replace the register
2505 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2506 MRI.replaceRegWith(Dst, Fin);
2507 MI.eraseFromParent();
2508 return true;
2509}
unsigned const MachineRegisterInfo * MRI
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for basic integer types.
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...