1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
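// Shorthand used below: sN is an N-bit scalar, vMsN is a fixed vector of M
// elements of sN, nxvMsN is a scalable vector of at least M elements of sN,
// and p0 is a pointer in address space 0 (64 bits on AArch64).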
66
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
81 getLegacyLegalizerInfo().computeTables();
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
93
94 getActionDefinitionsBuilder(
95 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
98 v2s16, v2s8})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
122
124 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
126 .clampScalar(0, s32, s64)
127 .clampNumElements(0, v4s16, v8s16)
128 .clampNumElements(0, v2s32, v4s32)
129 .clampNumElements(0, v2s64, v2s64)
130 .moreElementsToNextPow2(0);
131
132 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
133 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
134 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
135 .widenScalarToNextPow2(0)
136 .clampScalar(0, s32, s64)
137 .clampMaxNumElements(0, s8, 16)
138 .clampMaxNumElements(0, s16, 8)
139 .clampNumElements(0, v2s32, v4s32)
140 .clampNumElements(0, v2s64, v2s64)
141 .minScalarOrEltIf(
142 [=](const LegalityQuery &Query) {
143 return Query.Types[0].getNumElements() <= 2;
144 },
145 0, s32)
146 .minScalarOrEltIf(
147 [=](const LegalityQuery &Query) {
148 return Query.Types[0].getNumElements() <= 4;
149 },
150 0, s16)
151 .minScalarOrEltIf(
152 [=](const LegalityQuery &Query) {
153 return Query.Types[0].getNumElements() <= 16;
154 },
155 0, s8)
156 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
158
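// The rules in a chain like the one above are tried in order; the first rule
// that matches decides the action. As an illustrative sketch (not compiler
// output), a narrow scalar addition such as
//   %2:_(s8) = G_ADD %0:_(s8), %1:_(s8)
// is widened by clampScalar(0, s32, s64): the operands are extended, the add
// is performed as an s32 G_ADD, and the result is truncated back to s8.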
160 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
161 .widenScalarToNextPow2(0)
162 .clampScalar(0, s32, s64)
163 .clampMaxNumElements(0, s8, 16)
164 .clampMaxNumElements(0, s16, 8)
165 .clampNumElements(0, v2s32, v4s32)
166 .clampNumElements(0, v2s64, v2s64)
167 .minScalarOrEltIf(
168 [=](const LegalityQuery &Query) {
169 return Query.Types[0].getNumElements() <= 2;
170 },
171 0, s32)
172 .minScalarOrEltIf(
173 [=](const LegalityQuery &Query) {
174 return Query.Types[0].getNumElements() <= 4;
175 },
176 0, s16)
177 .minScalarOrEltIf(
178 [=](const LegalityQuery &Query) {
179 return Query.Types[0].getNumElements() <= 16;
180 },
181 0, s8)
182 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
184
185 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
186 .customIf([=](const LegalityQuery &Query) {
187 const auto &SrcTy = Query.Types[0];
188 const auto &AmtTy = Query.Types[1];
189 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
190 AmtTy.getSizeInBits() == 32;
191 })
192 .legalFor({
193 {s32, s32},
194 {s32, s64},
195 {s64, s64},
196 {v8s8, v8s8},
197 {v16s8, v16s8},
198 {v4s16, v4s16},
199 {v8s16, v8s16},
200 {v2s32, v2s32},
201 {v4s32, v4s32},
202 {v2s64, v2s64},
203 })
204 .widenScalarToNextPow2(0)
205 .clampScalar(1, s32, s64)
206 .clampScalar(0, s32, s64)
207 .clampNumElements(0, v8s8, v16s8)
208 .clampNumElements(0, v4s16, v8s16)
209 .clampNumElements(0, v2s32, v4s32)
210 .clampNumElements(0, v2s64, v2s64)
212 .minScalarSameAs(1, 0)
214
216 .legalFor({{p0, s64}, {v2p0, v2s64}})
217 .clampScalarOrElt(1, s64, s64)
218 .clampNumElements(0, v2p0, v2p0);
219
220 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
221
222 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
223 .legalFor({s32, s64})
224 .libcallFor({s128})
225 .clampScalar(0, s32, s64)
227 .scalarize(0);
228
229 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
230 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
231 .libcallFor({s128})
233 .minScalarOrElt(0, s32)
234 .clampNumElements(0, v2s32, v4s32)
235 .clampNumElements(0, v2s64, v2s64)
236 .scalarize(0);
237
238 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
239 .widenScalarToNextPow2(0, /*Min = */ 32)
240 .clampScalar(0, s32, s64)
241 .lower();
242
243 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
244 .legalFor({s64, v8s16, v16s8, v4s32})
245 .lower();
246
247 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
248 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
249 .legalFor(HasCSSC, {s32, s64})
250 .minScalar(HasCSSC, 0, s32)
251 .clampNumElements(0, v8s8, v16s8)
252 .clampNumElements(0, v4s16, v8s16)
253 .clampNumElements(0, v2s32, v4s32)
254 // FIXME: This shouldn't be needed as v2s64 types are going to
255 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
256 .clampNumElements(0, v2s64, v2s64)
257 .lower();
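// Scalar G_SMIN/G_SMAX/G_UMIN/G_UMAX are only selectable directly when the
// subtarget has CSSC; otherwise the final lower() expands them into a
// G_ICMP feeding a G_SELECT.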
258
259 getActionDefinitionsBuilder(
260 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
261 .legalFor({{s32, s32}, {s64, s32}})
262 .clampScalar(0, s32, s64)
263 .clampScalar(1, s32, s64)
265
266 getActionDefinitionsBuilder(
267 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
268 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
269 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
270 .legalFor({s32, s64, v2s32, v4s32, v2s64})
271 .legalFor(HasFP16, {s16, v4s16, v8s16})
272 .libcallFor({s128})
273 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
274 .minScalarOrElt(0, MinFPScalar)
275 .clampNumElements(0, v4s16, v8s16)
276 .clampNumElements(0, v2s32, v4s32)
277 .clampNumElements(0, v2s64, v2s64)
279
280 getActionDefinitionsBuilder({G_FABS, G_FNEG})
281 .legalFor({s32, s64, v2s32, v4s32, v2s64})
282 .legalFor(HasFP16, {s16, v4s16, v8s16})
283 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
285 .clampNumElements(0, v4s16, v8s16)
286 .clampNumElements(0, v2s32, v4s32)
287 .clampNumElements(0, v2s64, v2s64)
289 .lowerFor({s16, v4s16, v8s16});
290
292 .libcallFor({s32, s64, s128})
293 .minScalar(0, s32)
294 .scalarize(0);
295
296 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
297 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
298 .libcallFor({{s64, s128}})
299 .minScalarOrElt(1, MinFPScalar);
300
301 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
302 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
303 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
304 G_FSINH, G_FTANH})
305 // We need a call for these, so we always need to scalarize.
306 .scalarize(0)
307 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
308 .minScalar(0, s32)
309 .libcallFor({s32, s64, s128});
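// All of these transcendental operations become libm calls (e.g. sinf/sin
// for G_FSIN), which is why half-precision inputs are promoted to float and
// vectors are fully scalarized first.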
311 .scalarize(0)
312 .minScalar(0, s32)
313 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
314
316 .legalIf(all(typeInSet(0, {s32, s64, p0}),
317 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
319 .clampScalar(0, s32, s64)
321 .minScalar(1, s8)
322 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
323 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
324
326 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
327 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
329 .clampScalar(1, s32, s128)
331 .minScalar(0, s16)
332 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
333 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
334 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
335
336
337 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
338 auto &Actions = getActionDefinitionsBuilder(Op);
339
340 if (Op == G_SEXTLOAD)
341 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
342
343 // Atomics have zero extending behavior.
344 Actions
345 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
346 {s32, p0, s16, 8},
347 {s32, p0, s32, 8},
348 {s64, p0, s8, 2},
349 {s64, p0, s16, 2},
350 {s64, p0, s32, 4},
351 {s64, p0, s64, 8},
352 {p0, p0, s64, 8},
353 {v2s32, p0, s64, 8}})
354 .widenScalarToNextPow2(0)
355 .clampScalar(0, s32, s64)
356 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
357 // how to do that yet.
358 .unsupportedIfMemSizeNotPow2()
359 // Lower anything left over into G_*EXT and G_LOAD
360 .lower();
361 }
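// Net effect: extending loads whose result/memory combination is not listed
// above are either widened towards a 32/64-bit result or, as a last resort,
// rewritten as a plain G_LOAD followed by the matching G_SEXT/G_ZEXT.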
362
363 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
364 const LLT &ValTy = Query.Types[0];
365 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
366 };
367
368 getActionDefinitionsBuilder(G_LOAD)
369 .customIf([=](const LegalityQuery &Query) {
370 return HasRCPC3 && Query.Types[0] == s128 &&
371 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
372 })
373 .customIf([=](const LegalityQuery &Query) {
374 return Query.Types[0] == s128 &&
375 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
376 })
377 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
378 {s16, p0, s16, 8},
379 {s32, p0, s32, 8},
380 {s64, p0, s64, 8},
381 {p0, p0, s64, 8},
382 {s128, p0, s128, 8},
383 {v8s8, p0, s64, 8},
384 {v16s8, p0, s128, 8},
385 {v4s16, p0, s64, 8},
386 {v8s16, p0, s128, 8},
387 {v2s32, p0, s64, 8},
388 {v4s32, p0, s128, 8},
389 {v2s64, p0, s128, 8}})
390 // These extends are also legal
391 .legalForTypesWithMemDesc(
392 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
393 .legalForTypesWithMemDesc({
394 // SVE vscale x 128 bit base sizes
395 {nxv16s8, p0, nxv16s8, 8},
396 {nxv8s16, p0, nxv8s16, 8},
397 {nxv4s32, p0, nxv4s32, 8},
398 {nxv2s64, p0, nxv2s64, 8},
399 })
400 .widenScalarToNextPow2(0, /* MinSize = */ 8)
401 .clampMaxNumElements(0, s8, 16)
402 .clampMaxNumElements(0, s16, 8)
403 .clampMaxNumElements(0, s32, 4)
404 .clampMaxNumElements(0, s64, 2)
405 .clampMaxNumElements(0, p0, 2)
407 .clampScalar(0, s8, s64)
409 [=](const LegalityQuery &Query) {
410 // Clamp extending load results to 32-bits.
411 return Query.Types[0].isScalar() &&
412 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
413 Query.Types[0].getSizeInBits() > 32;
414 },
415 changeTo(0, s32))
416 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
417 .bitcastIf(typeInSet(0, {v4s8}),
418 [=](const LegalityQuery &Query) {
419 const LLT VecTy = Query.Types[0];
420 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
421 })
422 .customIf(IsPtrVecPred)
423 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
425
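// Loads and stores of pointer vectors (IsPtrVecPred) are marked custom and
// handled in legalizeLoadStore() below, which bitcasts the value to the
// equivalent vector of s64 so the existing s64 patterns apply.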
426 getActionDefinitionsBuilder(G_STORE)
427 .customIf([=](const LegalityQuery &Query) {
428 return HasRCPC3 && Query.Types[0] == s128 &&
429 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
430 })
431 .customIf([=](const LegalityQuery &Query) {
432 return Query.Types[0] == s128 &&
433 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
434 })
435 .legalForTypesWithMemDesc(
436 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
437 {s32, p0, s8, 8}, // truncstorei8 from s32
438 {s64, p0, s8, 8}, // truncstorei8 from s64
439 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
440 {s64, p0, s16, 8}, // truncstorei16 from s64
441 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
442 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
443 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
444 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
445 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
446 .legalForTypesWithMemDesc({
447 // SVE vscale x 128 bit base sizes
448 // TODO: Add nxv2p0. Consider bitcastIf.
449 // See #92130
450 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
451 {nxv16s8, p0, nxv16s8, 8},
452 {nxv8s16, p0, nxv8s16, 8},
453 {nxv4s32, p0, nxv4s32, 8},
454 {nxv2s64, p0, nxv2s64, 8},
455 })
456 .clampScalar(0, s8, s64)
457 .minScalarOrElt(0, s8)
458 .lowerIf([=](const LegalityQuery &Query) {
459 return Query.Types[0].isScalar() &&
460 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
461 })
462 // Maximum: sN * k = 128
463 .clampMaxNumElements(0, s8, 16)
464 .clampMaxNumElements(0, s16, 8)
465 .clampMaxNumElements(0, s32, 4)
466 .clampMaxNumElements(0, s64, 2)
467 .clampMaxNumElements(0, p0, 2)
469 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
470 .bitcastIf(typeInSet(0, {v4s8}),
471 [=](const LegalityQuery &Query) {
472 const LLT VecTy = Query.Types[0];
473 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
474 })
475 .customIf(IsPtrVecPred)
476 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
478
479 getActionDefinitionsBuilder(G_INDEXED_STORE)
480 // Idx 0 == Ptr, Idx 1 == Val
481 // TODO: we can implement legalizations but as of now these are
482 // generated in a very specific way.
483 .legalForTypesWithMemDesc({
484 {p0, s8, s8, 8},
485 {p0, s16, s16, 8},
486 {p0, s32, s8, 8},
487 {p0, s32, s16, 8},
488 {p0, s32, s32, 8},
489 {p0, s64, s64, 8},
490 {p0, p0, p0, 8},
491 {p0, v8s8, v8s8, 8},
492 {p0, v16s8, v16s8, 8},
493 {p0, v4s16, v4s16, 8},
494 {p0, v8s16, v8s16, 8},
495 {p0, v2s32, v2s32, 8},
496 {p0, v4s32, v4s32, 8},
497 {p0, v2s64, v2s64, 8},
498 {p0, v2p0, v2p0, 8},
499 {p0, s128, s128, 8},
500 })
501 .unsupported();
502
503 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
504 LLT LdTy = Query.Types[0];
505 LLT PtrTy = Query.Types[1];
506 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
507 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
508 return false;
509 if (PtrTy != p0)
510 return false;
511 return true;
512 };
513 getActionDefinitionsBuilder(G_INDEXED_LOAD)
516 .legalIf(IndexedLoadBasicPred)
517 .unsupported();
518 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
519 .unsupportedIf(
520 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
521 .legalIf(all(typeInSet(0, {s16, s32, s64}),
522 LegalityPredicate([=](const LegalityQuery &Q) {
523 LLT LdTy = Q.Types[0];
524 LLT PtrTy = Q.Types[1];
525 LLT MemTy = Q.MMODescrs[0].MemoryTy;
526 if (PtrTy != p0)
527 return false;
528 if (LdTy == s16)
529 return MemTy == s8;
530 if (LdTy == s32)
531 return MemTy == s8 || MemTy == s16;
532 if (LdTy == s64)
533 return MemTy == s8 || MemTy == s16 || MemTy == s32;
534 return false;
535 })))
536 .unsupported();
537
538 // Constants
539 getActionDefinitionsBuilder(G_CONSTANT)
540 .legalFor({p0, s8, s16, s32, s64})
541 .widenScalarToNextPow2(0)
542 .clampScalar(0, s8, s64);
543 getActionDefinitionsBuilder(G_FCONSTANT)
544 .legalFor({s32, s64, s128})
545 .legalFor(HasFP16, {s16})
546 .clampScalar(0, MinFPScalar, s128);
547
548 // FIXME: fix moreElementsToNextPow2
550 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
552 .clampScalar(1, s32, s64)
553 .clampScalar(0, s32, s32)
554 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
555 .minScalarEltSameAsIf(
556 [=](const LegalityQuery &Query) {
557 const LLT &Ty = Query.Types[0];
558 const LLT &SrcTy = Query.Types[1];
559 return Ty.isVector() && !SrcTy.isPointerVector() &&
560 Ty.getElementType() != SrcTy.getElementType();
561 },
562 0, 1)
563 .minScalarOrEltIf(
564 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
565 1, s32)
566 .minScalarOrEltIf(
567 [=](const LegalityQuery &Query) {
568 return Query.Types[1].isPointerVector();
569 },
570 0, s64)
572 .clampNumElements(1, v8s8, v16s8)
573 .clampNumElements(1, v4s16, v8s16)
574 .clampNumElements(1, v2s32, v4s32)
575 .clampNumElements(1, v2s64, v2s64)
576 .clampNumElements(1, v2p0, v2p0)
577 .customIf(isVector(0));
578
580 .legalFor({{s32, s32},
581 {s32, s64},
582 {v4s32, v4s32},
583 {v2s32, v2s32},
584 {v2s64, v2s64}})
585 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
587 .clampScalar(0, s32, s32)
588 .minScalarOrElt(1, MinFPScalar)
589 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
590 .minScalarEltSameAsIf(
591 [=](const LegalityQuery &Query) {
592 const LLT &Ty = Query.Types[0];
593 const LLT &SrcTy = Query.Types[1];
594 return Ty.isVector() && !SrcTy.isPointerVector() &&
595 Ty.getElementType() != SrcTy.getElementType();
596 },
597 0, 1)
598 .clampNumElements(1, v4s16, v8s16)
599 .clampNumElements(1, v2s32, v4s32)
600 .clampMaxNumElements(1, s64, 2)
601 .moreElementsToNextPow2(1)
602 .libcallFor({{s32, s128}});
603
604 // Extensions
605 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
606 unsigned DstSize = Query.Types[0].getSizeInBits();
607
608 // Handle legal vectors using legalFor
609 if (Query.Types[0].isVector())
610 return false;
611
612 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
613 return false; // Extending to a scalar s128 needs narrowing.
614
615 const LLT &SrcTy = Query.Types[1];
616
617 // Make sure we fit in a register otherwise. Don't bother checking that
618 // the source type is below 128 bits. We shouldn't be allowing anything
619 // through which is wider than the destination in the first place.
620 unsigned SrcSize = SrcTy.getSizeInBits();
621 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
622 return false;
623
624 return true;
625 };
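// In other words: a scalar extension is legal when both source and
// destination are power-of-two scalars of at least 8 bits and the
// destination still fits in a 64-bit register; s128 destinations are
// rejected here and instead narrowed by the clampScalar rule in the chain
// below.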
626 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
627 .legalIf(ExtLegalFunc)
628 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
629 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
631 .clampMaxNumElements(1, s8, 8)
632 .clampMaxNumElements(1, s16, 4)
633 .clampMaxNumElements(1, s32, 2)
634 // Tries to convert a large EXTEND into two smaller EXTENDs
635 .lowerIf([=](const LegalityQuery &Query) {
636 return (Query.Types[0].getScalarSizeInBits() >
637 Query.Types[1].getScalarSizeInBits() * 2) &&
638 Query.Types[0].isVector() &&
639 (Query.Types[1].getScalarSizeInBits() == 8 ||
640 Query.Types[1].getScalarSizeInBits() == 16);
641 })
642 .clampMinNumElements(1, s8, 8)
643 .clampMinNumElements(1, s16, 4);
644
646 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
648 .clampMaxNumElements(0, s8, 8)
649 .clampMaxNumElements(0, s16, 4)
650 .clampMaxNumElements(0, s32, 2)
651 .minScalarOrEltIf(
652 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
653 0, s8)
654 .lowerIf([=](const LegalityQuery &Query) {
655 LLT DstTy = Query.Types[0];
656 LLT SrcTy = Query.Types[1];
657 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
658 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
659 })
660 .clampMinNumElements(0, s8, 8)
661 .clampMinNumElements(0, s16, 4)
662 .alwaysLegal();
663
664 getActionDefinitionsBuilder(G_SEXT_INREG)
665 .legalFor({s32, s64})
666 .legalFor(PackedVectorAllTypeList)
667 .maxScalar(0, s64)
668 .clampNumElements(0, v8s8, v16s8)
669 .clampNumElements(0, v4s16, v8s16)
670 .clampNumElements(0, v2s32, v4s32)
671 .clampMaxNumElements(0, s64, 2)
672 .lower();
673
674 // FP conversions
676 .legalFor(
677 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
678 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
679 .clampNumElements(0, v4s16, v4s16)
680 .clampNumElements(0, v2s32, v2s32)
681 .scalarize(0);
682
684 .legalFor(
685 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
686 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
687 .clampNumElements(0, v4s32, v4s32)
688 .clampNumElements(0, v2s64, v2s64)
689 .scalarize(0);
690
691 // Conversions
692 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
693 .legalFor({{s32, s32},
694 {s64, s32},
695 {s32, s64},
696 {s64, s64},
697 {v2s64, v2s64},
698 {v4s32, v4s32},
699 {v2s32, v2s32}})
700 .legalFor(HasFP16,
701 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
702 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
704 // The range of a fp16 value fits into an i17, so we can lower the width
705 // to i64.
707 [=](const LegalityQuery &Query) {
708 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
709 },
710 changeTo(0, s64))
712 .widenScalarOrEltToNextPow2OrMinSize(0)
713 .minScalar(0, s32)
714 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
715 .widenScalarIf(
716 [=](const LegalityQuery &Query) {
717 return Query.Types[0].getScalarSizeInBits() <= 64 &&
718 Query.Types[0].getScalarSizeInBits() >
719 Query.Types[1].getScalarSizeInBits();
720 },
722 .widenScalarIf(
723 [=](const LegalityQuery &Query) {
724 return Query.Types[1].getScalarSizeInBits() <= 64 &&
725 Query.Types[0].getScalarSizeInBits() <
726 Query.Types[1].getScalarSizeInBits();
727 },
729 .clampNumElements(0, v4s16, v8s16)
730 .clampNumElements(0, v2s32, v4s32)
731 .clampMaxNumElements(0, s64, 2)
732 .libcallFor(
733 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
734
735 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
736 .legalFor({{s32, s32},
737 {s64, s32},
738 {s32, s64},
739 {s64, s64},
740 {v2s64, v2s64},
741 {v4s32, v4s32},
742 {v2s32, v2s32}})
743 .legalFor(HasFP16,
744 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
745 // Handle types larger than i64 by scalarizing/lowering.
746 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
748 // The range of a fp16 value fits into an i17, so we can lower the width
749 // to i64.
751 [=](const LegalityQuery &Query) {
752 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
753 },
754 changeTo(0, s64))
755 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
757 .widenScalarToNextPow2(0, /*MinSize=*/32)
758 .minScalar(0, s32)
759 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
760 .widenScalarIf(
761 [=](const LegalityQuery &Query) {
762 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
763 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
764 ITySize > Query.Types[1].getScalarSizeInBits();
765 },
767 .widenScalarIf(
768 [=](const LegalityQuery &Query) {
769 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
770 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
771 Query.Types[0].getScalarSizeInBits() < FTySize;
772 },
775 .clampNumElements(0, v4s16, v8s16)
776 .clampNumElements(0, v2s32, v4s32)
777 .clampMaxNumElements(0, s64, 2);
778
779 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
780 .legalFor({{s32, s32},
781 {s64, s32},
782 {s32, s64},
783 {s64, s64},
784 {v2s64, v2s64},
785 {v4s32, v4s32},
786 {v2s32, v2s32}})
787 .legalFor(HasFP16,
788 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
789 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
793 .minScalar(1, s32)
794 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
796 [=](const LegalityQuery &Query) {
797 return Query.Types[1].getScalarSizeInBits() <= 64 &&
798 Query.Types[0].getScalarSizeInBits() <
799 Query.Types[1].getScalarSizeInBits();
800 },
802 .widenScalarIf(
803 [=](const LegalityQuery &Query) {
804 return Query.Types[0].getScalarSizeInBits() <= 64 &&
805 Query.Types[0].getScalarSizeInBits() >
806 Query.Types[1].getScalarSizeInBits();
807 },
809 .clampNumElements(0, v4s16, v8s16)
810 .clampNumElements(0, v2s32, v4s32)
811 .clampMaxNumElements(0, s64, 2)
812 .libcallFor({{s16, s128},
813 {s32, s128},
814 {s64, s128},
815 {s128, s128},
816 {s128, s32},
817 {s128, s64}});
818
819 // Control-flow
821 .legalFor({s32})
822 .clampScalar(0, s32, s32);
823 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
824
826 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
827 .widenScalarToNextPow2(0)
828 .clampScalar(0, s32, s64)
829 .clampScalar(1, s32, s32)
832 .lowerIf(isVector(0));
833
834 // Pointer-handling
835 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
836
837 if (TM.getCodeModel() == CodeModel::Small)
838 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
839 else
840 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
841
842 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
843 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
844
846 .legalFor({{s64, p0}, {v2s64, v2p0}})
847 .widenScalarToNextPow2(0, 64)
848 .clampScalar(0, s64, s64)
849 .clampMaxNumElements(0, s64, 2);
850
852 .unsupportedIf([&](const LegalityQuery &Query) {
853 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
854 })
855 .legalFor({{p0, s64}, {v2p0, v2s64}})
856 .clampMaxNumElements(1, s64, 2);
857
858 // Casts for 32 and 64-bit width type are just copies.
859 // Same for 128-bit width type, except they are on the FPR bank.
861 // Keeping 32-bit instructions legal to prevent regression in some tests
862 .legalForCartesianProduct({s32, v2s16, v4s8})
863 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
864 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
865 .customIf([=](const LegalityQuery &Query) {
866 // Handle casts from i1 vectors to scalars.
867 LLT DstTy = Query.Types[0];
868 LLT SrcTy = Query.Types[1];
869 return DstTy.isScalar() && SrcTy.isVector() &&
870 SrcTy.getScalarSizeInBits() == 1;
871 })
872 .lowerIf([=](const LegalityQuery &Query) {
873 return Query.Types[0].isVector() != Query.Types[1].isVector();
874 })
876 .clampNumElements(0, v8s8, v16s8)
877 .clampNumElements(0, v4s16, v8s16)
878 .clampNumElements(0, v2s32, v4s32)
879 .lower();
880
881 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
882
883 // va_list must be a pointer, but most sized types are pretty easy to handle
884 // as the destination.
886 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
887 .clampScalar(0, s8, s64)
888 .widenScalarToNextPow2(0, /*Min*/ 8);
889
890 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
891 .lowerIf(
892 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
893
894 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
895
896 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
897 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
898 .customFor(!UseOutlineAtomics, {{s128, p0}})
899 .libcallFor(UseOutlineAtomics,
900 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
901 .clampScalar(0, s32, s64);
902
903 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
904 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
905 G_ATOMICRMW_XOR})
906 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
907 .libcallFor(UseOutlineAtomics,
908 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
909 .clampScalar(0, s32, s64);
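// With outline atomics (and no LSE), cmpxchg and the RMW operations above are
// emitted as libcalls to the outline-atomics helper routines rather than
// being expanded inline.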
910
911 // Do not outline these atomic operations, as per the comment in
912 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
913 getActionDefinitionsBuilder(
914 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
915 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
916 .clampScalar(0, s32, s64);
917
918 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
919
920 // Merge/Unmerge
921 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
922 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
923 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
925 .widenScalarToNextPow2(LitTyIdx, 8)
926 .widenScalarToNextPow2(BigTyIdx, 32)
927 .clampScalar(LitTyIdx, s8, s64)
928 .clampScalar(BigTyIdx, s32, s128)
929 .legalIf([=](const LegalityQuery &Q) {
930 switch (Q.Types[BigTyIdx].getSizeInBits()) {
931 case 32:
932 case 64:
933 case 128:
934 break;
935 default:
936 return false;
937 }
938 switch (Q.Types[LitTyIdx].getSizeInBits()) {
939 case 8:
940 case 16:
941 case 32:
942 case 64:
943 return true;
944 default:
945 return false;
946 }
947 });
948 }
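// I.e. G_MERGE_VALUES/G_UNMERGE_VALUES are legal when the wide type is 32,
// 64, or 128 bits and the narrow type is a power of two between 8 and 64
// bits; other widths are first clamped/widened into that range.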
949
950 // TODO: nxv4s16, nxv2s16, nxv2s32
951 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
952 .legalFor(HasSVE, {{s16, nxv16s8, s64},
953 {s16, nxv8s16, s64},
954 {s32, nxv4s32, s64},
955 {s64, nxv2s64, s64}})
956 .unsupportedIf([=](const LegalityQuery &Query) {
957 const LLT &EltTy = Query.Types[1].getElementType();
958 if (Query.Types[1].isScalableVector())
959 return false;
960 return Query.Types[0] != EltTy;
961 })
962 .minScalar(2, s64)
963 .customIf([=](const LegalityQuery &Query) {
964 const LLT &VecTy = Query.Types[1];
965 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
966 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
967 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
968 })
969 .minScalarOrEltIf(
970 [=](const LegalityQuery &Query) {
971 // We want to promote <M x s1> to <M x s64> if that wouldn't
972 // cause the total vec size to be > 128b.
973 return Query.Types[1].isFixedVector() &&
974 Query.Types[1].getNumElements() <= 2;
975 },
976 0, s64)
977 .minScalarOrEltIf(
978 [=](const LegalityQuery &Query) {
979 return Query.Types[1].isFixedVector() &&
980 Query.Types[1].getNumElements() <= 4;
981 },
982 0, s32)
983 .minScalarOrEltIf(
984 [=](const LegalityQuery &Query) {
985 return Query.Types[1].isFixedVector() &&
986 Query.Types[1].getNumElements() <= 8;
987 },
988 0, s16)
989 .minScalarOrEltIf(
990 [=](const LegalityQuery &Query) {
991 return Query.Types[1].isFixedVector() &&
992 Query.Types[1].getNumElements() <= 16;
993 },
994 0, s8)
995 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
997 .clampMaxNumElements(1, s64, 2)
998 .clampMaxNumElements(1, s32, 4)
999 .clampMaxNumElements(1, s16, 8)
1000 .clampMaxNumElements(1, s8, 16)
1001 .clampMaxNumElements(1, p0, 2);
1002
1003 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1004 .legalIf(
1005 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
1006 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1007 {nxv8s16, s32, s64},
1008 {nxv4s32, s32, s64},
1009 {nxv2s64, s64, s64}})
1011 .widenVectorEltsToVectorMinSize(0, 64)
1012 .clampNumElements(0, v8s8, v16s8)
1013 .clampNumElements(0, v4s16, v8s16)
1014 .clampNumElements(0, v2s32, v4s32)
1015 .clampMaxNumElements(0, s64, 2)
1016 .clampMaxNumElements(0, p0, 2);
1017
1018 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1019 .legalFor({{v8s8, s8},
1020 {v16s8, s8},
1021 {v4s16, s16},
1022 {v8s16, s16},
1023 {v2s32, s32},
1024 {v4s32, s32},
1025 {v2p0, p0},
1026 {v2s64, s64}})
1027 .clampNumElements(0, v4s32, v4s32)
1028 .clampNumElements(0, v2s64, v2s64)
1029 .minScalarOrElt(0, s8)
1032 .minScalarSameAs(1, 0);
1033
1034 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1035
1038 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
1039 .scalarize(1)
1040 .widenScalarToNextPow2(1, /*Min=*/32)
1041 .clampScalar(1, s32, s64)
1042 .scalarSameSizeAs(0, 1);
1043 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
1044
1045 // TODO: Custom lowering for v2s32, v4s32, v2s64.
1046 getActionDefinitionsBuilder(G_BITREVERSE)
1047 .legalFor({s32, s64, v8s8, v16s8})
1048 .widenScalarToNextPow2(0, /*Min = */ 32)
1049 .clampScalar(0, s32, s64)
1050 .lower();
1051
1052 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
1053
1055 .lowerIf(isVector(0))
1056 .widenScalarToNextPow2(1, /*Min=*/32)
1057 .clampScalar(1, s32, s64)
1058 .scalarSameSizeAs(0, 1)
1059 .legalFor(HasCSSC, {s32, s64})
1060 .customFor(!HasCSSC, {s32, s64});
1061
1062 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1063 .legalIf([=](const LegalityQuery &Query) {
1064 const LLT &DstTy = Query.Types[0];
1065 const LLT &SrcTy = Query.Types[1];
1066 // For now just support the TBL2 variant which needs the source vectors
1067 // to be the same size as the dest.
1068 if (DstTy != SrcTy)
1069 return false;
1070 return llvm::is_contained(
1071 {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1072 })
1073 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
1074 // destinations; we just want those lowered into G_BUILD_VECTOR or
1075 // G_EXTRACT_ELEMENT.
1076 .lowerIf([=](const LegalityQuery &Query) {
1077 return !Query.Types[0].isVector() || !Query.Types[1].isVector();
1078 })
1079 .moreElementsIf(
1080 [](const LegalityQuery &Query) {
1081 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1082 Query.Types[0].getNumElements() >
1083 Query.Types[1].getNumElements();
1084 },
1085 changeTo(1, 0))
1087 .moreElementsIf(
1088 [](const LegalityQuery &Query) {
1089 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1090 Query.Types[0].getNumElements() <
1091 Query.Types[1].getNumElements();
1092 },
1093 changeTo(0, 1))
1094 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1095 .clampNumElements(0, v8s8, v16s8)
1096 .clampNumElements(0, v4s16, v8s16)
1097 .clampNumElements(0, v4s32, v4s32)
1098 .clampNumElements(0, v2s64, v2s64)
1099 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1100 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1101 // Bitcast pointers vector to i64.
1102 const LLT DstTy = Query.Types[0];
1103 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1104 });
1105
1106 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1107 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1108 .bitcastIf(
1109 [=](const LegalityQuery &Query) {
1110 return Query.Types[0].getSizeInBits() <= 128 &&
1111 Query.Types[1].getSizeInBits() <= 64;
1112 },
1113 [=](const LegalityQuery &Query) {
1114 const LLT DstTy = Query.Types[0];
1115 const LLT SrcTy = Query.Types[1];
1116 return std::pair(
1117 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1120 SrcTy.getNumElements())));
1121 });
1122
1123 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1124
1125 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1126
1127 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1128
1129 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1130
1131 if (ST.hasMOPS()) {
1132 // G_BZERO is not supported. Currently it is only emitted by
1133 // PreLegalizerCombiner for G_MEMSET with zero constant.
1135
1137 .legalForCartesianProduct({p0}, {s64}, {s64})
1138 .customForCartesianProduct({p0}, {s8}, {s64})
1139 .immIdx(0); // Inform verifier imm idx 0 is handled.
1140
1141 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1142 .legalForCartesianProduct({p0}, {p0}, {s64})
1143 .immIdx(0); // Inform verifier imm idx 0 is handled.
1144
1145 // G_MEMCPY_INLINE does not have a tailcall immediate
1146 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1147 .legalForCartesianProduct({p0}, {p0}, {s64});
1148
1149 } else {
1150 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1151 .libcall();
1152 }
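// Without MOPS the memory intrinsics are simply lowered to calls to the
// corresponding library routines (memcpy/memmove/memset/bzero).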
1153
1154 // FIXME: Legal vector types are only legal with NEON.
1156 .legalFor(HasCSSC, {s32, s64})
1157 .legalFor(PackedVectorAllTypeList)
1158 .customIf([=](const LegalityQuery &Q) {
1159 // TODO: Fix suboptimal codegen for 128+ bit types.
1160 LLT SrcTy = Q.Types[0];
1161 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1162 })
1163 .widenScalarIf(
1164 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1165 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1166 .widenScalarIf(
1167 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1168 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1169 .clampNumElements(0, v8s8, v16s8)
1170 .clampNumElements(0, v4s16, v8s16)
1171 .clampNumElements(0, v2s32, v4s32)
1172 .clampNumElements(0, v2s64, v2s64)
1174 .lower();
1175
1176 // For fadd reductions we have pairwise operations available. We treat the
1177 // usual legal types as legal and handle the lowering to pairwise instructions
1178 // later.
1179 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1180 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1181 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1182 .minScalarOrElt(0, MinFPScalar)
1183 .clampMaxNumElements(1, s64, 2)
1184 .clampMaxNumElements(1, s32, 4)
1185 .clampMaxNumElements(1, s16, 8)
1186 .lower();
1187
1188 // For fmul reductions we need to split up into individual operations. We
1189 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1190 // smaller types, followed by scalarizing what remains.
1191 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1192 .minScalarOrElt(0, MinFPScalar)
1193 .clampMaxNumElements(1, s64, 2)
1194 .clampMaxNumElements(1, s32, 4)
1195 .clampMaxNumElements(1, s16, 8)
1196 .clampMaxNumElements(1, s32, 2)
1197 .clampMaxNumElements(1, s16, 4)
1198 .scalarize(1)
1199 .lower();
1200
1201 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1202 .scalarize(2)
1203 .lower();
1204
1205 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1206 .legalFor({{s8, v16s8},
1207 {s8, v8s8},
1208 {s16, v8s16},
1209 {s16, v4s16},
1210 {s32, v4s32},
1211 {s32, v2s32},
1212 {s64, v2s64}})
1213 .clampMaxNumElements(1, s64, 2)
1214 .clampMaxNumElements(1, s32, 4)
1215 .clampMaxNumElements(1, s16, 8)
1216 .clampMaxNumElements(1, s8, 16)
1217 .lower();
1218
1219 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1220 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1221 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1222 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1223 .minScalarOrElt(0, MinFPScalar)
1224 .clampMaxNumElements(1, s64, 2)
1225 .clampMaxNumElements(1, s32, 4)
1226 .clampMaxNumElements(1, s16, 8)
1227 .lower();
1228
1229 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1230 .clampMaxNumElements(1, s32, 2)
1231 .clampMaxNumElements(1, s16, 4)
1232 .clampMaxNumElements(1, s8, 8)
1233 .scalarize(1)
1234 .lower();
1235
1236 getActionDefinitionsBuilder(
1237 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1238 .legalFor({{s8, v8s8},
1239 {s8, v16s8},
1240 {s16, v4s16},
1241 {s16, v8s16},
1242 {s32, v2s32},
1243 {s32, v4s32}})
1244 .moreElementsIf(
1245 [=](const LegalityQuery &Query) {
1246 return Query.Types[1].isVector() &&
1247 Query.Types[1].getElementType() != s8 &&
1248 Query.Types[1].getNumElements() & 1;
1249 },
1251 .clampMaxNumElements(1, s64, 2)
1252 .clampMaxNumElements(1, s32, 4)
1253 .clampMaxNumElements(1, s16, 8)
1254 .clampMaxNumElements(1, s8, 16)
1255 .scalarize(1)
1256 .lower();
1257
1258 getActionDefinitionsBuilder(
1259 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1260 // Try to break down into smaller vectors as long as they're at least 64
1261 // bits. This lets us use vector operations for some parts of the
1262 // reduction.
1263 .fewerElementsIf(
1264 [=](const LegalityQuery &Q) {
1265 LLT SrcTy = Q.Types[1];
1266 if (SrcTy.isScalar())
1267 return false;
1268 if (!isPowerOf2_32(SrcTy.getNumElements()))
1269 return false;
1270 // We can usually perform 64b vector operations.
1271 return SrcTy.getSizeInBits() > 64;
1272 },
1273 [=](const LegalityQuery &Q) {
1274 LLT SrcTy = Q.Types[1];
1275 return std::make_pair(1, SrcTy.divide(2));
1276 })
1277 .scalarize(1)
1278 .lower();
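// As an illustrative sketch (not compiler output), a v4s32 OR-reduction is
// first split into two v2s32 halves that are combined with a vector G_OR;
// the remaining v2s32 reduction is then scalarized and lowered.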
1279
1280 // TODO: Update this to correct handling when adding AArch64/SVE support.
1281 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1282
1283 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1284 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1285 .lower();
1286
1288 .legalFor({{s32, s64}, {s64, s64}})
1289 .customIf([=](const LegalityQuery &Q) {
1290 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1291 })
1292 .lower();
1294
1295 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1296 .customFor({{s32, s32}, {s64, s64}});
1297
1298 auto always = [=](const LegalityQuery &Q) { return true; };
1300 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
1301 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
1302 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
1303 .customFor({{s128, s128},
1304 {v2s64, v2s64},
1305 {v2s32, v2s32},
1306 {v4s32, v4s32},
1307 {v4s16, v4s16},
1308 {v8s16, v8s16}})
1309 .clampScalar(0, s32, s128)
1311 .minScalarEltSameAsIf(always, 1, 0)
1312 .maxScalarEltSameAsIf(always, 1, 0);
1313
1314 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1315 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1316 .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
1317 .clampNumElements(0, v8s8, v16s8)
1318 .clampNumElements(0, v4s16, v8s16)
1319 .clampNumElements(0, v2s32, v4s32)
1320 .clampMaxNumElements(0, s64, 2)
1323 .lower();
1324
1325 // TODO: Libcall support for s128.
1326 // TODO: s16 should be legal with full FP16 support.
1327 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1328 .legalFor({{s64, s32}, {s64, s64}});
1329
1330 // TODO: Custom legalization for mismatched types.
1331 getActionDefinitionsBuilder(G_FCOPYSIGN)
1333 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1334 [=](const LegalityQuery &Query) {
1335 const LLT Ty = Query.Types[0];
1336 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1337 })
1338 .lower();
1339
1341
1342 // Access to floating-point environment.
1343 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1344 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1345 .libcall();
1346
1347 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1348
1349 getActionDefinitionsBuilder(G_PREFETCH).custom();
1350
1351 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1352
1353 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1354 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1356 .immIdx(0); // Inform verifier imm idx 0 is handled.
1357
1358 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1359 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1360 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1361
1363 verify(*ST.getInstrInfo());
1364}
1365
1366bool AArch64LegalizerInfo::legalizeCustom(
1367 LegalizerHelper &Helper, MachineInstr &MI,
1368 LostDebugLocObserver &LocObserver) const {
1369 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1370 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1371 GISelChangeObserver &Observer = Helper.Observer;
1372 switch (MI.getOpcode()) {
1373 default:
1374 // No idea what to do.
1375 return false;
1376 case TargetOpcode::G_VAARG:
1377 return legalizeVaArg(MI, MRI, MIRBuilder);
1378 case TargetOpcode::G_LOAD:
1379 case TargetOpcode::G_STORE:
1380 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1381 case TargetOpcode::G_SHL:
1382 case TargetOpcode::G_ASHR:
1383 case TargetOpcode::G_LSHR:
1384 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1385 case TargetOpcode::G_GLOBAL_VALUE:
1386 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1387 case TargetOpcode::G_SBFX:
1388 case TargetOpcode::G_UBFX:
1389 return legalizeBitfieldExtract(MI, MRI, Helper);
1390 case TargetOpcode::G_FSHL:
1391 case TargetOpcode::G_FSHR:
1392 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1393 case TargetOpcode::G_ROTR:
1394 return legalizeRotate(MI, MRI, Helper);
1395 case TargetOpcode::G_CTPOP:
1396 return legalizeCTPOP(MI, MRI, Helper);
1397 case TargetOpcode::G_ATOMIC_CMPXCHG:
1398 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1399 case TargetOpcode::G_CTTZ:
1400 return legalizeCTTZ(MI, Helper);
1401 case TargetOpcode::G_BZERO:
1402 case TargetOpcode::G_MEMCPY:
1403 case TargetOpcode::G_MEMMOVE:
1404 case TargetOpcode::G_MEMSET:
1405 return legalizeMemOps(MI, Helper);
1406 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1407 return legalizeExtractVectorElt(MI, MRI, Helper);
1408 case TargetOpcode::G_DYN_STACKALLOC:
1409 return legalizeDynStackAlloc(MI, Helper);
1410 case TargetOpcode::G_PREFETCH:
1411 return legalizePrefetch(MI, Helper);
1412 case TargetOpcode::G_ABS:
1413 return Helper.lowerAbsToCNeg(MI);
1414 case TargetOpcode::G_ICMP:
1415 return legalizeICMP(MI, MRI, MIRBuilder);
1416 case TargetOpcode::G_BITCAST:
1417 return legalizeBitcast(MI, Helper);
1418 }
1419
1420 llvm_unreachable("expected switch to return");
1421}
1422
1423bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1424 LegalizerHelper &Helper) const {
1425 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1426 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1427 // We're trying to handle casts from i1 vectors to scalars by storing the
1428 // vector to the stack and reloading the result as a scalar.
1429 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1430 SrcTy.getElementType() != LLT::scalar(1))
1431 return false;
1432
1433 Helper.createStackStoreLoad(DstReg, SrcReg);
1434 MI.eraseFromParent();
1435 return true;
1436}
1437
1438bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1439 MachineRegisterInfo &MRI,
1440 MachineIRBuilder &MIRBuilder,
1441 GISelChangeObserver &Observer,
1442 LegalizerHelper &Helper) const {
1443 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1444 MI.getOpcode() == TargetOpcode::G_FSHR);
1445
1446 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1447 // lowering
1448 Register ShiftNo = MI.getOperand(3).getReg();
1449 LLT ShiftTy = MRI.getType(ShiftNo);
1450 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1451
1452 // Adjust shift amount according to Opcode (FSHL/FSHR)
1453 // Convert FSHL to FSHR
1454 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1455 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1456
1457 // Lower non-constant shifts and leave zero shifts to the optimizer.
1458 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1459 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1461
1462 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1463
1464 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1465
1466 // If the instruction is a G_FSHR whose shift amount is a 64-bit G_CONSTANT
1467 // in the range [0, BitWidth), it is already legal.
1468 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1469 VRegAndVal->Value.ult(BitWidth))
1470 return true;
1471
1472 // Cast the ShiftNumber to a 64-bit type
1473 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1474
1475 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1476 Observer.changingInstr(MI);
1477 MI.getOperand(3).setReg(Cast64.getReg(0));
1478 Observer.changedInstr(MI);
1479 }
1480 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1481 // instruction
1482 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1483 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1484 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1485 Cast64.getReg(0)});
1486 MI.eraseFromParent();
1487 }
1488 return true;
1489}
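// As an illustrative sketch (not exact MIR output): for a 32-bit funnel shift
//   %d:_(s32) = G_FSHL %a, %b, %c   ; %c = G_CONSTANT i32 8
// the shift amount is rewritten as BitWidth - Amount and materialized as a
// 64-bit constant, giving
//   %d:_(s32) = G_FSHR %a, %b, %k   ; %k = G_CONSTANT i64 24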
1490
1491bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1492 MachineRegisterInfo &MRI,
1493 MachineIRBuilder &MIRBuilder) const {
1494 Register DstReg = MI.getOperand(0).getReg();
1495 Register SrcReg1 = MI.getOperand(2).getReg();
1496 Register SrcReg2 = MI.getOperand(3).getReg();
1497 LLT DstTy = MRI.getType(DstReg);
1498 LLT SrcTy = MRI.getType(SrcReg1);
1499
1500 // Check the vector types are legal
1501 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1502 DstTy.getNumElements() != SrcTy.getNumElements() ||
1503 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1504 return false;
1505
1506 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1507 // following passes
1508 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1509 if (Pred != CmpInst::ICMP_NE)
1510 return true;
1511 Register CmpReg =
1512 MIRBuilder
1513 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1514 .getReg(0);
1515 MIRBuilder.buildNot(DstReg, CmpReg);
1516
1517 MI.eraseFromParent();
1518 return true;
1519}
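// E.g. (sketch) a vector %c = G_ICMP ne, %x, %y is rewritten as
//   %eq = G_ICMP eq, %x, %y ; %c = G_XOR %eq, -1
// so later combines/selection only need to handle the EQ form.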
1520
1521bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1522 MachineRegisterInfo &MRI,
1523 LegalizerHelper &Helper) const {
1524 // To allow for imported patterns to match, we ensure that the rotate amount
1525 // is 64b with an extension.
1526 Register AmtReg = MI.getOperand(2).getReg();
1527 LLT AmtTy = MRI.getType(AmtReg);
1528 (void)AmtTy;
1529 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1530 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1531 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1532 Helper.Observer.changingInstr(MI);
1533 MI.getOperand(2).setReg(NewAmt.getReg(0));
1534 Helper.Observer.changedInstr(MI);
1535 return true;
1536}
1537
1538bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1539 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1540 GISelChangeObserver &Observer) const {
1541 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1542 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1543 // G_ADD_LOW instructions.
1544 // By splitting this here, we can optimize accesses in the small code model by
1545 // folding in the G_ADD_LOW into the load/store offset.
1546 auto &GlobalOp = MI.getOperand(1);
1547 // Don't modify an intrinsic call.
1548 if (GlobalOp.isSymbol())
1549 return true;
1550 const auto *GV = GlobalOp.getGlobal();
1551 if (GV->isThreadLocal())
1552 return true; // Don't want to modify TLS vars.
1553
1554 auto &TM = ST->getTargetLowering()->getTargetMachine();
1555 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1556
1557 if (OpFlags & AArch64II::MO_GOT)
1558 return true;
1559
1560 auto Offset = GlobalOp.getOffset();
1561 Register DstReg = MI.getOperand(0).getReg();
1562 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1563 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1564 // Set the regclass on the dest reg too.
1565 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1566
1567 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1568 // by creating a MOVK that sets bits 48-63 of the register to (global address
1569 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1570 // prevent an incorrect tag being generated during relocation when the
1571 // global appears before the code section. Without the offset, a global at
1572 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1573 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1574 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1575 // instead of `0xf`.
1576 // This assumes that we're in the small code model so we can assume a binary
1577 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1578 // binary must also be loaded into address range [0, 2^48). Both of these
1579 // properties need to be ensured at runtime when using tagged addresses.
1580 if (OpFlags & AArch64II::MO_TAGGED) {
1581 assert(!Offset &&
1582 "Should not have folded in an offset for a tagged global!");
1583 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1584 .addGlobalAddress(GV, 0x100000000,
1586 .addImm(48);
1587 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1588 }
1589
1590 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1591 .addGlobalAddress(GV, Offset,
1593 MI.eraseFromParent();
1594 return true;
1595}
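// Net effect, as an illustrative sketch (not exact MIR output):
//   %page:_(p0) = ADRP @g                 ; MO_PAGE
//   %addr:_(p0) = G_ADD_LOW %page, @g     ; MO_PAGEOFF
// so that the low 12-bit page offset can later be folded into load/store
// addressing modes.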
1596
1597bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1598 MachineInstr &MI) const {
1599 auto LowerBinOp = [&MI](unsigned Opcode) {
1600 MachineIRBuilder MIB(MI);
1601 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1602 {MI.getOperand(2), MI.getOperand(3)});
1603 MI.eraseFromParent();
1604 return true;
1605 };
1606
1607 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1608 switch (IntrinsicID) {
1609 case Intrinsic::vacopy: {
1610 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1611 unsigned VaListSize =
1612 (ST->isTargetDarwin() || ST->isTargetWindows())
1613 ? PtrSize
1614 : ST->isTargetILP32() ? 20 : 32;
1615
1616 MachineFunction &MF = *MI.getMF();
1618 LLT::scalar(VaListSize * 8));
1619 MachineIRBuilder MIB(MI);
1620 MIB.buildLoad(Val, MI.getOperand(2),
1623 VaListSize, Align(PtrSize)));
1624 MIB.buildStore(Val, MI.getOperand(1),
1627 VaListSize, Align(PtrSize)));
1628 MI.eraseFromParent();
1629 return true;
1630 }
1631 case Intrinsic::get_dynamic_area_offset: {
1632 MachineIRBuilder &MIB = Helper.MIRBuilder;
1633 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1634 MI.eraseFromParent();
1635 return true;
1636 }
1637 case Intrinsic::aarch64_mops_memset_tag: {
1638 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1639 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1640 // the instruction).
1641 MachineIRBuilder MIB(MI);
1642 auto &Value = MI.getOperand(3);
1643 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1644 Value.setReg(ExtValueReg);
1645 return true;
1646 }
1647 case Intrinsic::aarch64_prefetch: {
1648 MachineIRBuilder MIB(MI);
1649 auto &AddrVal = MI.getOperand(1);
1650
1651 int64_t IsWrite = MI.getOperand(2).getImm();
1652 int64_t Target = MI.getOperand(3).getImm();
1653 int64_t IsStream = MI.getOperand(4).getImm();
1654 int64_t IsData = MI.getOperand(5).getImm();
1655
1656 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1657 (!IsData << 3) | // IsDataCache bit
1658 (Target << 1) | // Cache level bits
1659 (unsigned)IsStream; // Stream bit
1660
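// For example, a read data prefetch targeting L1 with the "keep" policy
// (IsWrite=0, Target=0, IsStream=0, IsData=1) encodes to PrfOp 0, i.e.
// PLDL1KEEP.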
1661 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1662 MI.eraseFromParent();
1663 return true;
1664 }
1665 case Intrinsic::aarch64_neon_uaddv:
1666 case Intrinsic::aarch64_neon_saddv:
1667 case Intrinsic::aarch64_neon_umaxv:
1668 case Intrinsic::aarch64_neon_smaxv:
1669 case Intrinsic::aarch64_neon_uminv:
1670 case Intrinsic::aarch64_neon_sminv: {
1671 MachineIRBuilder MIB(MI);
1672 MachineRegisterInfo &MRI = *MIB.getMRI();
1673 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1674 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1675 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1676
1677 auto OldDst = MI.getOperand(0).getReg();
1678 auto OldDstTy = MRI.getType(OldDst);
1679 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1680 if (OldDstTy == NewDstTy)
1681 return true;
1682
1683 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1684
1685 Helper.Observer.changingInstr(MI);
1686 MI.getOperand(0).setReg(NewDst);
1687 Helper.Observer.changedInstr(MI);
1688
1689 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1690 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1691 OldDst, NewDst);
1692
1693 return true;
1694 }
1695 case Intrinsic::aarch64_neon_uaddlp:
1696 case Intrinsic::aarch64_neon_saddlp: {
1697 MachineIRBuilder MIB(MI);
1698
1699 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1700 ? AArch64::G_UADDLP
1701 : AArch64::G_SADDLP;
1702 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1703 MI.eraseFromParent();
1704
1705 return true;
1706 }
1707 case Intrinsic::aarch64_neon_uaddlv:
1708 case Intrinsic::aarch64_neon_saddlv: {
1709 MachineIRBuilder MIB(MI);
1710 MachineRegisterInfo &MRI = *MIB.getMRI();
1711
1712 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1713 ? AArch64::G_UADDLV
1714 : AArch64::G_SADDLV;
1715 Register DstReg = MI.getOperand(0).getReg();
1716 Register SrcReg = MI.getOperand(2).getReg();
1717 LLT DstTy = MRI.getType(DstReg);
1718
1719 LLT MidTy, ExtTy;
1720 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1721 MidTy = LLT::fixed_vector(4, 32);
1722 ExtTy = LLT::scalar(32);
1723 } else {
1724 MidTy = LLT::fixed_vector(2, 64);
1725 ExtTy = LLT::scalar(64);
1726 }
1727
1728 Register MidReg =
1729 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1730 Register ZeroReg =
1731 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1732 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1733 {MidReg, ZeroReg})
1734 .getReg(0);
1735
1736 if (DstTy.getScalarSizeInBits() < 32)
1737 MIB.buildTrunc(DstReg, ExtReg);
1738 else
1739 MIB.buildCopy(DstReg, ExtReg);
1740
1741 MI.eraseFromParent();
1742
1743 return true;
1744 }
1745 case Intrinsic::aarch64_neon_smax:
1746 return LowerBinOp(TargetOpcode::G_SMAX);
1747 case Intrinsic::aarch64_neon_smin:
1748 return LowerBinOp(TargetOpcode::G_SMIN);
1749 case Intrinsic::aarch64_neon_umax:
1750 return LowerBinOp(TargetOpcode::G_UMAX);
1751 case Intrinsic::aarch64_neon_umin:
1752 return LowerBinOp(TargetOpcode::G_UMIN);
1753 case Intrinsic::aarch64_neon_fmax:
1754 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1755 case Intrinsic::aarch64_neon_fmin:
1756 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1757 case Intrinsic::aarch64_neon_fmaxnm:
1758 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1759 case Intrinsic::aarch64_neon_fminnm:
1760 return LowerBinOp(TargetOpcode::G_FMINNUM);
1761 case Intrinsic::aarch64_neon_smull:
1762 return LowerBinOp(AArch64::G_SMULL);
1763 case Intrinsic::aarch64_neon_umull:
1764 return LowerBinOp(AArch64::G_UMULL);
1765 case Intrinsic::aarch64_neon_abs: {
1766 // Lower the intrinsic to G_ABS.
1767 MachineIRBuilder MIB(MI);
1768 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1769 MI.eraseFromParent();
1770 return true;
1771 }
1772
1773 case Intrinsic::vector_reverse:
1774 // TODO: Add support for vector_reverse
1775 return false;
1776 }
1777
1778 return true;
1779}
1780
1781bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1782    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1783    GISelChangeObserver &Observer) const {
1784 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1785 MI.getOpcode() == TargetOpcode::G_LSHR ||
1786 MI.getOpcode() == TargetOpcode::G_SHL);
1787 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1788 // imported patterns can select it later. Either way, it will be legal.
1789 Register AmtReg = MI.getOperand(2).getReg();
1790 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1791 if (!VRegAndVal)
1792 return true;
1793 // Check the shift amount is in range for an immediate form.
1794 int64_t Amount = VRegAndVal->Value.getSExtValue();
1795 if (Amount > 31)
1796 return true; // This will have to remain a register variant.
1797 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1798 Observer.changingInstr(MI);
1799 MI.getOperand(2).setReg(ExtCst.getReg(0));
1800 Observer.changedInstr(MI);
1801 return true;
1802}
1803
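// LDP/STP of X registers take a signed 7-bit immediate scaled by the 8-byte
// access size, hence the isShiftedInt<7, 3> check on the folded offset below.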
1804static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1805                                MachineRegisterInfo &MRI) {
1806  Base = Root;
1807 Offset = 0;
1808
1809 Register NewBase;
1810 int64_t NewOffset;
1811 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1812 isShiftedInt<7, 3>(NewOffset)) {
1813 Base = NewBase;
1814 Offset = NewOffset;
1815 }
1816}
1817
1818// FIXME: This should be removed and replaced with the generic bitcast legalize
1819// action.
1820bool AArch64LegalizerInfo::legalizeLoadStore(
1821    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1822    GISelChangeObserver &Observer) const {
1823 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1824 MI.getOpcode() == TargetOpcode::G_LOAD);
1825 // Here we just try to handle vector loads/stores where our value type might
1826 // have pointer elements, which the SelectionDAG importer can't handle. To
1827 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1828 // the value to use s64 types.
1829
1830  // Custom legalization requires that the instruction, if not deleted, be
1831  // fully legalized. To allow further legalization of the instruction, we
1832  // create a new instruction and erase the existing one.
1833
1834 Register ValReg = MI.getOperand(0).getReg();
1835 const LLT ValTy = MRI.getType(ValReg);
1836
1837 if (ValTy == LLT::scalar(128)) {
1838
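    // An s128 access is emitted as a pair of 64-bit halves: LDIAPP/STILP when
    // LSE2+RCPC3 provide the required acquire/release semantics, otherwise
    // plain LDP/STP, which LSE2 guarantees to be single-copy atomic.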
1839 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1840 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1841 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1842 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1843 bool IsRcpC3 =
1844 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1845
1846 LLT s64 = LLT::scalar(64);
1847
1848 unsigned Opcode;
1849 if (IsRcpC3) {
1850 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1851 } else {
1852 // For LSE2, loads/stores should have been converted to monotonic and had
1853 // a fence inserted after them.
1854 assert(Ordering == AtomicOrdering::Monotonic ||
1855 Ordering == AtomicOrdering::Unordered);
1856 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1857
1858 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1859 }
1860
1861    MachineInstrBuilder NewI;
1862    if (IsLoad) {
1863 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1864 MIRBuilder.buildMergeLikeInstr(
1865 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1866 } else {
1867 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1868 NewI = MIRBuilder.buildInstr(
1869 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1870 }
1871
1872 if (IsRcpC3) {
1873 NewI.addUse(MI.getOperand(1).getReg());
1874 } else {
1875 Register Base;
1876 int Offset;
1877 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1878 NewI.addUse(Base);
1879 NewI.addImm(Offset / 8);
1880 }
1881
1882 NewI.cloneMemRefs(MI);
1883    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1884                                     *MRI.getTargetRegisterInfo(),
1885 *ST->getRegBankInfo());
1886 MI.eraseFromParent();
1887 return true;
1888 }
1889
1890 if (!ValTy.isPointerVector() ||
1891 ValTy.getElementType().getAddressSpace() != 0) {
1892 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1893 return false;
1894 }
1895
1896 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1897 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1898 auto &MMO = **MI.memoperands_begin();
1899 MMO.setType(NewTy);
1900
1901 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1902 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1903 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1904 } else {
1905 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1906 MIRBuilder.buildBitcast(ValReg, NewLoad);
1907 }
1908 MI.eraseFromParent();
1909 return true;
1910}
1911
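// G_VAARG is expanded here for targets where va_list is a single pointer:
// load the current argument pointer, realign it if the requested alignment
// exceeds the slot alignment, load the value, then advance the pointer by the
// slot-aligned value size and store it back.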
1912bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1913                                         MachineRegisterInfo &MRI,
1914                                         MachineIRBuilder &MIRBuilder) const {
1915 MachineFunction &MF = MIRBuilder.getMF();
1916 Align Alignment(MI.getOperand(2).getImm());
1917 Register Dst = MI.getOperand(0).getReg();
1918 Register ListPtr = MI.getOperand(1).getReg();
1919
1920 LLT PtrTy = MRI.getType(ListPtr);
1921 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1922
1923 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1924 const Align PtrAlign = Align(PtrSize);
1925 auto List = MIRBuilder.buildLoad(
1926 PtrTy, ListPtr,
1927      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1928                               PtrTy, PtrAlign));
1929
1930 MachineInstrBuilder DstPtr;
1931 if (Alignment > PtrAlign) {
1932 // Realign the list to the actual required alignment.
1933 auto AlignMinus1 =
1934 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1935 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1936 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1937 } else
1938 DstPtr = List;
1939
1940 LLT ValTy = MRI.getType(Dst);
1941 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1942 MIRBuilder.buildLoad(
1943 Dst, DstPtr,
1944      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1945                               ValTy, std::max(Alignment, PtrAlign)));
1946
1947 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1948
1949 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1950
1951 MIRBuilder.buildStore(NewList, ListPtr,
1952                        *MF.getMachineMemOperand(MachinePointerInfo(),
1953                                                 MachineMemOperand::MOStore,
1954                                                 PtrTy, PtrAlign));
1955
1956 MI.eraseFromParent();
1957 return true;
1958}
1959
1960bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1961    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1962  // Only legal if we can select immediate forms.
1963 // TODO: Lower this otherwise.
1964 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1965 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1966}
1967
1968bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1969                                         MachineRegisterInfo &MRI,
1970                                         LegalizerHelper &Helper) const {
1971 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1972 // it can be more efficiently lowered to the following sequence that uses
1973 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1974 // registers are cheap.
1975 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1976 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1977 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1978 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1979 //
1980 // For 128 bit vector popcounts, we lower to the following sequence:
1981 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1982 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1983 // uaddlp.4s v0, v0 // v4s32, v2s64
1984 // uaddlp.2d v0, v0 // v2s64
1985 //
1986 // For 64 bit vector popcounts, we lower to the following sequence:
1987 // cnt.8b v0, v0 // v4s16, v2s32
1988 // uaddlp.4h v0, v0 // v4s16, v2s32
1989 // uaddlp.2s v0, v0 // v2s32
1990
1991 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1992 Register Dst = MI.getOperand(0).getReg();
1993 Register Val = MI.getOperand(1).getReg();
1994 LLT Ty = MRI.getType(Val);
1995 unsigned Size = Ty.getSizeInBits();
1996
1997 assert(Ty == MRI.getType(Dst) &&
1998 "Expected src and dst to have the same type!");
1999
2000 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
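    // CSSC only provides a scalar CNT for 32/64-bit registers, so split the
    // 128-bit value into two s64 halves, popcount each, add the results, and
    // zero-extend the sum back to s128.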
2001 LLT s64 = LLT::scalar(64);
2002
2003 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2004 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2005 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2006 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2007
2008 MIRBuilder.buildZExt(Dst, Add);
2009 MI.eraseFromParent();
2010 return true;
2011 }
2012
2013 if (!ST->hasNEON() ||
2014 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2015 // Use generic lowering when custom lowering is not possible.
2016 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2017 Helper.lowerBitCount(MI) ==
2018               LegalizerHelper::LegalizeResult::Legalized;
2019  }
2020
2021 // Pre-conditioning: widen Val up to the nearest vector type.
2022 // s32,s64,v4s16,v2s32 -> v8i8
2023 // v8s16,v4s32,v2s64 -> v16i8
2024 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2025 if (Ty.isScalar()) {
2026 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2027 if (Size == 32) {
2028 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2029 }
2030 }
2031 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2032
2033 // Count bits in each byte-sized lane.
2034 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2035
2036 // Sum across lanes.
2037
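  // If the dot-product extension is available, a UDOT of the per-byte counts
  // against an all-ones vector sums each group of four bytes into a 32-bit
  // lane; a v2s64 result needs one extra pairwise widening add (UADDLP).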
2038 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2039 Ty.getScalarSizeInBits() != 16) {
2040 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2041 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2042 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2043    MachineInstrBuilder Sum;
2044
2045 if (Ty == LLT::fixed_vector(2, 64)) {
2046 auto UDOT =
2047 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2048 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2049 } else if (Ty == LLT::fixed_vector(4, 32)) {
2050 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2051 } else if (Ty == LLT::fixed_vector(2, 32)) {
2052 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2053 } else {
2054 llvm_unreachable("unexpected vector shape");
2055 }
2056
2057 Sum->getOperand(0).setReg(Dst);
2058 MI.eraseFromParent();
2059 return true;
2060 }
2061
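  // Otherwise sum the byte counts with UADDLV (a full reduction, for scalar
  // results) or a chain of pairwise widening adds (UADDLP) until the lane
  // width matches the destination element size.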
2062 Register HSum = CTPOP.getReg(0);
2063 unsigned Opc;
2064 SmallVector<LLT> HAddTys;
2065 if (Ty.isScalar()) {
2066 Opc = Intrinsic::aarch64_neon_uaddlv;
2067 HAddTys.push_back(LLT::scalar(32));
2068 } else if (Ty == LLT::fixed_vector(8, 16)) {
2069 Opc = Intrinsic::aarch64_neon_uaddlp;
2070 HAddTys.push_back(LLT::fixed_vector(8, 16));
2071 } else if (Ty == LLT::fixed_vector(4, 32)) {
2072 Opc = Intrinsic::aarch64_neon_uaddlp;
2073 HAddTys.push_back(LLT::fixed_vector(8, 16));
2074 HAddTys.push_back(LLT::fixed_vector(4, 32));
2075 } else if (Ty == LLT::fixed_vector(2, 64)) {
2076 Opc = Intrinsic::aarch64_neon_uaddlp;
2077 HAddTys.push_back(LLT::fixed_vector(8, 16));
2078 HAddTys.push_back(LLT::fixed_vector(4, 32));
2079 HAddTys.push_back(LLT::fixed_vector(2, 64));
2080 } else if (Ty == LLT::fixed_vector(4, 16)) {
2081 Opc = Intrinsic::aarch64_neon_uaddlp;
2082 HAddTys.push_back(LLT::fixed_vector(4, 16));
2083 } else if (Ty == LLT::fixed_vector(2, 32)) {
2084 Opc = Intrinsic::aarch64_neon_uaddlp;
2085 HAddTys.push_back(LLT::fixed_vector(4, 16));
2086 HAddTys.push_back(LLT::fixed_vector(2, 32));
2087 } else
2088 llvm_unreachable("unexpected vector shape");
2089  MachineInstrBuilder UADD;
2090  for (LLT HTy : HAddTys) {
2091 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2092 HSum = UADD.getReg(0);
2093 }
2094
2095 // Post-conditioning.
2096 if (Ty.isScalar() && (Size == 64 || Size == 128))
2097 MIRBuilder.buildZExt(Dst, UADD);
2098 else
2099 UADD->getOperand(0).setReg(Dst);
2100 MI.eraseFromParent();
2101 return true;
2102}
2103
2104bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2105    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2106  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2107 LLT s64 = LLT::scalar(64);
2108 auto Addr = MI.getOperand(1).getReg();
2109 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2110 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2111 auto DstLo = MRI.createGenericVirtualRegister(s64);
2112 auto DstHi = MRI.createGenericVirtualRegister(s64);
2113
2114  MachineInstrBuilder CAS;
2115  if (ST->hasLSE()) {
2116    // We have 128-bit CASP instructions taking XSeqPair registers, which are
2117    // s128. We need the merge/unmerge to bracket the expansion and pair up
2118    // with the rest of the MIR, so we reassemble the extracted registers into
2119    // a 128-bit known-regclass value with code like this:
2120 //
2121 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2122 // %out = CASP %in1, ...
2123 // %OldLo = G_EXTRACT %out, 0
2124 // %OldHi = G_EXTRACT %out, 64
2125 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2126 unsigned Opcode;
2127    switch (Ordering) {
2128    case AtomicOrdering::Acquire:
2129      Opcode = AArch64::CASPAX;
2130      break;
2131    case AtomicOrdering::Release:
2132      Opcode = AArch64::CASPLX;
2133      break;
2134    case AtomicOrdering::AcquireRelease:
2135    case AtomicOrdering::SequentiallyConsistent:
2136      Opcode = AArch64::CASPALX;
2137      break;
2138 default:
2139 Opcode = AArch64::CASPX;
2140 break;
2141 }
2142
2143 LLT s128 = LLT::scalar(128);
2144 auto CASDst = MRI.createGenericVirtualRegister(s128);
2145 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2146 auto CASNew = MRI.createGenericVirtualRegister(s128);
2147 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2148 .addUse(DesiredI->getOperand(0).getReg())
2149 .addImm(AArch64::sube64)
2150 .addUse(DesiredI->getOperand(1).getReg())
2151 .addImm(AArch64::subo64);
2152 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2153 .addUse(NewI->getOperand(0).getReg())
2154 .addImm(AArch64::sube64)
2155 .addUse(NewI->getOperand(1).getReg())
2156 .addImm(AArch64::subo64);
2157
2158 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2159
2160 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2161 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2162 } else {
2163    // The -O0 CMP_SWAP_128 is friendlier to generate code for because
2164    // LDXP/STXP can take arbitrary registers, so it just has the normal GPR64
2165    // operands that the rest of AArch64 expects.
2166 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2167 unsigned Opcode;
2168    switch (Ordering) {
2169    case AtomicOrdering::Acquire:
2170      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2171      break;
2172    case AtomicOrdering::Release:
2173      Opcode = AArch64::CMP_SWAP_128_RELEASE;
2174      break;
2175    case AtomicOrdering::AcquireRelease:
2176    case AtomicOrdering::SequentiallyConsistent:
2177      Opcode = AArch64::CMP_SWAP_128;
2178      break;
2179 default:
2180 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2181 break;
2182 }
2183
2184 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2185 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2186 {Addr, DesiredI->getOperand(0),
2187 DesiredI->getOperand(1), NewI->getOperand(0),
2188 NewI->getOperand(1)});
2189 }
2190
2191 CAS.cloneMemRefs(MI);
2192  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2193                                   *MRI.getTargetRegisterInfo(),
2194 *ST->getRegBankInfo());
2195
2196 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2197 MI.eraseFromParent();
2198 return true;
2199}
2200
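// AArch64 has no count-trailing-zeros instruction, so CTTZ(x) is rewritten as
// CTLZ(BITREVERSE(x)), which selects to an RBIT + CLZ pair.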
2201bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2202 LegalizerHelper &Helper) const {
2203 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2204 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2205 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2206 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2207 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2208 MI.eraseFromParent();
2209 return true;
2210}
2211
2212bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2213 LegalizerHelper &Helper) const {
2214 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2215
2216  // The tagged version, aarch64_mops_memset_tag, is legalised in legalizeIntrinsic.
2217 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2218 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2219 // the instruction).
2220 auto &Value = MI.getOperand(1);
2221 Register ExtValueReg =
2222 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2223 Value.setReg(ExtValueReg);
2224 return true;
2225 }
2226
2227 return false;
2228}
2229
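// Extracts with a constant index, or from scalable vectors, are left as-is
// for selection; a variable-index extract from a fixed vector is lowered
// through a stack temporary (store the vector, reload the addressed element).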
2230bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2231    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2232  const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2233 auto VRegAndVal =
2234      getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2235  if (VRegAndVal)
2236 return true;
2237 LLT VecTy = MRI.getType(Element->getVectorReg());
2238 if (VecTy.isScalableVector())
2239 return true;
2240 return Helper.lowerExtractInsertVectorElt(MI) !=
2241         LegalizerHelper::LegalizeResult::UnableToLegalize;
2242}
2243
2244bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2245 MachineInstr &MI, LegalizerHelper &Helper) const {
2246 MachineFunction &MF = *MI.getParent()->getParent();
2247 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2248 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2249
2250 // If stack probing is not enabled for this function, use the default
2251 // lowering.
2252 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2253 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2254 "inline-asm") {
2255 Helper.lowerDynStackAlloc(MI);
2256 return true;
2257 }
2258
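  // Stack probing was requested, so compute the new SP with the generic
  // helper and emit the PROBED_STACKALLOC_DYN pseudo, which is expected to
  // expand into a loop that probes each page as the stack is grown.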
2259 Register Dst = MI.getOperand(0).getReg();
2260 Register AllocSize = MI.getOperand(1).getReg();
2261 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2262
2263 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2264 "Unexpected type for dynamic alloca");
2265 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2266 "Unexpected type for dynamic alloca");
2267
2268 LLT PtrTy = MRI.getType(Dst);
2269 Register SPReg =
2270      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2271  Register SPTmp =
2272 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2273 auto NewMI =
2274 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2275 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2276 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2277 MIRBuilder.buildCopy(Dst, SPTmp);
2278
2279 MI.eraseFromParent();
2280 return true;
2281}
2282
2283bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2284 LegalizerHelper &Helper) const {
2285 MachineIRBuilder &MIB = Helper.MIRBuilder;
2286 auto &AddrVal = MI.getOperand(0);
2287
2288 int64_t IsWrite = MI.getOperand(1).getImm();
2289 int64_t Locality = MI.getOperand(2).getImm();
2290 int64_t IsData = MI.getOperand(3).getImm();
2291
2292 bool IsStream = Locality == 0;
2293 if (Locality != 0) {
2294 assert(Locality <= 3 && "Prefetch locality out-of-range");
2295    // The locality degree is the inverse of the cache level: higher locality
2296    // means a closer (lower-numbered) cache, so flip the value. The PRFM
2297    // encoding starts at 0 for L1.
2298 Locality = 3 - Locality;
2299 }
2300
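  // For example, llvm.prefetch with locality 3 (keep in all caches) maps to
  // target level 0 (L1) with the "keep" policy, while locality 0 maps to a
  // streaming (non-temporal) prefetch.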
2301 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2302
2303 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2304 MI.eraseFromParent();
2305 return true;
2306}