LLVM 20.0.0git
AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
66
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
93
95 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
98 v2s16, v2s8})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
122
124 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
126 .clampScalar(0, s32, s64)
127 .clampNumElements(0, v4s16, v8s16)
128 .clampNumElements(0, v2s32, v4s32)
129 .clampNumElements(0, v2s64, v2s64)
130 .moreElementsToNextPow2(0);
131
132 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
133 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
134 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
135 .widenScalarToNextPow2(0)
136 .clampScalar(0, s32, s64)
137 .clampMaxNumElements(0, s8, 16)
138 .clampMaxNumElements(0, s16, 8)
139 .clampNumElements(0, v2s32, v4s32)
140 .clampNumElements(0, v2s64, v2s64)
142 [=](const LegalityQuery &Query) {
143 return Query.Types[0].getNumElements() <= 2;
144 },
145 0, s32)
146 .minScalarOrEltIf(
147 [=](const LegalityQuery &Query) {
148 return Query.Types[0].getNumElements() <= 4;
149 },
150 0, s16)
151 .minScalarOrEltIf(
152 [=](const LegalityQuery &Query) {
153 return Query.Types[0].getNumElements() <= 16;
154 },
155 0, s8)
156 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
158
160 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
161 .widenScalarToNextPow2(0)
162 .clampScalar(0, s32, s64)
163 .clampMaxNumElements(0, s8, 16)
164 .clampMaxNumElements(0, s16, 8)
165 .clampNumElements(0, v2s32, v4s32)
166 .clampNumElements(0, v2s64, v2s64)
168 [=](const LegalityQuery &Query) {
169 return Query.Types[0].getNumElements() <= 2;
170 },
171 0, s32)
172 .minScalarOrEltIf(
173 [=](const LegalityQuery &Query) {
174 return Query.Types[0].getNumElements() <= 4;
175 },
176 0, s16)
177 .minScalarOrEltIf(
178 [=](const LegalityQuery &Query) {
179 return Query.Types[0].getNumElements() <= 16;
180 },
181 0, s8)
182 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
184
185 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
186 .customIf([=](const LegalityQuery &Query) {
187 const auto &SrcTy = Query.Types[0];
188 const auto &AmtTy = Query.Types[1];
189 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
190 AmtTy.getSizeInBits() == 32;
191 })
192 .legalFor({
193 {s32, s32},
194 {s32, s64},
195 {s64, s64},
196 {v8s8, v8s8},
197 {v16s8, v16s8},
198 {v4s16, v4s16},
199 {v8s16, v8s16},
200 {v2s32, v2s32},
201 {v4s32, v4s32},
202 {v2s64, v2s64},
203 })
204 .widenScalarToNextPow2(0)
205 .clampScalar(1, s32, s64)
206 .clampScalar(0, s32, s64)
207 .clampNumElements(0, v8s8, v16s8)
208 .clampNumElements(0, v4s16, v8s16)
209 .clampNumElements(0, v2s32, v4s32)
210 .clampNumElements(0, v2s64, v2s64)
212 .minScalarSameAs(1, 0)
214
216 .legalFor({{p0, s64}, {v2p0, v2s64}})
217 .clampScalarOrElt(1, s64, s64)
218 .clampNumElements(0, v2p0, v2p0);
219
220 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
221
222 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
223 .legalFor({s32, s64})
224 .libcallFor({s128})
225 .clampScalar(0, s32, s64)
227 .scalarize(0);
228
229 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
230 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
231 .libcallFor({s128})
233 .minScalarOrElt(0, s32)
234 .clampNumElements(0, v2s32, v4s32)
235 .clampNumElements(0, v2s64, v2s64)
236 .scalarize(0);
237
238 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
239 .widenScalarToNextPow2(0, /*Min = */ 32)
240 .clampScalar(0, s32, s64)
241 .lower();
242
243 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
244 .legalFor({s64, v8s16, v16s8, v4s32})
245 .lower();
246
247 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
248 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
249 .legalFor(HasCSSC, {s32, s64})
250 .minScalar(HasCSSC, 0, s32)
251 .clampNumElements(0, v8s8, v16s8)
252 .clampNumElements(0, v4s16, v8s16)
253 .clampNumElements(0, v2s32, v4s32)
254 // FIXME: This shouldn't be needed as v2s64 types are going to
255 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
256 .clampNumElements(0, v2s64, v2s64)
257 .lower();
258
260 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
261 .legalFor({{s32, s32}, {s64, s32}})
262 .clampScalar(0, s32, s64)
263 .clampScalar(1, s32, s64)
265
267 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
268 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
269 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
270 .legalFor({s32, s64, v2s32, v4s32, v2s64})
271 .legalFor(HasFP16, {s16, v4s16, v8s16})
272 .libcallFor({s128})
273 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
274 .minScalarOrElt(0, MinFPScalar)
275 .clampNumElements(0, v4s16, v8s16)
276 .clampNumElements(0, v2s32, v4s32)
277 .clampNumElements(0, v2s64, v2s64)
279
280 getActionDefinitionsBuilder({G_FABS, G_FNEG})
281 .legalFor({s32, s64, v2s32, v4s32, v2s64})
282 .legalFor(HasFP16, {s16, v4s16, v8s16})
283 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
285 .clampNumElements(0, v4s16, v8s16)
286 .clampNumElements(0, v2s32, v4s32)
287 .clampNumElements(0, v2s64, v2s64)
289 .lowerFor({s16, v4s16, v8s16});
290
292 .libcallFor({s32, s64, s128})
293 .minScalar(0, s32)
294 .scalarize(0);
295
296 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
297 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
298 .libcallFor({{s64, s128}})
299 .minScalarOrElt(1, MinFPScalar);
300
301 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
302 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
303 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
304 G_FSINH, G_FTANH})
305 // We need a call for these, so we always need to scalarize.
306 .scalarize(0)
307 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
308 .minScalar(0, s32)
309 .libcallFor({s32, s64, s128});
311 .scalarize(0)
312 .minScalar(0, s32)
313 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
314
316 .legalIf(all(typeInSet(0, {s32, s64, p0}),
317 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
319 .clampScalar(0, s32, s64)
321 .minScalar(1, s8)
322 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
323 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
324
326 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
327 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
329 .clampScalar(1, s32, s128)
331 .minScalar(0, s16)
332 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
333 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
334 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
335
336
337 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
338 auto &Actions = getActionDefinitionsBuilder(Op);
339
340 if (Op == G_SEXTLOAD)
342
343 // Atomics have zero extending behavior.
344 Actions
345 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
346 {s32, p0, s16, 8},
347 {s32, p0, s32, 8},
348 {s64, p0, s8, 2},
349 {s64, p0, s16, 2},
350 {s64, p0, s32, 4},
351 {s64, p0, s64, 8},
352 {p0, p0, s64, 8},
353 {v2s32, p0, s64, 8}})
354 .widenScalarToNextPow2(0)
355 .clampScalar(0, s32, s64)
356 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
357 // how to do that yet.
358 .unsupportedIfMemSizeNotPow2()
359 // Lower anything left over into G_*EXT and G_LOAD
360 .lower();
361 }
362
363 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
364 const LLT &ValTy = Query.Types[0];
365 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
366 };
367
369 .customIf([=](const LegalityQuery &Query) {
370 return HasRCPC3 && Query.Types[0] == s128 &&
371 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
372 })
373 .customIf([=](const LegalityQuery &Query) {
374 return Query.Types[0] == s128 &&
375 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
376 })
377 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
378 {s16, p0, s16, 8},
379 {s32, p0, s32, 8},
380 {s64, p0, s64, 8},
381 {p0, p0, s64, 8},
382 {s128, p0, s128, 8},
383 {v8s8, p0, s64, 8},
384 {v16s8, p0, s128, 8},
385 {v4s16, p0, s64, 8},
386 {v8s16, p0, s128, 8},
387 {v2s32, p0, s64, 8},
388 {v4s32, p0, s128, 8},
389 {v2s64, p0, s128, 8}})
390 // These extends are also legal
391 .legalForTypesWithMemDesc(
392 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
393 .legalForTypesWithMemDesc({
394 // SVE vscale x 128 bit base sizes
395 {nxv16s8, p0, nxv16s8, 8},
396 {nxv8s16, p0, nxv8s16, 8},
397 {nxv4s32, p0, nxv4s32, 8},
398 {nxv2s64, p0, nxv2s64, 8},
399 })
400 .widenScalarToNextPow2(0, /* MinSize = */ 8)
401 .clampMaxNumElements(0, s8, 16)
402 .clampMaxNumElements(0, s16, 8)
403 .clampMaxNumElements(0, s32, 4)
404 .clampMaxNumElements(0, s64, 2)
405 .clampMaxNumElements(0, p0, 2)
407 .clampScalar(0, s8, s64)
409 [=](const LegalityQuery &Query) {
410 // Clamp extending load results to 32-bits.
411 return Query.Types[0].isScalar() &&
412 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
413 Query.Types[0].getSizeInBits() > 32;
414 },
415 changeTo(0, s32))
416 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
417 .bitcastIf(typeInSet(0, {v4s8}),
418 [=](const LegalityQuery &Query) {
419 const LLT VecTy = Query.Types[0];
420 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
421 })
422 .customIf(IsPtrVecPred)
423 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
425
427 .customIf([=](const LegalityQuery &Query) {
428 return HasRCPC3 && Query.Types[0] == s128 &&
429 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
430 })
431 .customIf([=](const LegalityQuery &Query) {
432 return Query.Types[0] == s128 &&
433 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
434 })
435 .legalForTypesWithMemDesc(
436 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
437 {s32, p0, s8, 8}, // truncstorei8 from s32
438 {s64, p0, s8, 8}, // truncstorei8 from s64
439 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
440 {s64, p0, s16, 8}, // truncstorei16 from s64
441 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
442 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
443 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
444 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
445 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
446 .legalForTypesWithMemDesc({
447 // SVE vscale x 128 bit base sizes
448 // TODO: Add nxv2p0. Consider bitcastIf.
449 // See #92130
450 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
451 {nxv16s8, p0, nxv16s8, 8},
452 {nxv8s16, p0, nxv8s16, 8},
453 {nxv4s32, p0, nxv4s32, 8},
454 {nxv2s64, p0, nxv2s64, 8},
455 })
456 .clampScalar(0, s8, s64)
457 .lowerIf([=](const LegalityQuery &Query) {
458 return Query.Types[0].isScalar() &&
459 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
460 })
461 // Maximum: sN * k = 128
462 .clampMaxNumElements(0, s8, 16)
463 .clampMaxNumElements(0, s16, 8)
464 .clampMaxNumElements(0, s32, 4)
465 .clampMaxNumElements(0, s64, 2)
466 .clampMaxNumElements(0, p0, 2)
468 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
469 .bitcastIf(typeInSet(0, {v4s8}),
470 [=](const LegalityQuery &Query) {
471 const LLT VecTy = Query.Types[0];
472 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
473 })
474 .customIf(IsPtrVecPred)
475 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
477
478 getActionDefinitionsBuilder(G_INDEXED_STORE)
479 // Idx 0 == Ptr, Idx 1 == Val
480 // TODO: we can implement legalizations but as of now these are
481 // generated in a very specific way.
483 {p0, s8, s8, 8},
484 {p0, s16, s16, 8},
485 {p0, s32, s8, 8},
486 {p0, s32, s16, 8},
487 {p0, s32, s32, 8},
488 {p0, s64, s64, 8},
489 {p0, p0, p0, 8},
490 {p0, v8s8, v8s8, 8},
491 {p0, v16s8, v16s8, 8},
492 {p0, v4s16, v4s16, 8},
493 {p0, v8s16, v8s16, 8},
494 {p0, v2s32, v2s32, 8},
495 {p0, v4s32, v4s32, 8},
496 {p0, v2s64, v2s64, 8},
497 {p0, v2p0, v2p0, 8},
498 {p0, s128, s128, 8},
499 })
500 .unsupported();
501
502 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
503 LLT LdTy = Query.Types[0];
504 LLT PtrTy = Query.Types[1];
505 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
506 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
507 return false;
508 if (PtrTy != p0)
509 return false;
510 return true;
511 };
512 getActionDefinitionsBuilder(G_INDEXED_LOAD)
515 .legalIf(IndexedLoadBasicPred)
516 .unsupported();
517 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
518 .unsupportedIf(
520 .legalIf(all(typeInSet(0, {s16, s32, s64}),
521 LegalityPredicate([=](const LegalityQuery &Q) {
522 LLT LdTy = Q.Types[0];
523 LLT PtrTy = Q.Types[1];
524 LLT MemTy = Q.MMODescrs[0].MemoryTy;
525 if (PtrTy != p0)
526 return false;
527 if (LdTy == s16)
528 return MemTy == s8;
529 if (LdTy == s32)
530 return MemTy == s8 || MemTy == s16;
531 if (LdTy == s64)
532 return MemTy == s8 || MemTy == s16 || MemTy == s32;
533 return false;
534 })))
535 .unsupported();
536
537 // Constants
539 .legalFor({p0, s8, s16, s32, s64})
540 .widenScalarToNextPow2(0)
541 .clampScalar(0, s8, s64);
542 getActionDefinitionsBuilder(G_FCONSTANT)
543 .legalFor({s32, s64, s128})
544 .legalFor(HasFP16, {s16})
545 .clampScalar(0, MinFPScalar, s128);
546
547 // FIXME: fix moreElementsToNextPow2
549 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
551 .clampScalar(1, s32, s64)
552 .clampScalar(0, s32, s32)
553 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
554 .minScalarEltSameAsIf(
555 [=](const LegalityQuery &Query) {
556 const LLT &Ty = Query.Types[0];
557 const LLT &SrcTy = Query.Types[1];
558 return Ty.isVector() && !SrcTy.isPointerVector() &&
559 Ty.getElementType() != SrcTy.getElementType();
560 },
561 0, 1)
562 .minScalarOrEltIf(
563 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
564 1, s32)
565 .minScalarOrEltIf(
566 [=](const LegalityQuery &Query) {
567 return Query.Types[1].isPointerVector();
568 },
569 0, s64)
571 .clampNumElements(1, v8s8, v16s8)
572 .clampNumElements(1, v4s16, v8s16)
573 .clampNumElements(1, v2s32, v4s32)
574 .clampNumElements(1, v2s64, v2s64)
575 .clampNumElements(1, v2p0, v2p0)
576 .customIf(isVector(0));
577
579 .legalFor({{s32, s32},
580 {s32, s64},
581 {v4s32, v4s32},
582 {v2s32, v2s32},
583 {v2s64, v2s64}})
584 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
586 .clampScalar(0, s32, s32)
587 .minScalarOrElt(1, MinFPScalar)
588 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
589 .minScalarEltSameAsIf(
590 [=](const LegalityQuery &Query) {
591 const LLT &Ty = Query.Types[0];
592 const LLT &SrcTy = Query.Types[1];
593 return Ty.isVector() && !SrcTy.isPointerVector() &&
594 Ty.getElementType() != SrcTy.getElementType();
595 },
596 0, 1)
597 .clampNumElements(1, v4s16, v8s16)
598 .clampNumElements(1, v2s32, v4s32)
599 .clampMaxNumElements(1, s64, 2)
600 .moreElementsToNextPow2(1)
601 .libcallFor({{s32, s128}});
602
603 // Extensions
604 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
605 unsigned DstSize = Query.Types[0].getSizeInBits();
606
607 // Handle legal vectors using legalFor
608 if (Query.Types[0].isVector())
609 return false;
610
611 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
612 return false; // Extending to a scalar s128 needs narrowing.
613
614 const LLT &SrcTy = Query.Types[1];
615
616 // Make sure we fit in a register otherwise. Don't bother checking that
617 // the source type is below 128 bits. We shouldn't be allowing anything
618 // through which is wider than the destination in the first place.
619 unsigned SrcSize = SrcTy.getSizeInBits();
620 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
621 return false;
622
623 return true;
624 };
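  // A size-only sketch of the predicate above (illustrative, not used by the
  // legalizer): scalar extends stay legal when both sizes are powers of two,
  // the source is at least 8 bits, and the destination fits in a register,
  // i.e. lies in [8, 128).
  constexpr auto ExtSizesAreLegal = [](unsigned DstSize, unsigned SrcSize) {
    return DstSize >= 8 && DstSize < 128 && (DstSize & (DstSize - 1)) == 0 &&
           SrcSize >= 8 && (SrcSize & (SrcSize - 1)) == 0;
  };
  static_assert(ExtSizesAreLegal(64, 32), "e.g. G_SEXT of s32 to s64");
  static_assert(!ExtSizesAreLegal(128, 64),
                "extends to s128 are narrowed instead of kept legal");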
625 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
626 .legalIf(ExtLegalFunc)
627 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
628 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
630 .clampMaxNumElements(1, s8, 8)
631 .clampMaxNumElements(1, s16, 4)
632 .clampMaxNumElements(1, s32, 2)
633 // Tries to convert a large EXTEND into two smaller EXTENDs
634 .lowerIf([=](const LegalityQuery &Query) {
635 return (Query.Types[0].getScalarSizeInBits() >
636 Query.Types[1].getScalarSizeInBits() * 2) &&
637 Query.Types[0].isVector() &&
638 (Query.Types[1].getScalarSizeInBits() == 8 ||
639 Query.Types[1].getScalarSizeInBits() == 16);
640 })
641 .clampMinNumElements(1, s8, 8)
642 .clampMinNumElements(1, s16, 4);
643
645 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
647 .clampMaxNumElements(0, s8, 8)
648 .clampMaxNumElements(0, s16, 4)
649 .clampMaxNumElements(0, s32, 2)
650 .minScalarOrEltIf(
651 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
652 0, s8)
653 .lowerIf([=](const LegalityQuery &Query) {
654 LLT DstTy = Query.Types[0];
655 LLT SrcTy = Query.Types[1];
656 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
657 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
658 })
659 .clampMinNumElements(0, s8, 8)
660 .clampMinNumElements(0, s16, 4)
661 .alwaysLegal();
662
663 getActionDefinitionsBuilder(G_SEXT_INREG)
664 .legalFor({s32, s64})
665 .legalFor(PackedVectorAllTypeList)
666 .maxScalar(0, s64)
667 .clampNumElements(0, v8s8, v16s8)
668 .clampNumElements(0, v4s16, v8s16)
669 .clampNumElements(0, v2s32, v4s32)
670 .clampMaxNumElements(0, s64, 2)
671 .lower();
672
673 // FP conversions
675 .legalFor(
676 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
677 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
678 .clampNumElements(0, v4s16, v4s16)
679 .clampNumElements(0, v2s32, v2s32)
680 .scalarize(0);
681
683 .legalFor(
684 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
685 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
686 .clampNumElements(0, v4s32, v4s32)
687 .clampNumElements(0, v2s64, v2s64)
688 .scalarize(0);
689
690 // Conversions
691 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
692 .legalFor({{s32, s32},
693 {s64, s32},
694 {s32, s64},
695 {s64, s64},
696 {v2s64, v2s64},
697 {v4s32, v4s32},
698 {v2s32, v2s32}})
699 .legalFor(HasFP16,
700 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
701 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
703 // The range of a fp16 value fits into an i17, so we can lower the width
704 // to i64.
706 [=](const LegalityQuery &Query) {
707 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
708 },
709 changeTo(0, s64))
711 .widenScalarOrEltToNextPow2OrMinSize(0)
712 .minScalar(0, s32)
713 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
714 .widenScalarIf(
715 [=](const LegalityQuery &Query) {
716 return Query.Types[0].getScalarSizeInBits() <= 64 &&
717 Query.Types[0].getScalarSizeInBits() >
718 Query.Types[1].getScalarSizeInBits();
719 },
721 .widenScalarIf(
722 [=](const LegalityQuery &Query) {
723 return Query.Types[1].getScalarSizeInBits() <= 64 &&
724 Query.Types[0].getScalarSizeInBits() <
725 Query.Types[1].getScalarSizeInBits();
726 },
728 .clampNumElements(0, v4s16, v8s16)
729 .clampNumElements(0, v2s32, v4s32)
730 .clampMaxNumElements(0, s64, 2)
731 .libcallFor(
732 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
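  // Arithmetic behind the i17 remark above: the largest finite fp16 magnitude
  // is 65504, which sits inside the signed 17-bit range [-65536, 65535], so
  // every in-range fp16 -> integer result fits once the result is at least
  // 17 bits wide (and therefore certainly in s64).
  static_assert(65504 <= (1 << 16) - 1 && -65504 >= -(1 << 16),
                "fp16 integral range fits in i17");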
733
734 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
735 .legalFor({{s32, s32},
736 {s64, s32},
737 {s32, s64},
738 {s64, s64},
739 {v2s64, v2s64},
740 {v4s32, v4s32},
741 {v2s32, v2s32}})
742 .legalFor(HasFP16,
743 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
744 // Handle types larger than i64 by scalarizing/lowering.
745 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
747 // The range of a fp16 value fits into an i17, so we can lower the width
748 // to i64.
750 [=](const LegalityQuery &Query) {
751 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
752 },
753 changeTo(0, s64))
754 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
756 .widenScalarToNextPow2(0, /*MinSize=*/32)
757 .minScalar(0, s32)
758 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
759 .widenScalarIf(
760 [=](const LegalityQuery &Query) {
761 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
762 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
763 ITySize > Query.Types[1].getScalarSizeInBits();
764 },
766 .widenScalarIf(
767 [=](const LegalityQuery &Query) {
768 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
769 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
770 Query.Types[0].getScalarSizeInBits() < FTySize;
771 },
774 .clampNumElements(0, v4s16, v8s16)
775 .clampNumElements(0, v2s32, v4s32)
776 .clampMaxNumElements(0, s64, 2);
777
778 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
779 .legalFor({{s32, s32},
780 {s64, s32},
781 {s32, s64},
782 {s64, s64},
783 {v2s64, v2s64},
784 {v4s32, v4s32},
785 {v2s32, v2s32}})
786 .legalFor(HasFP16,
787 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
788 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
792 .minScalar(1, s32)
793 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
795 [=](const LegalityQuery &Query) {
796 return Query.Types[1].getScalarSizeInBits() <= 64 &&
797 Query.Types[0].getScalarSizeInBits() <
798 Query.Types[1].getScalarSizeInBits();
799 },
801 .widenScalarIf(
802 [=](const LegalityQuery &Query) {
803 return Query.Types[0].getScalarSizeInBits() <= 64 &&
804 Query.Types[0].getScalarSizeInBits() >
805 Query.Types[1].getScalarSizeInBits();
806 },
808 .clampNumElements(0, v4s16, v8s16)
809 .clampNumElements(0, v2s32, v4s32)
810 .clampMaxNumElements(0, s64, 2)
811 .libcallFor({{s16, s128},
812 {s32, s128},
813 {s64, s128},
814 {s128, s128},
815 {s128, s32},
816 {s128, s64}});
817
818 // Control-flow
820 .legalFor({s32})
821 .clampScalar(0, s32, s32);
822 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
823
825 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
826 .widenScalarToNextPow2(0)
827 .clampScalar(0, s32, s64)
828 .clampScalar(1, s32, s32)
831 .lowerIf(isVector(0));
832
833 // Pointer-handling
834 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
835
836 if (TM.getCodeModel() == CodeModel::Small)
837 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
838 else
839 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
840
841 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
842 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
843
845 .legalFor({{s64, p0}, {v2s64, v2p0}})
846 .widenScalarToNextPow2(0, 64)
847 .clampScalar(0, s64, s64)
848 .clampMaxNumElements(0, s64, 2);
849
851 .unsupportedIf([&](const LegalityQuery &Query) {
852 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
853 })
854 .legalFor({{p0, s64}, {v2p0, v2s64}})
855 .clampMaxNumElements(1, s64, 2);
856
857 // Casts for 32 and 64-bit width type are just copies.
858 // Same for 128-bit width type, except they are on the FPR bank.
860 // Keeping 32-bit instructions legal to prevent regression in some tests
861 .legalForCartesianProduct({s32, v2s16, v4s8})
862 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
863 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
864 .lowerIf([=](const LegalityQuery &Query) {
865 return Query.Types[0].isVector() != Query.Types[1].isVector();
866 })
868 .clampNumElements(0, v8s8, v16s8)
869 .clampNumElements(0, v4s16, v8s16)
870 .clampNumElements(0, v2s32, v4s32)
871 .lower();
872
873 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
874
875 // va_list must be a pointer, but most sized types are pretty easy to handle
876 // as the destination.
878 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
879 .clampScalar(0, s8, s64)
880 .widenScalarToNextPow2(0, /*Min*/ 8);
881
882 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
883 .lowerIf(
884 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
885
886 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
887
888 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
889 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
890 .customFor(!UseOutlineAtomics, {{s128, p0}})
891 .libcallFor(UseOutlineAtomics,
892 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
893 .clampScalar(0, s32, s64);
894
895 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
896 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
897 G_ATOMICRMW_XOR})
898 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
899 .libcallFor(UseOutlineAtomics,
900 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
901 .clampScalar(0, s32, s64);
902
903 // Do not outline these atomic operations, as per the comment in
904 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
906 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
907 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
908 .clampScalar(0, s32, s64);
909
910 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
911
912 // Merge/Unmerge
913 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
914 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
915 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
917 .widenScalarToNextPow2(LitTyIdx, 8)
918 .widenScalarToNextPow2(BigTyIdx, 32)
919 .clampScalar(LitTyIdx, s8, s64)
920 .clampScalar(BigTyIdx, s32, s128)
921 .legalIf([=](const LegalityQuery &Q) {
922 switch (Q.Types[BigTyIdx].getSizeInBits()) {
923 case 32:
924 case 64:
925 case 128:
926 break;
927 default:
928 return false;
929 }
930 switch (Q.Types[LitTyIdx].getSizeInBits()) {
931 case 8:
932 case 16:
933 case 32:
934 case 64:
935 return true;
936 default:
937 return false;
938 }
939 });
940 }
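  // Worked instances of the merge/unmerge rule above (sketch): a 128-bit value
  // can be unmerged into two s64 or four s32 pieces, and an s64 can be merged
  // from four s16 pieces; wider or odd-sized types are widened/clamped first.
  static_assert(2 * 64 == 128 && 4 * 32 == 128 && 4 * 16 == 64,
                "size pairs accepted by the legalIf predicate above");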
941
942 // TODO : nxv4s16, nxv2s16, nxv2s32
943 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
944 .legalFor(HasSVE, {{s16, nxv16s8, s64},
945 {s16, nxv8s16, s64},
946 {s32, nxv4s32, s64},
947 {s64, nxv2s64, s64}})
948 .unsupportedIf([=](const LegalityQuery &Query) {
949 const LLT &EltTy = Query.Types[1].getElementType();
950 if (Query.Types[1].isScalableVector())
951 return false;
952 return Query.Types[0] != EltTy;
953 })
954 .minScalar(2, s64)
955 .customIf([=](const LegalityQuery &Query) {
956 const LLT &VecTy = Query.Types[1];
957 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
958 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
959 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
960 })
961 .minScalarOrEltIf(
962 [=](const LegalityQuery &Query) {
963 // We want to promote <M x s1> to <M x s64> if that wouldn't
964 // cause the total vec size to be > 128b.
965 return Query.Types[1].isFixedVector() &&
966 Query.Types[1].getNumElements() <= 2;
967 },
968 0, s64)
969 .minScalarOrEltIf(
970 [=](const LegalityQuery &Query) {
971 return Query.Types[1].isFixedVector() &&
972 Query.Types[1].getNumElements() <= 4;
973 },
974 0, s32)
975 .minScalarOrEltIf(
976 [=](const LegalityQuery &Query) {
977 return Query.Types[1].isFixedVector() &&
978 Query.Types[1].getNumElements() <= 8;
979 },
980 0, s16)
981 .minScalarOrEltIf(
982 [=](const LegalityQuery &Query) {
983 return Query.Types[1].isFixedVector() &&
984 Query.Types[1].getNumElements() <= 16;
985 },
986 0, s8)
987 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
989 .clampMaxNumElements(1, s64, 2)
990 .clampMaxNumElements(1, s32, 4)
991 .clampMaxNumElements(1, s16, 8)
992 .clampMaxNumElements(1, s8, 16)
993 .clampMaxNumElements(1, p0, 2);
994
995 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
996 .legalIf(
997 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
998 .legalFor(HasSVE, {{nxv16s8, s32, s64},
999 {nxv8s16, s32, s64},
1000 {nxv4s32, s32, s64},
1001 {nxv2s64, s64, s64}})
1003 .widenVectorEltsToVectorMinSize(0, 64)
1004 .clampNumElements(0, v8s8, v16s8)
1005 .clampNumElements(0, v4s16, v8s16)
1006 .clampNumElements(0, v2s32, v4s32)
1007 .clampMaxNumElements(0, s64, 2)
1008 .clampMaxNumElements(0, p0, 2);
1009
1010 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1011 .legalFor({{v8s8, s8},
1012 {v16s8, s8},
1013 {v4s16, s16},
1014 {v8s16, s16},
1015 {v2s32, s32},
1016 {v4s32, s32},
1017 {v2p0, p0},
1018 {v2s64, s64}})
1019 .clampNumElements(0, v4s32, v4s32)
1020 .clampNumElements(0, v2s64, v2s64)
1021 .minScalarOrElt(0, s8)
1024 .minScalarSameAs(1, 0);
1025
1026 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1027
1030 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
1031 .scalarize(1)
1032 .widenScalarToNextPow2(1, /*Min=*/32)
1033 .clampScalar(1, s32, s64)
1034 .scalarSameSizeAs(0, 1);
1035 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
1036
1037 // TODO: Custom lowering for v2s32, v4s32, v2s64.
1038 getActionDefinitionsBuilder(G_BITREVERSE)
1039 .legalFor({s32, s64, v8s8, v16s8})
1040 .widenScalarToNextPow2(0, /*Min = */ 32)
1041 .clampScalar(0, s32, s64)
1042 .lower();
1043
1044 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
1045
1047 .lowerIf(isVector(0))
1048 .widenScalarToNextPow2(1, /*Min=*/32)
1049 .clampScalar(1, s32, s64)
1050 .scalarSameSizeAs(0, 1)
1051 .legalFor(HasCSSC, {s32, s64})
1052 .customFor(!HasCSSC, {s32, s64});
1053
1054 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1055 .legalIf([=](const LegalityQuery &Query) {
1056 const LLT &DstTy = Query.Types[0];
1057 const LLT &SrcTy = Query.Types[1];
1058 // For now just support the TBL2 variant which needs the source vectors
1059 // to be the same size as the dest.
1060 if (DstTy != SrcTy)
1061 return false;
1062 return llvm::is_contained(
1063 {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1064 })
1065 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1066 // just want those lowered into G_BUILD_VECTOR
1067 .lowerIf([=](const LegalityQuery &Query) {
1068 return !Query.Types[1].isVector();
1069 })
1070 .moreElementsIf(
1071 [](const LegalityQuery &Query) {
1072 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1073 Query.Types[0].getNumElements() >
1074 Query.Types[1].getNumElements();
1075 },
1076 changeTo(1, 0))
1078 .moreElementsIf(
1079 [](const LegalityQuery &Query) {
1080 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1081 Query.Types[0].getNumElements() <
1082 Query.Types[1].getNumElements();
1083 },
1084 changeTo(0, 1))
1085 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1086 .clampNumElements(0, v8s8, v16s8)
1087 .clampNumElements(0, v4s16, v8s16)
1088 .clampNumElements(0, v4s32, v4s32)
1089 .clampNumElements(0, v2s64, v2s64)
1090 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1091 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1092 // Bitcast pointers vector to i64.
1093 const LLT DstTy = Query.Types[0];
1094 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1095 });
1096
1097 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1098 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1099 .bitcastIf(
1100 [=](const LegalityQuery &Query) {
1101 return Query.Types[0].getSizeInBits() <= 128 &&
1102 Query.Types[1].getSizeInBits() <= 64;
1103 },
1104 [=](const LegalityQuery &Query) {
1105 const LLT DstTy = Query.Types[0];
1106 const LLT SrcTy = Query.Types[1];
1107 return std::pair(
1108 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1111 SrcTy.getNumElements())));
1112 });
1113
1114 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1115
1116 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1117
1118 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1119
1120 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1121
1122 if (ST.hasMOPS()) {
1123 // G_BZERO is not supported. Currently it is only emitted by
1124 // PreLegalizerCombiner for G_MEMSET with zero constant.
1126
1128 .legalForCartesianProduct({p0}, {s64}, {s64})
1129 .customForCartesianProduct({p0}, {s8}, {s64})
1130 .immIdx(0); // Inform verifier imm idx 0 is handled.
1131
1132 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1133 .legalForCartesianProduct({p0}, {p0}, {s64})
1134 .immIdx(0); // Inform verifier imm idx 0 is handled.
1135
1136 // G_MEMCPY_INLINE does not have a tailcall immediate
1137 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1138 .legalForCartesianProduct({p0}, {p0}, {s64});
1139
1140 } else {
1141 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1142 .libcall();
1143 }
1144
1145 // FIXME: Legal vector types are only legal with NEON.
1147 .legalFor(HasCSSC, {s32, s64})
1148 .legalFor(PackedVectorAllTypeList)
1149 .customIf([=](const LegalityQuery &Q) {
1150 // TODO: Fix suboptimal codegen for 128+ bit types.
1151 LLT SrcTy = Q.Types[0];
1152 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1153 })
1154 .widenScalarIf(
1155 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1156 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1157 .widenScalarIf(
1158 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1159 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1160 .clampNumElements(0, v8s8, v16s8)
1161 .clampNumElements(0, v4s16, v8s16)
1162 .clampNumElements(0, v2s32, v4s32)
1163 .clampNumElements(0, v2s64, v2s64)
1165 .lower();
1166
1167 // For fadd reductions we have pairwise operations available. We treat the
1168 // usual legal types as legal and handle the lowering to pairwise instructions
1169 // later.
1170 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1171 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1172 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1173 .minScalarOrElt(0, MinFPScalar)
1174 .clampMaxNumElements(1, s64, 2)
1175 .clampMaxNumElements(1, s32, 4)
1176 .clampMaxNumElements(1, s16, 8)
1177 .lower();
1178
1179 // For fmul reductions we need to split up into individual operations. We
1180 // clamp to 128-bit vectors, then to 64-bit vectors, to produce a cascade of
1181 // smaller types, followed by scalarizing what remains.
1182 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1183 .minScalarOrElt(0, MinFPScalar)
1184 .clampMaxNumElements(1, s64, 2)
1185 .clampMaxNumElements(1, s32, 4)
1186 .clampMaxNumElements(1, s16, 8)
1187 .clampMaxNumElements(1, s32, 2)
1188 .clampMaxNumElements(1, s16, 4)
1189 .scalarize(1)
1190 .lower();
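  // Sketch of the cascade above for f32 reductions: an oversized source is
  // first clamped to a 128-bit v4s32, then to a 64-bit v2s32, and whatever
  // remains is scalarized into individual multiplies.
  static_assert(4 * 32 == 128 && 2 * 32 == 64,
                "two clamp steps: 128-bit vectors, then 64-bit vectors");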
1191
1192 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1193 .scalarize(2)
1194 .lower();
1195
1196 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1197 .legalFor({{s8, v16s8},
1198 {s8, v8s8},
1199 {s16, v8s16},
1200 {s16, v4s16},
1201 {s32, v4s32},
1202 {s32, v2s32},
1203 {s64, v2s64}})
1204 .clampMaxNumElements(1, s64, 2)
1205 .clampMaxNumElements(1, s32, 4)
1206 .clampMaxNumElements(1, s16, 8)
1207 .clampMaxNumElements(1, s8, 16)
1208 .lower();
1209
1210 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1211 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1212 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1213 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1214 .minScalarOrElt(0, MinFPScalar)
1215 .clampMaxNumElements(1, s64, 2)
1216 .clampMaxNumElements(1, s32, 4)
1217 .clampMaxNumElements(1, s16, 8)
1218 .lower();
1219
1220 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1221 .clampMaxNumElements(1, s32, 2)
1222 .clampMaxNumElements(1, s16, 4)
1223 .clampMaxNumElements(1, s8, 8)
1224 .scalarize(1)
1225 .lower();
1226
1228 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1229 .legalFor({{s8, v8s8},
1230 {s8, v16s8},
1231 {s16, v4s16},
1232 {s16, v8s16},
1233 {s32, v2s32},
1234 {s32, v4s32}})
1235 .moreElementsIf(
1236 [=](const LegalityQuery &Query) {
1237 return Query.Types[1].isVector() &&
1238 Query.Types[1].getElementType() != s8 &&
1239 Query.Types[1].getNumElements() & 1;
1240 },
1242 .clampMaxNumElements(1, s64, 2)
1243 .clampMaxNumElements(1, s32, 4)
1244 .clampMaxNumElements(1, s16, 8)
1245 .clampMaxNumElements(1, s8, 16)
1246 .scalarize(1)
1247 .lower();
1248
1250 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1251 // Try to break down into smaller vectors as long as they're at least 64
1252 // bits. This lets us use vector operations for some parts of the
1253 // reduction.
1254 .fewerElementsIf(
1255 [=](const LegalityQuery &Q) {
1256 LLT SrcTy = Q.Types[1];
1257 if (SrcTy.isScalar())
1258 return false;
1259 if (!isPowerOf2_32(SrcTy.getNumElements()))
1260 return false;
1261 // We can usually perform 64b vector operations.
1262 return SrcTy.getSizeInBits() > 64;
1263 },
1264 [=](const LegalityQuery &Q) {
1265 LLT SrcTy = Q.Types[1];
1266 return std::make_pair(1, SrcTy.divide(2));
1267 })
1268 .scalarize(1)
1269 .lower();
1270
1271 // TODO: Update this to correct handling when adding AArch64/SVE support.
1272 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1273
1274 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1275 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1276 .lower();
1277
1279 .legalFor({{s32, s64}, {s64, s64}})
1280 .customIf([=](const LegalityQuery &Q) {
1281 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1282 })
1283 .lower();
1285
1286 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1287 .customFor({{s32, s32}, {s64, s64}});
1288
1289 auto always = [=](const LegalityQuery &Q) { return true; };
1291 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
1292 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
1293 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
1294 .customFor({{s128, s128},
1295 {v2s64, v2s64},
1296 {v2s32, v2s32},
1297 {v4s32, v4s32},
1298 {v4s16, v4s16},
1299 {v8s16, v8s16}})
1300 .clampScalar(0, s32, s128)
1302 .minScalarEltSameAsIf(always, 1, 0)
1303 .maxScalarEltSameAsIf(always, 1, 0);
1304
1305 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1306 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1307 .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
1308 .clampNumElements(0, v8s8, v16s8)
1309 .clampNumElements(0, v4s16, v8s16)
1310 .clampNumElements(0, v2s32, v4s32)
1311 .clampMaxNumElements(0, s64, 2)
1314 .lower();
1315
1316 // TODO: Libcall support for s128.
1317 // TODO: s16 should be legal with full FP16 support.
1318 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1319 .legalFor({{s64, s32}, {s64, s64}});
1320
1321 // TODO: Custom legalization for mismatched types.
1322 getActionDefinitionsBuilder(G_FCOPYSIGN)
1324 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1325 [=](const LegalityQuery &Query) {
1326 const LLT Ty = Query.Types[0];
1327 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1328 })
1329 .lower();
1330
1332
1333 // Access to floating-point environment.
1334 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1335 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1336 .libcall();
1337
1338 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1339
1340 getActionDefinitionsBuilder(G_PREFETCH).custom();
1341
1342 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1343
1344 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1345 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1347 .immIdx(0); // Inform verifier imm idx 0 is handled.
1348
1349 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1350 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1351 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1352
1354 verify(*ST.getInstrInfo());
1355}
1356
1357bool AArch64LegalizerInfo::legalizeCustom(
1358 LegalizerHelper &Helper, MachineInstr &MI,
1359 LostDebugLocObserver &LocObserver) const {
1360 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1361 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1362 GISelChangeObserver &Observer = Helper.Observer;
1363 switch (MI.getOpcode()) {
1364 default:
1365 // No idea what to do.
1366 return false;
1367 case TargetOpcode::G_VAARG:
1368 return legalizeVaArg(MI, MRI, MIRBuilder);
1369 case TargetOpcode::G_LOAD:
1370 case TargetOpcode::G_STORE:
1371 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1372 case TargetOpcode::G_SHL:
1373 case TargetOpcode::G_ASHR:
1374 case TargetOpcode::G_LSHR:
1375 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1376 case TargetOpcode::G_GLOBAL_VALUE:
1377 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1378 case TargetOpcode::G_SBFX:
1379 case TargetOpcode::G_UBFX:
1380 return legalizeBitfieldExtract(MI, MRI, Helper);
1381 case TargetOpcode::G_FSHL:
1382 case TargetOpcode::G_FSHR:
1383 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1384 case TargetOpcode::G_ROTR:
1385 return legalizeRotate(MI, MRI, Helper);
1386 case TargetOpcode::G_CTPOP:
1387 return legalizeCTPOP(MI, MRI, Helper);
1388 case TargetOpcode::G_ATOMIC_CMPXCHG:
1389 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1390 case TargetOpcode::G_CTTZ:
1391 return legalizeCTTZ(MI, Helper);
1392 case TargetOpcode::G_BZERO:
1393 case TargetOpcode::G_MEMCPY:
1394 case TargetOpcode::G_MEMMOVE:
1395 case TargetOpcode::G_MEMSET:
1396 return legalizeMemOps(MI, Helper);
1397 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1398 return legalizeExtractVectorElt(MI, MRI, Helper);
1399 case TargetOpcode::G_DYN_STACKALLOC:
1400 return legalizeDynStackAlloc(MI, Helper);
1401 case TargetOpcode::G_PREFETCH:
1402 return legalizePrefetch(MI, Helper);
1403 case TargetOpcode::G_ABS:
1404 return Helper.lowerAbsToCNeg(MI);
1405 case TargetOpcode::G_ICMP:
1406 return legalizeICMP(MI, MRI, MIRBuilder);
1407 }
1408
1409 llvm_unreachable("expected switch to return");
1410}
1411
1412bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1414 MachineIRBuilder &MIRBuilder,
1415 GISelChangeObserver &Observer,
1416 LegalizerHelper &Helper) const {
1417 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1418 MI.getOpcode() == TargetOpcode::G_FSHR);
1419
1420 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1421 // lowering
1422 Register ShiftNo = MI.getOperand(3).getReg();
1423 LLT ShiftTy = MRI.getType(ShiftNo);
1424 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1425
1426 // Adjust shift amount according to Opcode (FSHL/FSHR)
1427 // Convert FSHL to FSHR
1428 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1429 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1430
1431 // Lower non-constant shifts and leave zero shifts to the optimizer.
1432 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1433 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1435
1436 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1437
1438 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1439
1440 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount
1441 // in the range [0, BitWidth), it is already legal.
1442 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1443 VRegAndVal->Value.ult(BitWidth))
1444 return true;
1445
1446 // Cast the ShiftNumber to a 64-bit type
1447 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1448
1449 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1450 Observer.changingInstr(MI);
1451 MI.getOperand(3).setReg(Cast64.getReg(0));
1452 Observer.changedInstr(MI);
1453 }
1454 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1455 // instruction
1456 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1457 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1458 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1459 Cast64.getReg(0)});
1460 MI.eraseFromParent();
1461 }
1462 return true;
1463}
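// A self-contained sketch (not used by the legalizer) of the identity the
// FSHL -> FSHR rewrite above relies on: for a 32-bit type and an amount Amt
// with 0 < Amt < 32, fshl(X, Y, Amt) == fshr(X, Y, 32 - Amt).
static constexpr unsigned long long fshl32Sketch(unsigned long long X,
                                                 unsigned long long Y,
                                                 unsigned Amt) {
  return ((X << Amt) | (Y >> (32 - Amt))) & 0xffffffffULL;
}
static constexpr unsigned long long fshr32Sketch(unsigned long long X,
                                                 unsigned long long Y,
                                                 unsigned Amt) {
  return ((X << (32 - Amt)) | (Y >> Amt)) & 0xffffffffULL;
}
static_assert(fshl32Sketch(0x12345678, 0x9abcdef0, 8) ==
                  fshr32Sketch(0x12345678, 0x9abcdef0, 24),
              "FSHL by Amt matches FSHR by BitWidth - Amt");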
1464
1465bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1467 MachineIRBuilder &MIRBuilder) const {
1468 Register DstReg = MI.getOperand(0).getReg();
1469 Register SrcReg1 = MI.getOperand(2).getReg();
1470 Register SrcReg2 = MI.getOperand(3).getReg();
1471 LLT DstTy = MRI.getType(DstReg);
1472 LLT SrcTy = MRI.getType(SrcReg1);
1473
1474 // Check the vector types are legal
1475 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1476 DstTy.getNumElements() != SrcTy.getNumElements() ||
1477 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1478 return false;
1479
1480 // Lower G_ICMP NE to G_ICMP EQ plus a NOT, to allow better pattern
1481 // matching in the following passes.
1482 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1483 if (Pred != CmpInst::ICMP_NE)
1484 return true;
1485 Register CmpReg =
1486 MIRBuilder
1487 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1488 .getReg(0);
1489 MIRBuilder.buildNot(DstReg, CmpReg);
1490
1491 MI.eraseFromParent();
1492 return true;
1493}
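// Sketch of why the rewrite above is sound: AArch64 vector compares produce
// all-ones / all-zeros lanes, so the bitwise NOT of the EQ mask (emitted here
// via buildNot, i.e. a G_XOR with -1) is exactly the NE mask.
static_assert(~0xFFFFFFFFu == 0x0u && ~0x0u == 0xFFFFFFFFu,
              "NOT flips an all-ones/all-zeros compare lane");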
1494
1495bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1497 LegalizerHelper &Helper) const {
1498 // To allow for imported patterns to match, we ensure that the rotate amount
1499 // is 64b with an extension.
1500 Register AmtReg = MI.getOperand(2).getReg();
1501 LLT AmtTy = MRI.getType(AmtReg);
1502 (void)AmtTy;
1503 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1504 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1505 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1506 Helper.Observer.changingInstr(MI);
1507 MI.getOperand(2).setReg(NewAmt.getReg(0));
1508 Helper.Observer.changedInstr(MI);
1509 return true;
1510}
1511
1512bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1514 GISelChangeObserver &Observer) const {
1515 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1516 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1517 // G_ADD_LOW instructions.
1518 // By splitting this here, we can optimize accesses in the small code model by
1519 // folding in the G_ADD_LOW into the load/store offset.
1520 auto &GlobalOp = MI.getOperand(1);
1521 // Don't modify an intrinsic call.
1522 if (GlobalOp.isSymbol())
1523 return true;
1524 const auto* GV = GlobalOp.getGlobal();
1525 if (GV->isThreadLocal())
1526 return true; // Don't want to modify TLS vars.
1527
1528 auto &TM = ST->getTargetLowering()->getTargetMachine();
1529 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1530
1531 if (OpFlags & AArch64II::MO_GOT)
1532 return true;
1533
1534 auto Offset = GlobalOp.getOffset();
1535 Register DstReg = MI.getOperand(0).getReg();
1536 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1537 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1538 // Set the regclass on the dest reg too.
1539 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1540
1541 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1542 // by creating a MOVK that sets bits 48-63 of the register to (global address
1543 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1544 // prevent an incorrect tag being generated during relocation when the
1545 // global appears before the code section. Without the offset, a global at
1546 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1547 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1548 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1549 // instead of `0xf`.
1550 // This assumes that we're in the small code model so we can assume a binary
1551 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1552 // binary must also be loaded into address range [0, 2^48). Both of these
1553 // properties need to be ensured at runtime when using tagged addresses.
1554 if (OpFlags & AArch64II::MO_TAGGED) {
1555 assert(!Offset &&
1556 "Should not have folded in an offset for a tagged global!");
1557 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1558 .addGlobalAddress(GV, 0x100000000,
1560 .addImm(48);
1561 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1562 }
1563
1564 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1565 .addGlobalAddress(GV, Offset,
1567 MI.eraseFromParent();
1568 return true;
1569}
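// Compile-time check of the worked example in the comment above: without the
// extra 2^32 bias, bits 56-63 (which carry the MTE tag) of the page-relative
// value would come out as 0x0e; with the bias they are the expected 0x0f.
static_assert(((0x0f00000000001000ULL - 0x2000ULL) >> 56) == 0x0e,
              "unbiased offset would yield tag 0xe");
static_assert(((0x0f00000000001000ULL + 0x100000000ULL - 0x2000ULL) >> 56) ==
                  0x0f,
              "the 0x100000000 bias restores tag 0xf");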
1570
1571bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1572 MachineInstr &MI) const {
1573 auto LowerBinOp = [&MI](unsigned Opcode) {
1574 MachineIRBuilder MIB(MI);
1575 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1576 {MI.getOperand(2), MI.getOperand(3)});
1577 MI.eraseFromParent();
1578 return true;
1579 };
1580
1581 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1582 switch (IntrinsicID) {
1583 case Intrinsic::vacopy: {
1584 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1585 unsigned VaListSize =
1586 (ST->isTargetDarwin() || ST->isTargetWindows())
1587 ? PtrSize
1588 : ST->isTargetILP32() ? 20 : 32;
1589
1590 MachineFunction &MF = *MI.getMF();
1592 LLT::scalar(VaListSize * 8));
1593 MachineIRBuilder MIB(MI);
1594 MIB.buildLoad(Val, MI.getOperand(2),
1597 VaListSize, Align(PtrSize)));
1598 MIB.buildStore(Val, MI.getOperand(1),
1601 VaListSize, Align(PtrSize)));
1602 MI.eraseFromParent();
1603 return true;
1604 }
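  // Where the 32/20 byte sizes above come from (sketch): the AAPCS64 va_list
  // is a struct of three pointers (__stack, __gr_top, __vr_top) plus two ints
  // (__gr_offs, __vr_offs), whereas Darwin and Windows use a single pointer.
  static_assert(3 * 8 + 2 * 4 == 32, "LP64 AAPCS64 va_list is 32 bytes");
  static_assert(3 * 4 + 2 * 4 == 20, "ILP32 va_list is 20 bytes");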
1605 case Intrinsic::get_dynamic_area_offset: {
1606 MachineIRBuilder &MIB = Helper.MIRBuilder;
1607 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1608 MI.eraseFromParent();
1609 return true;
1610 }
1611 case Intrinsic::aarch64_mops_memset_tag: {
1612 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1613 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1614 // the instruction).
1615 MachineIRBuilder MIB(MI);
1616 auto &Value = MI.getOperand(3);
1617 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1618 Value.setReg(ExtValueReg);
1619 return true;
1620 }
1621 case Intrinsic::aarch64_prefetch: {
1622 MachineIRBuilder MIB(MI);
1623 auto &AddrVal = MI.getOperand(1);
1624
1625 int64_t IsWrite = MI.getOperand(2).getImm();
1626 int64_t Target = MI.getOperand(3).getImm();
1627 int64_t IsStream = MI.getOperand(4).getImm();
1628 int64_t IsData = MI.getOperand(5).getImm();
1629
1630 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1631 (!IsData << 3) | // IsDataCache bit
1632 (Target << 1) | // Cache level bits
1633 (unsigned)IsStream; // Stream bit
1634
1635 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1636 MI.eraseFromParent();
1637 return true;
1638 }
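  // Worked example of the PrfOp encoding above: a temporal ("keep") data
  // prefetch for write targeting L1 has IsWrite=1, IsData=1, Target=0,
  // IsStream=0, giving 0b10000; identifying that value with the PRFM operand
  // name PSTL1KEEP is an illustration, not something this file relies on.
  static_assert(((1 << 4) | (0 << 3) | (0 << 1) | 0) == 0b10000,
                "prefetch-for-write, data cache, L1, keep policy");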
1639 case Intrinsic::aarch64_neon_uaddv:
1640 case Intrinsic::aarch64_neon_saddv:
1641 case Intrinsic::aarch64_neon_umaxv:
1642 case Intrinsic::aarch64_neon_smaxv:
1643 case Intrinsic::aarch64_neon_uminv:
1644 case Intrinsic::aarch64_neon_sminv: {
1645 MachineIRBuilder MIB(MI);
1646 MachineRegisterInfo &MRI = *MIB.getMRI();
1647 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1648 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1649 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1650
1651 auto OldDst = MI.getOperand(0).getReg();
1652 auto OldDstTy = MRI.getType(OldDst);
1653 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1654 if (OldDstTy == NewDstTy)
1655 return true;
1656
1657 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1658
1659 Helper.Observer.changingInstr(MI);
1660 MI.getOperand(0).setReg(NewDst);
1661 Helper.Observer.changedInstr(MI);
1662
1663 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1664 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1665 OldDst, NewDst);
1666
1667 return true;
1668 }
1669 case Intrinsic::aarch64_neon_uaddlp:
1670 case Intrinsic::aarch64_neon_saddlp: {
1671 MachineIRBuilder MIB(MI);
1672
1673 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1674 ? AArch64::G_UADDLP
1675 : AArch64::G_SADDLP;
1676 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1677 MI.eraseFromParent();
1678
1679 return true;
1680 }
1681 case Intrinsic::aarch64_neon_uaddlv:
1682 case Intrinsic::aarch64_neon_saddlv: {
1683 MachineIRBuilder MIB(MI);
1684 MachineRegisterInfo &MRI = *MIB.getMRI();
1685
1686 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1687 ? AArch64::G_UADDLV
1688 : AArch64::G_SADDLV;
1689 Register DstReg = MI.getOperand(0).getReg();
1690 Register SrcReg = MI.getOperand(2).getReg();
1691 LLT DstTy = MRI.getType(DstReg);
1692
1693 LLT MidTy, ExtTy;
1694 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1695 MidTy = LLT::fixed_vector(4, 32);
1696 ExtTy = LLT::scalar(32);
1697 } else {
1698 MidTy = LLT::fixed_vector(2, 64);
1699 ExtTy = LLT::scalar(64);
1700 }
1701
1702 Register MidReg =
1703 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1704 Register ZeroReg =
1705 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1706 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1707 {MidReg, ZeroReg})
1708 .getReg(0);
1709
1710 if (DstTy.getScalarSizeInBits() < 32)
1711 MIB.buildTrunc(DstReg, ExtReg);
1712 else
1713 MIB.buildCopy(DstReg, ExtReg);
1714
1715 MI.eraseFromParent();
1716
1717 return true;
1718 }
1719 case Intrinsic::aarch64_neon_smax:
1720 return LowerBinOp(TargetOpcode::G_SMAX);
1721 case Intrinsic::aarch64_neon_smin:
1722 return LowerBinOp(TargetOpcode::G_SMIN);
1723 case Intrinsic::aarch64_neon_umax:
1724 return LowerBinOp(TargetOpcode::G_UMAX);
1725 case Intrinsic::aarch64_neon_umin:
1726 return LowerBinOp(TargetOpcode::G_UMIN);
1727 case Intrinsic::aarch64_neon_fmax:
1728 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1729 case Intrinsic::aarch64_neon_fmin:
1730 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1731 case Intrinsic::aarch64_neon_fmaxnm:
1732 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1733 case Intrinsic::aarch64_neon_fminnm:
1734 return LowerBinOp(TargetOpcode::G_FMINNUM);
1735 case Intrinsic::aarch64_neon_smull:
1736 return LowerBinOp(AArch64::G_SMULL);
1737 case Intrinsic::aarch64_neon_umull:
1738 return LowerBinOp(AArch64::G_UMULL);
1739 case Intrinsic::aarch64_neon_abs: {
1740 // Lower the intrinsic to G_ABS.
1741 MachineIRBuilder MIB(MI);
1742 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1743 MI.eraseFromParent();
1744 return true;
1745 }
1746
1747 case Intrinsic::vector_reverse:
1748 // TODO: Add support for vector_reverse
1749 return false;
1750 }
1751
1752 return true;
1753}
1754
1755bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1756 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1757 GISelChangeObserver &Observer) const {
1758 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1759 MI.getOpcode() == TargetOpcode::G_LSHR ||
1760 MI.getOpcode() == TargetOpcode::G_SHL);
1761 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1762 // imported patterns can select it later. Either way, it will be legal.
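// Illustrative example: for
//   %amt:_(s32) = G_CONSTANT i32 3
//   %res:_(s32) = G_SHL %val:_(s32), %amt:_(s32)
// we build a fresh 64-bit constant and point the shift at it:
//   %c:_(s64) = G_CONSTANT i64 3
//   %res:_(s32) = G_SHL %val:_(s32), %c:_(s64)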
1763 Register AmtReg = MI.getOperand(2).getReg();
1764 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1765 if (!VRegAndVal)
1766 return true;
1767 // Check the shift amount is in range for an immediate form.
1768 int64_t Amount = VRegAndVal->Value.getSExtValue();
1769 if (Amount > 31)
1770 return true; // This will have to remain a register variant.
1771 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1772 Observer.changingInstr(MI);
1773 MI.getOperand(2).setReg(ExtCst.getReg(0));
1774 Observer.changedInstr(MI);
1775 return true;
1776}
1777
1778 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1779 MachineRegisterInfo &MRI) {
1780 Base = Root;
1781 Offset = 0;
1782
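// LDP/STP immediates are signed 7-bit values scaled by the access size (8
// bytes here), i.e. multiples of 8 in the range [-512, 504]; only a G_PTR_ADD
// with such a constant offset can be folded into the addressing mode.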
1783 Register NewBase;
1784 int64_t NewOffset;
1785 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1786 isShiftedInt<7, 3>(NewOffset)) {
1787 Base = NewBase;
1788 Offset = NewOffset;
1789 }
1790}
1791
1792// FIXME: This should be removed and replaced with the generic bitcast legalize
1793// action.
1794bool AArch64LegalizerInfo::legalizeLoadStore(
1795 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1796 GISelChangeObserver &Observer) const {
1797 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1798 MI.getOpcode() == TargetOpcode::G_LOAD);
1799 // Here we handle vector loads/stores where the value type might have
1800 // pointer elements, which the SelectionDAG importer can't handle. To allow
1801 // the existing patterns for s64 to fire for p0, we bitcast the value to use
1802 // s64 types.
1803
1804 // Custom legalization requires that the instruction, if not deleted, be
1805 // fully legal afterwards. To allow further legalization of the instruction,
1806 // we create a new instruction and erase the existing one.
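// For example (illustrative MIR), a store of a vector of pointers
//   G_STORE %val:_(<2 x p0>), %addr:_(p0)
// is rebuilt further below as
//   %cast:_(<2 x s64>) = G_BITCAST %val:_(<2 x p0>)
//   G_STORE %cast:_(<2 x s64>), %addr:_(p0)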
1807
1808 Register ValReg = MI.getOperand(0).getReg();
1809 const LLT ValTy = MRI.getType(ValReg);
1810
1811 if (ValTy == LLT::scalar(128)) {
1812
1813 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1814 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1815 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1816 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1817 bool IsRcpC3 =
1818 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1819
1820 LLT s64 = LLT::scalar(64);
1821
1822 unsigned Opcode;
1823 if (IsRcpC3) {
1824 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1825 } else {
1826 // For LSE2, loads/stores should have been converted to monotonic and had
1827 // a fence inserted after them.
1828 assert(Ordering == AtomicOrdering::Monotonic ||
1829 Ordering == AtomicOrdering::Unordered);
1830 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1831
1832 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1833 }
1834
1835 MachineInstrBuilder NewI;
1836 if (IsLoad) {
1837 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1838 MIRBuilder.buildMergeLikeInstr(
1839 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1840 } else {
1841 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1842 NewI = MIRBuilder.buildInstr(
1843 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1844 }
1845
1846 if (IsRcpC3) {
1847 NewI.addUse(MI.getOperand(1).getReg());
1848 } else {
1849 Register Base;
1850 int Offset;
1851 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1852 NewI.addUse(Base);
1853 NewI.addImm(Offset / 8);
1854 }
1855
1856 NewI.cloneMemRefs(MI);
1857 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1858 *MRI.getTargetRegisterInfo(),
1859 *ST->getRegBankInfo());
1860 MI.eraseFromParent();
1861 return true;
1862 }
1863
1864 if (!ValTy.isPointerVector() ||
1865 ValTy.getElementType().getAddressSpace() != 0) {
1866 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1867 return false;
1868 }
1869
1870 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1871 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1872 auto &MMO = **MI.memoperands_begin();
1873 MMO.setType(NewTy);
1874
1875 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1876 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1877 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1878 } else {
1879 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1880 MIRBuilder.buildBitcast(ValReg, NewLoad);
1881 }
1882 MI.eraseFromParent();
1883 return true;
1884}
1885
1886bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1887 MachineRegisterInfo &MRI,
1888 MachineIRBuilder &MIRBuilder) const {
1889 MachineFunction &MF = MIRBuilder.getMF();
1890 Align Alignment(MI.getOperand(2).getImm());
1891 Register Dst = MI.getOperand(0).getReg();
1892 Register ListPtr = MI.getOperand(1).getReg();
1893
1894 LLT PtrTy = MRI.getType(ListPtr);
1895 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1896
1897 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1898 const Align PtrAlign = Align(PtrSize);
1899 auto List = MIRBuilder.buildLoad(
1900 PtrTy, ListPtr,
1901 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1902 PtrTy, PtrAlign));
1903
1904 MachineInstrBuilder DstPtr;
1905 if (Alignment > PtrAlign) {
1906 // Realign the list to the actual required alignment.
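// i.e. compute List = (List + Alignment - 1) & ~(Alignment - 1); for example,
// with Alignment = 16 a list pointer of 0x1004 becomes 0x1010.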
1907 auto AlignMinus1 =
1908 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1909 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1910 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1911 } else
1912 DstPtr = List;
1913
1914 LLT ValTy = MRI.getType(Dst);
1915 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1916 MIRBuilder.buildLoad(
1917 Dst, DstPtr,
1918 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1919 ValTy, std::max(Alignment, PtrAlign)));
1920
1921 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1922
1923 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1924
1925 MIRBuilder.buildStore(NewList, ListPtr,
1926 *MF.getMachineMemOperand(MachinePointerInfo(),
1927 MachineMemOperand::MOStore,
1928 PtrTy, PtrAlign));
1929
1930 MI.eraseFromParent();
1931 return true;
1932}
1933
1934bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1935 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1936 // Only legal if we can select immediate forms.
1937 // TODO: Lower this otherwise.
1938 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1939 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1940}
1941
1942bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1943 MachineRegisterInfo &MRI,
1944 LegalizerHelper &Helper) const {
1945 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1946 // it can be more efficiently lowered to the following sequence that uses
1947 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1948 // registers are cheap.
1949 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1950 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1951 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1952 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1953 //
1954 // For 128 bit vector popcounts, we lower to the following sequence:
1955 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1956 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1957 // uaddlp.4s v0, v0 // v4s32, v2s64
1958 // uaddlp.2d v0, v0 // v2s64
1959 //
1960 // For 64 bit vector popcounts, we lower to the following sequence:
1961 // cnt.8b v0, v0 // v4s16, v2s32
1962 // uaddlp.4h v0, v0 // v4s16, v2s32
1963 // uaddlp.2s v0, v0 // v2s32
1964
1965 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1966 Register Dst = MI.getOperand(0).getReg();
1967 Register Val = MI.getOperand(1).getReg();
1968 LLT Ty = MRI.getType(Val);
1969 unsigned Size = Ty.getSizeInBits();
1970
1971 assert(Ty == MRI.getType(Dst) &&
1972 "Expected src and dst to have the same type!");
1973
1974 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1975 LLT s64 = LLT::scalar(64);
1976
1977 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1978 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1979 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1980 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1981
1982 MIRBuilder.buildZExt(Dst, Add);
1983 MI.eraseFromParent();
1984 return true;
1985 }
1986
1987 if (!ST->hasNEON() ||
1988 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1989 // Use generic lowering when custom lowering is not possible.
1990 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1991 Helper.lowerBitCount(MI) ==
1992 LegalizerHelper::LegalizeResult::Legalized;
1993 }
1994
1995 // Pre-conditioning: widen Val up to the nearest vector type.
1996 // s32,s64,v4s16,v2s32 -> v8s8
1997 // v8s16,v4s32,v2s64 -> v16s8
1998 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1999 if (Ty.isScalar()) {
2000 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2001 if (Size == 32) {
2002 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2003 }
2004 }
2005 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2006
2007 // Count bits in each byte-sized lane.
2008 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2009
2010 // Sum across lanes.
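// With +dotprod, a dot product against an all-ones vector (G_UDOT) sums each
// group of four adjacent byte counts into a 32-bit lane; the v2s64 case then
// needs one extra pairwise add (G_UADDLP) to widen the 32-bit lanes to 64 bits.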
2011
2012 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2013 Ty.getScalarSizeInBits() != 16) {
2014 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2015 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2016 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2017 MachineInstrBuilder Sum;
2018
2019 if (Ty == LLT::fixed_vector(2, 64)) {
2020 auto UDOT =
2021 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2022 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2023 } else if (Ty == LLT::fixed_vector(4, 32)) {
2024 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2025 } else if (Ty == LLT::fixed_vector(2, 32)) {
2026 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2027 } else {
2028 llvm_unreachable("unexpected vector shape");
2029 }
2030
2031 Sum->getOperand(0).setReg(Dst);
2032 MI.eraseFromParent();
2033 return true;
2034 }
2035
2036 Register HSum = CTPOP.getReg(0);
2037 unsigned Opc;
2038 SmallVector<LLT> HAddTys;
2039 if (Ty.isScalar()) {
2040 Opc = Intrinsic::aarch64_neon_uaddlv;
2041 HAddTys.push_back(LLT::scalar(32));
2042 } else if (Ty == LLT::fixed_vector(8, 16)) {
2043 Opc = Intrinsic::aarch64_neon_uaddlp;
2044 HAddTys.push_back(LLT::fixed_vector(8, 16));
2045 } else if (Ty == LLT::fixed_vector(4, 32)) {
2046 Opc = Intrinsic::aarch64_neon_uaddlp;
2047 HAddTys.push_back(LLT::fixed_vector(8, 16));
2048 HAddTys.push_back(LLT::fixed_vector(4, 32));
2049 } else if (Ty == LLT::fixed_vector(2, 64)) {
2050 Opc = Intrinsic::aarch64_neon_uaddlp;
2051 HAddTys.push_back(LLT::fixed_vector(8, 16));
2052 HAddTys.push_back(LLT::fixed_vector(4, 32));
2053 HAddTys.push_back(LLT::fixed_vector(2, 64));
2054 } else if (Ty == LLT::fixed_vector(4, 16)) {
2055 Opc = Intrinsic::aarch64_neon_uaddlp;
2056 HAddTys.push_back(LLT::fixed_vector(4, 16));
2057 } else if (Ty == LLT::fixed_vector(2, 32)) {
2058 Opc = Intrinsic::aarch64_neon_uaddlp;
2059 HAddTys.push_back(LLT::fixed_vector(4, 16));
2060 HAddTys.push_back(LLT::fixed_vector(2, 32));
2061 } else
2062 llvm_unreachable("unexpected vector shape");
2063 MachineInstrBuilder UADD;
2064 for (LLT HTy : HAddTys) {
2065 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2066 HSum = UADD.getReg(0);
2067 }
2068
2069 // Post-conditioning.
2070 if (Ty.isScalar() && (Size == 64 || Size == 128))
2071 MIRBuilder.buildZExt(Dst, UADD);
2072 else
2073 UADD->getOperand(0).setReg(Dst);
2074 MI.eraseFromParent();
2075 return true;
2076}
2077
2078bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2079 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2080 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2081 LLT s64 = LLT::scalar(64);
2082 auto Addr = MI.getOperand(1).getReg();
2083 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2084 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2085 auto DstLo = MRI.createGenericVirtualRegister(s64);
2086 auto DstHi = MRI.createGenericVirtualRegister(s64);
2087
2088 MachineInstrBuilder CAS;
2089 if (ST->hasLSE()) {
2090 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2091 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2092 // the rest of the MIR, so we must reassemble the extracted registers into a
2093 // 128-bit value of a known register class with code like this:
2094 //
2095 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2096 // %out = CASP %in1, ...
2097 // %OldLo = G_EXTRACT %out, 0
2098 // %OldHi = G_EXTRACT %out, 64
2099 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2100 unsigned Opcode;
2101 switch (Ordering) {
2102 case AtomicOrdering::Acquire:
2103 Opcode = AArch64::CASPAX;
2104 break;
2105 case AtomicOrdering::Release:
2106 Opcode = AArch64::CASPLX;
2107 break;
2108 case AtomicOrdering::AcquireRelease:
2109 case AtomicOrdering::SequentiallyConsistent:
2110 Opcode = AArch64::CASPALX;
2111 break;
2112 default:
2113 Opcode = AArch64::CASPX;
2114 break;
2115 }
2116
2117 LLT s128 = LLT::scalar(128);
2118 auto CASDst = MRI.createGenericVirtualRegister(s128);
2119 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2120 auto CASNew = MRI.createGenericVirtualRegister(s128);
2121 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2122 .addUse(DesiredI->getOperand(0).getReg())
2123 .addImm(AArch64::sube64)
2124 .addUse(DesiredI->getOperand(1).getReg())
2125 .addImm(AArch64::subo64);
2126 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2127 .addUse(NewI->getOperand(0).getReg())
2128 .addImm(AArch64::sube64)
2129 .addUse(NewI->getOperand(1).getReg())
2130 .addImm(AArch64::subo64);
2131
2132 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2133
2134 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2135 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2136 } else {
2137 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2138 // can take arbitrary registers, so it just has the normal GPR64 operands
2139 // that the rest of the AArch64 backend expects.
2140 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2141 unsigned Opcode;
2142 switch (Ordering) {
2143 case AtomicOrdering::Acquire:
2144 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2145 break;
2146 case AtomicOrdering::Release:
2147 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2148 break;
2149 case AtomicOrdering::AcquireRelease:
2150 case AtomicOrdering::SequentiallyConsistent:
2151 Opcode = AArch64::CMP_SWAP_128;
2152 break;
2153 default:
2154 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2155 break;
2156 }
2157
2158 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2159 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2160 {Addr, DesiredI->getOperand(0),
2161 DesiredI->getOperand(1), NewI->getOperand(0),
2162 NewI->getOperand(1)});
2163 }
2164
2165 CAS.cloneMemRefs(MI);
2166 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2167 *MRI.getTargetRegisterInfo(),
2168 *ST->getRegBankInfo());
2169
2170 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2171 MI.eraseFromParent();
2172 return true;
2173}
2174
2175bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2176 LegalizerHelper &Helper) const {
2177 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2178 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2179 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
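// AArch64 has no count-trailing-zeros instruction, so compute
// cttz(x) == ctlz(bitreverse(x)); the pair selects to RBIT + CLZ.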
2180 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2181 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2182 MI.eraseFromParent();
2183 return true;
2184}
2185
2186bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2187 LegalizerHelper &Helper) const {
2188 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2189
2190 // The tagged version (MOPSMemorySetTagged) is legalised in legalizeIntrinsic.
2191 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2192 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2193 // the instruction).
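// e.g. (illustrative) a %val:_(s8) operand becomes
//   %ext:_(s64) = G_ANYEXT %val:_(s8)
// and the memset consumes %ext; only its low 8 bits are significant.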
2194 auto &Value = MI.getOperand(1);
2195 Register ExtValueReg =
2196 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2197 Value.setReg(ExtValueReg);
2198 return true;
2199 }
2200
2201 return false;
2202}
2203
2204bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2205 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2206 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2207 auto VRegAndVal =
2208 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2209 if (VRegAndVal)
2210 return true;
2211 LLT VecTy = MRI.getType(Element->getVectorReg());
2212 if (VecTy.isScalableVector())
2213 return true;
2214 return Helper.lowerExtractInsertVectorElt(MI) !=
2216}
2217
2218bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2219 MachineInstr &MI, LegalizerHelper &Helper) const {
2220 MachineFunction &MF = *MI.getParent()->getParent();
2221 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2222 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2223
2224 // If stack probing is not enabled for this function, use the default
2225 // lowering.
2226 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2227 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2228 "inline-asm") {
2229 Helper.lowerDynStackAlloc(MI);
2230 return true;
2231 }
2232
2233 Register Dst = MI.getOperand(0).getReg();
2234 Register AllocSize = MI.getOperand(1).getReg();
2235 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2236
2237 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2238 "Unexpected type for dynamic alloca");
2239 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2240 "Unexpected type for dynamic alloca");
2241
2242 LLT PtrTy = MRI.getType(Dst);
2243 Register SPReg =
2244 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2245 Register SPTmp =
2246 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2247 auto NewMI =
2248 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2249 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2250 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2251 MIRBuilder.buildCopy(Dst, SPTmp);
2252
2253 MI.eraseFromParent();
2254 return true;
2255}
2256
2257bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2258 LegalizerHelper &Helper) const {
2259 MachineIRBuilder &MIB = Helper.MIRBuilder;
2260 auto &AddrVal = MI.getOperand(0);
2261
2262 int64_t IsWrite = MI.getOperand(1).getImm();
2263 int64_t Locality = MI.getOperand(2).getImm();
2264 int64_t IsData = MI.getOperand(3).getImm();
2265
2266 bool IsStream = Locality == 0;
2267 if (Locality != 0) {
2268 assert(Locality <= 3 && "Prefetch locality out-of-range");
2269 // The IR locality degree is ordered the opposite way to the target cache
2270 // level: locality 3 (keep as close as possible) maps to L1, which is
2271 // encoded as 0, so invert the value.
2272 Locality = 3 - Locality;
2273 }
2274
2275 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
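// Worked example: a data read prefetch with locality 3 gives IsWrite = 0,
// IsData = 1, IsStream = 0 and Locality flipped to 0, so
// PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0b00000, i.e. PLDL1KEEP.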
2276
2277 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2278 MI.eraseFromParent();
2279 return true;
2280}