1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
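 // Shorthand used below: sN is an N-bit scalar, vMsN a fixed vector of M
 // N-bit elements, p0 a 64-bit pointer in address space 0, and nxvMsN a
 // scalable (SVE) vector with a minimum of M N-bit elements.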
66
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
81 getLegacyLegalizerInfo().computeTables();
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
93
94 getActionDefinitionsBuilder(
95 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
98 v2s16, v2s8})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
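 // The clampNumElements/clampMaxNumElements rules used throughout keep fixed
 // vectors within the 64-bit (D) and 128-bit (Q) NEON register sizes, e.g.
 // fewer than 8 x s8 elements are widened towards v8s8 and anything wider
 // than 128 bits is split.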
122
124 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
126 .clampScalar(0, s32, s64)
127 .clampNumElements(0, v4s16, v8s16)
128 .clampNumElements(0, v2s32, v4s32)
129 .clampNumElements(0, v2s64, v2s64)
130 .moreElementsToNextPow2(0);
131
132 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
133 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
134 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
135 .widenScalarToNextPow2(0)
136 .clampScalar(0, s32, s64)
137 .clampMaxNumElements(0, s8, 16)
138 .clampMaxNumElements(0, s16, 8)
139 .clampNumElements(0, v2s32, v4s32)
140 .clampNumElements(0, v2s64, v2s64)
142 [=](const LegalityQuery &Query) {
143 return Query.Types[0].getNumElements() <= 2;
144 },
145 0, s32)
146 .minScalarOrEltIf(
147 [=](const LegalityQuery &Query) {
148 return Query.Types[0].getNumElements() <= 4;
149 },
150 0, s16)
151 .minScalarOrEltIf(
152 [=](const LegalityQuery &Query) {
153 return Query.Types[0].getNumElements() <= 16;
154 },
155 0, s8)
156 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
158
160 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
161 .widenScalarToNextPow2(0)
162 .clampScalar(0, s32, s64)
163 .clampMaxNumElements(0, s8, 16)
164 .clampMaxNumElements(0, s16, 8)
165 .clampNumElements(0, v2s32, v4s32)
166 .clampNumElements(0, v2s64, v2s64)
168 [=](const LegalityQuery &Query) {
169 return Query.Types[0].getNumElements() <= 2;
170 },
171 0, s32)
172 .minScalarOrEltIf(
173 [=](const LegalityQuery &Query) {
174 return Query.Types[0].getNumElements() <= 4;
175 },
176 0, s16)
177 .minScalarOrEltIf(
178 [=](const LegalityQuery &Query) {
179 return Query.Types[0].getNumElements() <= 16;
180 },
181 0, s8)
182 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
184
185 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
186 .customIf([=](const LegalityQuery &Query) {
187 const auto &SrcTy = Query.Types[0];
188 const auto &AmtTy = Query.Types[1];
189 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
190 AmtTy.getSizeInBits() == 32;
191 })
192 .legalFor({
193 {s32, s32},
194 {s32, s64},
195 {s64, s64},
196 {v8s8, v8s8},
197 {v16s8, v16s8},
198 {v4s16, v4s16},
199 {v8s16, v8s16},
200 {v2s32, v2s32},
201 {v4s32, v4s32},
202 {v2s64, v2s64},
203 })
204 .widenScalarToNextPow2(0)
205 .clampScalar(1, s32, s64)
206 .clampScalar(0, s32, s64)
207 .clampNumElements(0, v8s8, v16s8)
208 .clampNumElements(0, v4s16, v8s16)
209 .clampNumElements(0, v2s32, v4s32)
210 .clampNumElements(0, v2s64, v2s64)
212 .minScalarSameAs(1, 0)
214
215 getActionDefinitionsBuilder(G_PTR_ADD)
216 .legalFor({{p0, s64}, {v2p0, v2s64}})
217 .clampScalarOrElt(1, s64, s64)
218 .clampNumElements(0, v2p0, v2p0);
219
220 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
221
222 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
223 .legalFor({s32, s64})
224 .libcallFor({s128})
225 .clampScalar(0, s32, s64)
227 .scalarize(0);
228
229 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
230 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
231 .libcallFor({s128})
233 .minScalarOrElt(0, s32)
234 .clampNumElements(0, v2s32, v4s32)
235 .clampNumElements(0, v2s64, v2s64)
236 .scalarize(0);
237
238 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
239 .widenScalarToNextPow2(0, /*Min = */ 32)
240 .clampScalar(0, s32, s64)
241 .lower();
242
243 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
244 .legalFor({s64, v8s16, v16s8, v4s32})
245 .lower();
246
247 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
248 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
249 .legalFor(HasCSSC, {s32, s64})
250 .minScalar(HasCSSC, 0, s32)
251 .clampNumElements(0, v8s8, v16s8)
252 .clampNumElements(0, v4s16, v8s16)
253 .clampNumElements(0, v2s32, v4s32)
254 // FIXME: This shouldn't be needed as v2s64 types are going to
255 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
256 .clampNumElements(0, v2s64, v2s64)
257 .lower();
258
259 getActionDefinitionsBuilder(
260 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
261 .legalFor({{s32, s32}, {s64, s32}})
262 .clampScalar(0, s32, s64)
263 .clampScalar(1, s32, s64)
265
266 getActionDefinitionsBuilder(
267 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
268 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
269 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
270 .legalFor({s32, s64, v2s32, v4s32, v2s64})
271 .legalFor(HasFP16, {s16, v4s16, v8s16})
272 .libcallFor({s128})
273 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
274 .minScalarOrElt(0, MinFPScalar)
275 .clampNumElements(0, v4s16, v8s16)
276 .clampNumElements(0, v2s32, v4s32)
277 .clampNumElements(0, v2s64, v2s64)
279
280 getActionDefinitionsBuilder({G_FABS, G_FNEG})
281 .legalFor({s32, s64, v2s32, v4s32, v2s64})
282 .legalFor(HasFP16, {s16, v4s16, v8s16})
283 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
285 .clampNumElements(0, v4s16, v8s16)
286 .clampNumElements(0, v2s32, v4s32)
287 .clampNumElements(0, v2s64, v2s64)
289 .lowerFor({s16, v4s16, v8s16});
290
292 .libcallFor({s32, s64, s128})
293 .minScalar(0, s32)
294 .scalarize(0);
295
296 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
297 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
298 .libcallFor({{s64, s128}})
299 .minScalarOrElt(1, MinFPScalar);
300
301 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
302 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
303 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
304 G_FSINH, G_FTANH})
305 // We need a call for these, so we always need to scalarize.
306 .scalarize(0)
307 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
308 .minScalar(0, s32)
309 .libcallFor({s32, s64, s128});
311 .scalarize(0)
312 .minScalar(0, s32)
313 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
314
316 .legalIf(all(typeInSet(0, {s32, s64, p0}),
317 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
319 .clampScalar(0, s32, s64)
321 .minScalar(1, s8)
322 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
323 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
324
326 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
327 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
329 .clampScalar(1, s32, s128)
331 .minScalar(0, s16)
332 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
333 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
334 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
335
336
337 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
338 auto &Actions = getActionDefinitionsBuilder(Op);
339
340 if (Op == G_SEXTLOAD)
341 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
342
343 // Atomics have zero extending behavior.
344 Actions
345 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
346 {s32, p0, s16, 8},
347 {s32, p0, s32, 8},
348 {s64, p0, s8, 2},
349 {s64, p0, s16, 2},
350 {s64, p0, s32, 4},
351 {s64, p0, s64, 8},
352 {p0, p0, s64, 8},
353 {v2s32, p0, s64, 8}})
354 .widenScalarToNextPow2(0)
355 .clampScalar(0, s32, s64)
356 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
357 // how to do that yet.
358 .unsupportedIfMemSizeNotPow2()
359 // Lower anything left over into G_*EXT and G_LOAD
360 .lower();
361 }
362
363 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
364 const LLT &ValTy = Query.Types[0];
365 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
366 };
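 // Loads and stores of pointer vectors (e.g. v2p0 in address space 0) are
 // sent to custom legalization; legalizeLoadStore bitcasts the value to an
 // equivalent integer vector so the existing s64-based patterns apply.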
367
368 getActionDefinitionsBuilder(G_LOAD)
369 .customIf([=](const LegalityQuery &Query) {
370 return HasRCPC3 && Query.Types[0] == s128 &&
371 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
372 })
373 .customIf([=](const LegalityQuery &Query) {
374 return Query.Types[0] == s128 &&
375 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
376 })
377 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
378 {s16, p0, s16, 8},
379 {s32, p0, s32, 8},
380 {s64, p0, s64, 8},
381 {p0, p0, s64, 8},
382 {s128, p0, s128, 8},
383 {v8s8, p0, s64, 8},
384 {v16s8, p0, s128, 8},
385 {v4s16, p0, s64, 8},
386 {v8s16, p0, s128, 8},
387 {v2s32, p0, s64, 8},
388 {v4s32, p0, s128, 8},
389 {v2s64, p0, s128, 8}})
390 // These extends are also legal
391 .legalForTypesWithMemDesc(
392 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
393 .legalForTypesWithMemDesc({
394 // SVE vscale x 128 bit base sizes
395 {nxv16s8, p0, nxv16s8, 8},
396 {nxv8s16, p0, nxv8s16, 8},
397 {nxv4s32, p0, nxv4s32, 8},
398 {nxv2s64, p0, nxv2s64, 8},
399 })
400 .widenScalarToNextPow2(0, /* MinSize = */ 8)
401 .clampMaxNumElements(0, s8, 16)
402 .clampMaxNumElements(0, s16, 8)
403 .clampMaxNumElements(0, s32, 4)
404 .clampMaxNumElements(0, s64, 2)
405 .clampMaxNumElements(0, p0, 2)
407 .clampScalar(0, s8, s64)
409 [=](const LegalityQuery &Query) {
410 // Clamp extending load results to 32-bits.
411 return Query.Types[0].isScalar() &&
412 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
413 Query.Types[0].getSizeInBits() > 32;
414 },
415 changeTo(0, s32))
416 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
417 .bitcastIf(typeInSet(0, {v4s8}),
418 [=](const LegalityQuery &Query) {
419 const LLT VecTy = Query.Types[0];
420 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
421 })
422 .customIf(IsPtrVecPred)
423 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
425
426 getActionDefinitionsBuilder(G_STORE)
427 .customIf([=](const LegalityQuery &Query) {
428 return HasRCPC3 && Query.Types[0] == s128 &&
429 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
430 })
431 .customIf([=](const LegalityQuery &Query) {
432 return Query.Types[0] == s128 &&
433 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
434 })
435 .legalForTypesWithMemDesc(
436 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
437 {s32, p0, s8, 8}, // truncstorei8 from s32
438 {s64, p0, s8, 8}, // truncstorei8 from s64
439 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
440 {s64, p0, s16, 8}, // truncstorei16 from s64
441 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
442 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
443 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
444 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
445 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
446 .legalForTypesWithMemDesc({
447 // SVE vscale x 128 bit base sizes
448 // TODO: Add nxv2p0. Consider bitcastIf.
449 // See #92130
450 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
451 {nxv16s8, p0, nxv16s8, 8},
452 {nxv8s16, p0, nxv8s16, 8},
453 {nxv4s32, p0, nxv4s32, 8},
454 {nxv2s64, p0, nxv2s64, 8},
455 })
456 .clampScalar(0, s8, s64)
457 .lowerIf([=](const LegalityQuery &Query) {
458 return Query.Types[0].isScalar() &&
459 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
460 })
461 // Maximum: sN * k = 128
462 .clampMaxNumElements(0, s8, 16)
463 .clampMaxNumElements(0, s16, 8)
464 .clampMaxNumElements(0, s32, 4)
465 .clampMaxNumElements(0, s64, 2)
466 .clampMaxNumElements(0, p0, 2)
468 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
469 .bitcastIf(typeInSet(0, {v4s8}),
470 [=](const LegalityQuery &Query) {
471 const LLT VecTy = Query.Types[0];
472 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
473 })
474 .customIf(IsPtrVecPred)
475 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
477
478 getActionDefinitionsBuilder(G_INDEXED_STORE)
479 // Idx 0 == Ptr, Idx 1 == Val
480 // TODO: we can implement legalizations but as of now these are
481 // generated in a very specific way.
483 {p0, s8, s8, 8},
484 {p0, s16, s16, 8},
485 {p0, s32, s8, 8},
486 {p0, s32, s16, 8},
487 {p0, s32, s32, 8},
488 {p0, s64, s64, 8},
489 {p0, p0, p0, 8},
490 {p0, v8s8, v8s8, 8},
491 {p0, v16s8, v16s8, 8},
492 {p0, v4s16, v4s16, 8},
493 {p0, v8s16, v8s16, 8},
494 {p0, v2s32, v2s32, 8},
495 {p0, v4s32, v4s32, 8},
496 {p0, v2s64, v2s64, 8},
497 {p0, v2p0, v2p0, 8},
498 {p0, s128, s128, 8},
499 })
500 .unsupported();
501
502 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
503 LLT LdTy = Query.Types[0];
504 LLT PtrTy = Query.Types[1];
505 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
506 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
507 return false;
508 if (PtrTy != p0)
509 return false;
510 return true;
511 };
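 // In other words: pre/post-indexed loads are kept only for the 64/128-bit
 // NEON vector types, the scalar and pointer types, or s128, and only when
 // the pointer operand is p0.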
512 getActionDefinitionsBuilder(G_INDEXED_LOAD)
515 .legalIf(IndexedLoadBasicPred)
516 .unsupported();
517 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
518 .unsupportedIf(
520 .legalIf(all(typeInSet(0, {s16, s32, s64}),
521 LegalityPredicate([=](const LegalityQuery &Q) {
522 LLT LdTy = Q.Types[0];
523 LLT PtrTy = Q.Types[1];
524 LLT MemTy = Q.MMODescrs[0].MemoryTy;
525 if (PtrTy != p0)
526 return false;
527 if (LdTy == s16)
528 return MemTy == s8;
529 if (LdTy == s32)
530 return MemTy == s8 || MemTy == s16;
531 if (LdTy == s64)
532 return MemTy == s8 || MemTy == s16 || MemTy == s32;
533 return false;
534 })))
535 .unsupported();
536
537 // Constants
538 getActionDefinitionsBuilder(G_CONSTANT)
539 .legalFor({p0, s8, s16, s32, s64})
540 .widenScalarToNextPow2(0)
541 .clampScalar(0, s8, s64);
542 getActionDefinitionsBuilder(G_FCONSTANT)
543 .legalFor({s32, s64, s128})
544 .legalFor(HasFP16, {s16})
545 .clampScalar(0, MinFPScalar, s128);
546
547 // FIXME: fix moreElementsToNextPow2
549 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
551 .clampScalar(1, s32, s64)
552 .clampScalar(0, s32, s32)
553 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
554 .minScalarEltSameAsIf(
555 [=](const LegalityQuery &Query) {
556 const LLT &Ty = Query.Types[0];
557 const LLT &SrcTy = Query.Types[1];
558 return Ty.isVector() && !SrcTy.isPointerVector() &&
559 Ty.getElementType() != SrcTy.getElementType();
560 },
561 0, 1)
562 .minScalarOrEltIf(
563 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
564 1, s32)
565 .minScalarOrEltIf(
566 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
567 s64)
569 .clampNumElements(1, v8s8, v16s8)
570 .clampNumElements(1, v4s16, v8s16)
571 .clampNumElements(1, v2s32, v4s32)
572 .clampNumElements(1, v2s64, v2s64)
573 .customIf(isVector(0));
574
576 .legalFor({{s32, s32},
577 {s32, s64},
578 {v4s32, v4s32},
579 {v2s32, v2s32},
580 {v2s64, v2s64}})
581 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
583 .clampScalar(0, s32, s32)
584 .minScalarOrElt(1, MinFPScalar)
585 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
586 .minScalarEltSameAsIf(
587 [=](const LegalityQuery &Query) {
588 const LLT &Ty = Query.Types[0];
589 const LLT &SrcTy = Query.Types[1];
590 return Ty.isVector() && !SrcTy.isPointerVector() &&
591 Ty.getElementType() != SrcTy.getElementType();
592 },
593 0, 1)
594 .clampNumElements(1, v4s16, v8s16)
595 .clampNumElements(1, v2s32, v4s32)
596 .clampMaxNumElements(1, s64, 2)
597 .moreElementsToNextPow2(1)
598 .libcallFor({{s32, s128}});
599
600 // Extensions
601 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
602 unsigned DstSize = Query.Types[0].getSizeInBits();
603
604 // Handle legal vectors using legalFor
605 if (Query.Types[0].isVector())
606 return false;
607
608 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
609 return false; // Extending to a scalar s128 needs narrowing.
610
611 const LLT &SrcTy = Query.Types[1];
612
613 // Make sure we fit in a register otherwise. Don't bother checking that
614 // the source type is below 128 bits. We shouldn't be allowing anything
615 // through which is wider than the destination in the first place.
616 unsigned SrcSize = SrcTy.getSizeInBits();
617 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
618 return false;
619
620 return true;
621 };
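 // Summary: plain scalar extensions whose source and destination are
 // power-of-two sizes from 8 up to (but excluding) 128 bits are legal here;
 // vector sources and s128 destinations are handled by the rules below.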
622 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
623 .legalIf(ExtLegalFunc)
624 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
625 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
627 .clampMaxNumElements(1, s8, 8)
628 .clampMaxNumElements(1, s16, 4)
629 .clampMaxNumElements(1, s32, 2)
630 // Tries to convert a large EXTEND into two smaller EXTENDs
631 .lowerIf([=](const LegalityQuery &Query) {
632 return (Query.Types[0].getScalarSizeInBits() >
633 Query.Types[1].getScalarSizeInBits() * 2) &&
634 Query.Types[0].isVector() &&
635 (Query.Types[1].getScalarSizeInBits() == 8 ||
636 Query.Types[1].getScalarSizeInBits() == 16);
637 })
638 .clampMinNumElements(1, s8, 8)
639 .clampMinNumElements(1, s16, 4);
640
642 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
644 .clampMaxNumElements(0, s8, 8)
645 .clampMaxNumElements(0, s16, 4)
646 .clampMaxNumElements(0, s32, 2)
647 .minScalarOrEltIf(
648 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
649 0, s8)
650 .lowerIf([=](const LegalityQuery &Query) {
651 LLT DstTy = Query.Types[0];
652 LLT SrcTy = Query.Types[1];
653 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
654 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
655 })
656 .clampMinNumElements(0, s8, 8)
657 .clampMinNumElements(0, s16, 4)
658 .alwaysLegal();
659
660 getActionDefinitionsBuilder(G_SEXT_INREG)
661 .legalFor({s32, s64})
662 .legalFor(PackedVectorAllTypeList)
663 .maxScalar(0, s64)
664 .clampNumElements(0, v8s8, v16s8)
665 .clampNumElements(0, v4s16, v8s16)
666 .clampNumElements(0, v2s32, v4s32)
667 .clampMaxNumElements(0, s64, 2)
668 .lower();
669
670 // FP conversions
672 .legalFor(
673 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
674 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
675 .clampNumElements(0, v4s16, v4s16)
676 .clampNumElements(0, v2s32, v2s32)
677 .scalarize(0);
678
680 .legalFor(
681 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
682 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
683 .clampNumElements(0, v4s32, v4s32)
684 .clampNumElements(0, v2s64, v2s64)
685 .scalarize(0);
686
687 // Conversions
688 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
689 .legalFor({{s32, s32},
690 {s64, s32},
691 {s32, s64},
692 {s64, s64},
693 {v2s64, v2s64},
694 {v4s32, v4s32},
695 {v2s32, v2s32}})
696 .legalFor(HasFP16,
697 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
698 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
700 // The range of a fp16 value fits into an i17, so we can lower the width
701 // to i64.
703 [=](const LegalityQuery &Query) {
704 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
705 },
706 changeTo(0, s64))
708 .widenScalarOrEltToNextPow2OrMinSize(0)
709 .minScalar(0, s32)
710 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
711 .widenScalarIf(
712 [=](const LegalityQuery &Query) {
713 return Query.Types[0].getScalarSizeInBits() <= 64 &&
714 Query.Types[0].getScalarSizeInBits() >
715 Query.Types[1].getScalarSizeInBits();
716 },
718 .widenScalarIf(
719 [=](const LegalityQuery &Query) {
720 return Query.Types[1].getScalarSizeInBits() <= 64 &&
721 Query.Types[0].getScalarSizeInBits() <
722 Query.Types[1].getScalarSizeInBits();
723 },
725 .clampNumElements(0, v4s16, v8s16)
726 .clampNumElements(0, v2s32, v4s32)
727 .clampMaxNumElements(0, s64, 2)
728 .libcallFor(
729 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
730
731 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
732 .legalFor({{s32, s32},
733 {s64, s32},
734 {s32, s64},
735 {s64, s64},
736 {v2s64, v2s64},
737 {v4s32, v4s32},
738 {v2s32, v2s32}})
739 .legalFor(HasFP16,
740 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
741 // Handle types larger than i64 by scalarizing/lowering.
742 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
744 // The range of a fp16 value fits into an i17, so we can lower the width
745 // to i64.
747 [=](const LegalityQuery &Query) {
748 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
749 },
750 changeTo(0, s64))
751 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
753 .widenScalarToNextPow2(0, /*MinSize=*/32)
754 .minScalar(0, s32)
755 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
756 .widenScalarIf(
757 [=](const LegalityQuery &Query) {
758 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
759 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
760 ITySize > Query.Types[1].getScalarSizeInBits();
761 },
763 .widenScalarIf(
764 [=](const LegalityQuery &Query) {
765 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
766 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
767 Query.Types[0].getScalarSizeInBits() < FTySize;
768 },
771 .clampNumElements(0, v4s16, v8s16)
772 .clampNumElements(0, v2s32, v4s32)
773 .clampMaxNumElements(0, s64, 2);
774
775 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
776 .legalFor({{s32, s32},
777 {s64, s32},
778 {s32, s64},
779 {s64, s64},
780 {v2s64, v2s64},
781 {v4s32, v4s32},
782 {v2s32, v2s32}})
783 .legalFor(HasFP16,
784 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
785 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
789 .minScalar(1, s32)
790 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
792 [=](const LegalityQuery &Query) {
793 return Query.Types[1].getScalarSizeInBits() <= 64 &&
794 Query.Types[0].getScalarSizeInBits() <
795 Query.Types[1].getScalarSizeInBits();
796 },
798 .widenScalarIf(
799 [=](const LegalityQuery &Query) {
800 return Query.Types[0].getScalarSizeInBits() <= 64 &&
801 Query.Types[0].getScalarSizeInBits() >
802 Query.Types[1].getScalarSizeInBits();
803 },
805 .clampNumElements(0, v4s16, v8s16)
806 .clampNumElements(0, v2s32, v4s32)
807 .clampMaxNumElements(0, s64, 2)
808 .libcallFor({{s16, s128},
809 {s32, s128},
810 {s64, s128},
811 {s128, s128},
812 {s128, s32},
813 {s128, s64}});
814
815 // Control-flow
817 .legalFor({s32})
818 .clampScalar(0, s32, s32);
819 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
820
822 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
823 .widenScalarToNextPow2(0)
824 .clampScalar(0, s32, s64)
825 .clampScalar(1, s32, s32)
828 .lowerIf(isVector(0));
829
830 // Pointer-handling
831 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
832
833 if (TM.getCodeModel() == CodeModel::Small)
834 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
835 else
836 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
837
838 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
839 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
840
842 .legalFor({{s64, p0}, {v2s64, v2p0}})
843 .widenScalarToNextPow2(0, 64)
844 .clampScalar(0, s64, s64)
845 .clampMaxNumElements(0, s64, 2);
846
848 .unsupportedIf([&](const LegalityQuery &Query) {
849 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
850 })
851 .legalFor({{p0, s64}, {v2p0, v2s64}})
852 .clampMaxNumElements(1, s64, 2);
853
854 // Casts for 32 and 64-bit width type are just copies.
855 // Same for 128-bit width type, except they are on the FPR bank.
857 // Keeping 32-bit instructions legal to prevent regression in some tests
858 .legalForCartesianProduct({s32, v2s16, v4s8})
859 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
860 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
861 .lowerIf([=](const LegalityQuery &Query) {
862 return Query.Types[0].isVector() != Query.Types[1].isVector();
863 })
865 .clampNumElements(0, v8s8, v16s8)
866 .clampNumElements(0, v4s16, v8s16)
867 .clampNumElements(0, v2s32, v4s32)
868 .lower();
869
870 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
871
872 // va_list must be a pointer, but most sized types are pretty easy to handle
873 // as the destination.
875 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
876 .clampScalar(0, s8, s64)
877 .widenScalarToNextPow2(0, /*Min*/ 8);
878
879 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
880 .lowerIf(
881 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
882
883 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
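 // With +outline-atomics and no LSE, compare-and-swap and RMW operations
 // become libcalls into the outline-atomics helper routines (see the
 // libcallFor rules below) rather than inline LL/SC or CAS sequences.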
884
885 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
886 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
887 .customFor(!UseOutlineAtomics, {{s128, p0}})
888 .libcallFor(UseOutlineAtomics,
889 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
890 .clampScalar(0, s32, s64);
891
892 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
893 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
894 G_ATOMICRMW_XOR})
895 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
896 .libcallFor(UseOutlineAtomics,
897 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
898 .clampScalar(0, s32, s64);
899
900 // Do not outline these atomics operations, as per comment in
901 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
902 getActionDefinitionsBuilder(
903 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
904 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
905 .clampScalar(0, s32, s64);
906
907 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
908
909 // Merge/Unmerge
910 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
911 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
912 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
914 .widenScalarToNextPow2(LitTyIdx, 8)
915 .widenScalarToNextPow2(BigTyIdx, 32)
916 .clampScalar(LitTyIdx, s8, s64)
917 .clampScalar(BigTyIdx, s32, s128)
918 .legalIf([=](const LegalityQuery &Q) {
919 switch (Q.Types[BigTyIdx].getSizeInBits()) {
920 case 32:
921 case 64:
922 case 128:
923 break;
924 default:
925 return false;
926 }
927 switch (Q.Types[LitTyIdx].getSizeInBits()) {
928 case 8:
929 case 16:
930 case 32:
931 case 64:
932 return true;
933 default:
934 return false;
935 }
936 });
937 }
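 // For example, %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64) is legal
 // here (wide type 64 bits, narrow type 32 bits), while wide types other than
 // 32/64/128 bits are first clamped into that range.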
938
939 // TODO : nxv4s16, nxv2s16, nxv2s32
940 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
941 .legalFor(HasSVE, {{s16, nxv16s8, s64},
942 {s16, nxv8s16, s64},
943 {s32, nxv4s32, s64},
944 {s64, nxv2s64, s64}})
945 .unsupportedIf([=](const LegalityQuery &Query) {
946 const LLT &EltTy = Query.Types[1].getElementType();
947 if (Query.Types[1].isScalableVector())
948 return false;
949 return Query.Types[0] != EltTy;
950 })
951 .minScalar(2, s64)
952 .customIf([=](const LegalityQuery &Query) {
953 const LLT &VecTy = Query.Types[1];
954 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
955 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
956 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
957 })
958 .minScalarOrEltIf(
959 [=](const LegalityQuery &Query) {
960 // We want to promote to <M x s1> to <M x s64> if that wouldn't
961 // cause the total vec size to be > 128b.
962 return Query.Types[1].isFixedVector() &&
963 Query.Types[1].getNumElements() <= 2;
964 },
965 0, s64)
966 .minScalarOrEltIf(
967 [=](const LegalityQuery &Query) {
968 return Query.Types[1].isFixedVector() &&
969 Query.Types[1].getNumElements() <= 4;
970 },
971 0, s32)
972 .minScalarOrEltIf(
973 [=](const LegalityQuery &Query) {
974 return Query.Types[1].isFixedVector() &&
975 Query.Types[1].getNumElements() <= 8;
976 },
977 0, s16)
978 .minScalarOrEltIf(
979 [=](const LegalityQuery &Query) {
980 return Query.Types[1].isFixedVector() &&
981 Query.Types[1].getNumElements() <= 16;
982 },
983 0, s8)
984 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
986 .clampMaxNumElements(1, s64, 2)
987 .clampMaxNumElements(1, s32, 4)
988 .clampMaxNumElements(1, s16, 8)
989 .clampMaxNumElements(1, s8, 16)
990 .clampMaxNumElements(1, p0, 2);
991
992 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
993 .legalIf(
994 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
995 .legalFor(HasSVE, {{nxv16s8, s32, s64},
996 {nxv8s16, s32, s64},
997 {nxv4s32, s32, s64},
998 {nxv2s64, s64, s64}})
1000 .widenVectorEltsToVectorMinSize(0, 64)
1001 .clampNumElements(0, v8s8, v16s8)
1002 .clampNumElements(0, v4s16, v8s16)
1003 .clampNumElements(0, v2s32, v4s32)
1004 .clampMaxNumElements(0, s64, 2)
1005 .clampMaxNumElements(0, p0, 2);
1006
1007 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1008 .legalFor({{v8s8, s8},
1009 {v16s8, s8},
1010 {v4s16, s16},
1011 {v8s16, s16},
1012 {v2s32, s32},
1013 {v4s32, s32},
1014 {v2p0, p0},
1015 {v2s64, s64}})
1016 .clampNumElements(0, v4s32, v4s32)
1017 .clampNumElements(0, v2s64, v2s64)
1018 .minScalarOrElt(0, s8)
1021 .minScalarSameAs(1, 0);
1022
1023 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1024
1027 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
1028 .scalarize(1)
1029 .widenScalarToNextPow2(1, /*Min=*/32)
1030 .clampScalar(1, s32, s64)
1031 .scalarSameSizeAs(0, 1);
1032 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
1033
1034 // TODO: Custom lowering for v2s32, v4s32, v2s64.
1035 getActionDefinitionsBuilder(G_BITREVERSE)
1036 .legalFor({s32, s64, v8s8, v16s8})
1037 .widenScalarToNextPow2(0, /*Min = */ 32)
1038 .clampScalar(0, s32, s64);
1039
1040 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
1041
1043 .lowerIf(isVector(0))
1044 .widenScalarToNextPow2(1, /*Min=*/32)
1045 .clampScalar(1, s32, s64)
1046 .scalarSameSizeAs(0, 1)
1047 .legalFor(HasCSSC, {s32, s64})
1048 .customFor(!HasCSSC, {s32, s64});
1049
1050 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1051 .legalIf([=](const LegalityQuery &Query) {
1052 const LLT &DstTy = Query.Types[0];
1053 const LLT &SrcTy = Query.Types[1];
1054 // For now just support the TBL2 variant which needs the source vectors
1055 // to be the same size as the dest.
1056 if (DstTy != SrcTy)
1057 return false;
1058 return llvm::is_contained(
1059 {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1060 })
1061 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1062 // just want those lowered into G_BUILD_VECTOR
1063 .lowerIf([=](const LegalityQuery &Query) {
1064 return !Query.Types[1].isVector();
1065 })
1066 .moreElementsIf(
1067 [](const LegalityQuery &Query) {
1068 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1069 Query.Types[0].getNumElements() >
1070 Query.Types[1].getNumElements();
1071 },
1072 changeTo(1, 0))
1074 .moreElementsIf(
1075 [](const LegalityQuery &Query) {
1076 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1077 Query.Types[0].getNumElements() <
1078 Query.Types[1].getNumElements();
1079 },
1080 changeTo(0, 1))
1081 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1082 .clampNumElements(0, v8s8, v16s8)
1083 .clampNumElements(0, v4s16, v8s16)
1084 .clampNumElements(0, v4s32, v4s32)
1085 .clampNumElements(0, v2s64, v2s64)
1086 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1087 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1088 // Bitcast pointers vector to i64.
1089 const LLT DstTy = Query.Types[0];
1090 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1091 });
1092
1093 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1094 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1095 .bitcastIf(
1096 [=](const LegalityQuery &Query) {
1097 return Query.Types[0].getSizeInBits() <= 128 &&
1098 Query.Types[1].getSizeInBits() <= 64;
1099 },
1100 [=](const LegalityQuery &Query) {
1101 const LLT DstTy = Query.Types[0];
1102 const LLT SrcTy = Query.Types[1];
1103 return std::pair(
1104 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1105 .changeElementCount(
1106 DstTy.getElementCount().divideCoefficientBy(
1107 SrcTy.getNumElements())));
1108 });
1109
1110 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1111
1112 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1113
1114 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1115
1116 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1117
1118 if (ST.hasMOPS()) {
1119 // G_BZERO is not supported. Currently it is only emitted by
1120 // PreLegalizerCombiner for G_MEMSET with zero constant.
1121 getActionDefinitionsBuilder(G_BZERO).unsupported();
1122
1123 getActionDefinitionsBuilder(G_MEMSET)
1124 .legalForCartesianProduct({p0}, {s64}, {s64})
1125 .customForCartesianProduct({p0}, {s8}, {s64})
1126 .immIdx(0); // Inform verifier imm idx 0 is handled.
1127
1128 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1129 .legalForCartesianProduct({p0}, {p0}, {s64})
1130 .immIdx(0); // Inform verifier imm idx 0 is handled.
1131
1132 // G_MEMCPY_INLINE does not have a tailcall immediate
1133 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1134 .legalForCartesianProduct({p0}, {p0}, {s64});
1135
1136 } else {
1137 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1138 .libcall();
1139 }
1140
1141 // FIXME: Legal vector types are only legal with NEON.
1143 .legalFor(HasCSSC, {s32, s64})
1144 .legalFor(PackedVectorAllTypeList)
1145 .customIf([=](const LegalityQuery &Q) {
1146 // TODO: Fix suboptimal codegen for 128+ bit types.
1147 LLT SrcTy = Q.Types[0];
1148 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1149 })
1150 .widenScalarIf(
1151 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1152 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1153 .widenScalarIf(
1154 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1155 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1156 .clampNumElements(0, v8s8, v16s8)
1157 .clampNumElements(0, v4s16, v8s16)
1158 .clampNumElements(0, v2s32, v4s32)
1159 .clampNumElements(0, v2s64, v2s64)
1161 .lower();
1162
1163 // For fadd reductions we have pairwise operations available. We treat the
1164 // usual legal types as legal and handle the lowering to pairwise instructions
1165 // later.
1166 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1167 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1168 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1169 .minScalarOrElt(0, MinFPScalar)
1170 .clampMaxNumElements(1, s64, 2)
1171 .clampMaxNumElements(1, s32, 4)
1172 .clampMaxNumElements(1, s16, 8)
1173 .lower();
1174
1175 // For fmul reductions we need to split up into individual operations. We
1176 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1177 // smaller types, followed by scalarizing what remains.
1178 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1179 .minScalarOrElt(0, MinFPScalar)
1180 .clampMaxNumElements(1, s64, 2)
1181 .clampMaxNumElements(1, s32, 4)
1182 .clampMaxNumElements(1, s16, 8)
1183 .clampMaxNumElements(1, s32, 2)
1184 .clampMaxNumElements(1, s16, 4)
1185 .scalarize(1)
1186 .lower();
1187
1188 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1189 .scalarize(2)
1190 .lower();
1191
1192 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1193 .legalFor({{s8, v16s8},
1194 {s8, v8s8},
1195 {s16, v8s16},
1196 {s16, v4s16},
1197 {s32, v4s32},
1198 {s32, v2s32},
1199 {s64, v2s64}})
1200 .clampMaxNumElements(1, s64, 2)
1201 .clampMaxNumElements(1, s32, 4)
1202 .clampMaxNumElements(1, s16, 8)
1203 .clampMaxNumElements(1, s8, 16)
1204 .lower();
1205
1206 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1207 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1208 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1209 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1210 .minScalarOrElt(0, MinFPScalar)
1211 .clampMaxNumElements(1, s64, 2)
1212 .clampMaxNumElements(1, s32, 4)
1213 .clampMaxNumElements(1, s16, 8)
1214 .lower();
1215
1216 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1217 .clampMaxNumElements(1, s32, 2)
1218 .clampMaxNumElements(1, s16, 4)
1219 .clampMaxNumElements(1, s8, 8)
1220 .scalarize(1)
1221 .lower();
1222
1223 getActionDefinitionsBuilder(
1224 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1225 .legalFor({{s8, v8s8},
1226 {s8, v16s8},
1227 {s16, v4s16},
1228 {s16, v8s16},
1229 {s32, v2s32},
1230 {s32, v4s32}})
1231 .moreElementsIf(
1232 [=](const LegalityQuery &Query) {
1233 return Query.Types[1].isVector() &&
1234 Query.Types[1].getElementType() != s8 &&
1235 Query.Types[1].getNumElements() & 1;
1236 },
1238 .clampMaxNumElements(1, s64, 2)
1239 .clampMaxNumElements(1, s32, 4)
1240 .clampMaxNumElements(1, s16, 8)
1241 .clampMaxNumElements(1, s8, 16)
1242 .scalarize(1)
1243 .lower();
1244
1245 getActionDefinitionsBuilder(
1246 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1247 // Try to break down into smaller vectors as long as they're at least 64
1248 // bits. This lets us use vector operations for some parts of the
1249 // reduction.
1250 .fewerElementsIf(
1251 [=](const LegalityQuery &Q) {
1252 LLT SrcTy = Q.Types[1];
1253 if (SrcTy.isScalar())
1254 return false;
1255 if (!isPowerOf2_32(SrcTy.getNumElements()))
1256 return false;
1257 // We can usually perform 64b vector operations.
1258 return SrcTy.getSizeInBits() > 64;
1259 },
1260 [=](const LegalityQuery &Q) {
1261 LLT SrcTy = Q.Types[1];
1262 return std::make_pair(1, SrcTy.divide(2));
1263 })
1264 .scalarize(1)
1265 .lower();
1266
1267 // TODO: Update this to correct handling when adding AArch64/SVE support.
1268 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1269
1270 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1271 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1272 .lower();
1273
1275 .legalFor({{s32, s64}, {s64, s64}})
1276 .customIf([=](const LegalityQuery &Q) {
1277 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1278 })
1279 .lower();
1281
1282 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1283 .customFor({{s32, s32}, {s64, s64}});
1284
1285 auto always = [=](const LegalityQuery &Q) { return true; };
1287 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
1288 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
1289 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
1290 .customFor({{s128, s128},
1291 {v2s64, v2s64},
1292 {v2s32, v2s32},
1293 {v4s32, v4s32},
1294 {v4s16, v4s16},
1295 {v8s16, v8s16}})
1296 .clampScalar(0, s32, s128)
1298 .minScalarEltSameAsIf(always, 1, 0)
1299 .maxScalarEltSameAsIf(always, 1, 0);
1300
1301 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1302 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1303 .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
1304 .clampNumElements(0, v8s8, v16s8)
1305 .clampNumElements(0, v4s16, v8s16)
1306 .clampNumElements(0, v2s32, v4s32)
1307 .clampMaxNumElements(0, s64, 2)
1310 .lower();
1311
1312 // TODO: Libcall support for s128.
1313 // TODO: s16 should be legal with full FP16 support.
1314 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1315 .legalFor({{s64, s32}, {s64, s64}});
1316
1317 // TODO: Custom legalization for mismatched types.
1318 getActionDefinitionsBuilder(G_FCOPYSIGN)
1320 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1321 [=](const LegalityQuery &Query) {
1322 const LLT Ty = Query.Types[0];
1323 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1324 })
1325 .lower();
1326
1328
1329 // Access to floating-point environment.
1330 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1331 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1332 .libcall();
1333
1334 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1335
1336 getActionDefinitionsBuilder(G_PREFETCH).custom();
1337
1338 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1339
1340 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1341 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1343 .immIdx(0); // Inform verifier imm idx 0 is handled.
1344
1345 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1346 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1347 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1348
1348
1349 getLegacyLegalizerInfo().computeTables();
1350 verify(*ST.getInstrInfo());
1351}
1352
1353bool AArch64LegalizerInfo::legalizeCustom(
1354 LegalizerHelper &Helper, MachineInstr &MI,
1355 LostDebugLocObserver &LocObserver) const {
1356 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1357 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1358 GISelChangeObserver &Observer = Helper.Observer;
1359 switch (MI.getOpcode()) {
1360 default:
1361 // No idea what to do.
1362 return false;
1363 case TargetOpcode::G_VAARG:
1364 return legalizeVaArg(MI, MRI, MIRBuilder);
1365 case TargetOpcode::G_LOAD:
1366 case TargetOpcode::G_STORE:
1367 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1368 case TargetOpcode::G_SHL:
1369 case TargetOpcode::G_ASHR:
1370 case TargetOpcode::G_LSHR:
1371 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1372 case TargetOpcode::G_GLOBAL_VALUE:
1373 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1374 case TargetOpcode::G_SBFX:
1375 case TargetOpcode::G_UBFX:
1376 return legalizeBitfieldExtract(MI, MRI, Helper);
1377 case TargetOpcode::G_FSHL:
1378 case TargetOpcode::G_FSHR:
1379 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1380 case TargetOpcode::G_ROTR:
1381 return legalizeRotate(MI, MRI, Helper);
1382 case TargetOpcode::G_CTPOP:
1383 return legalizeCTPOP(MI, MRI, Helper);
1384 case TargetOpcode::G_ATOMIC_CMPXCHG:
1385 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1386 case TargetOpcode::G_CTTZ:
1387 return legalizeCTTZ(MI, Helper);
1388 case TargetOpcode::G_BZERO:
1389 case TargetOpcode::G_MEMCPY:
1390 case TargetOpcode::G_MEMMOVE:
1391 case TargetOpcode::G_MEMSET:
1392 return legalizeMemOps(MI, Helper);
1393 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1394 return legalizeExtractVectorElt(MI, MRI, Helper);
1395 case TargetOpcode::G_DYN_STACKALLOC:
1396 return legalizeDynStackAlloc(MI, Helper);
1397 case TargetOpcode::G_PREFETCH:
1398 return legalizePrefetch(MI, Helper);
1399 case TargetOpcode::G_ABS:
1400 return Helper.lowerAbsToCNeg(MI);
1401 case TargetOpcode::G_ICMP:
1402 return legalizeICMP(MI, MRI, MIRBuilder);
1403 }
1404
1405 llvm_unreachable("expected switch to return");
1406}
1407
1408bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1409 MachineRegisterInfo &MRI,
1410 MachineIRBuilder &MIRBuilder,
1411 GISelChangeObserver &Observer,
1412 LegalizerHelper &Helper) const {
1413 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1414 MI.getOpcode() == TargetOpcode::G_FSHR);
1415
1416 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1417 // lowering
1418 Register ShiftNo = MI.getOperand(3).getReg();
1419 LLT ShiftTy = MRI.getType(ShiftNo);
1420 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1421
1422 // Adjust shift amount according to Opcode (FSHL/FSHR)
1423 // Convert FSHL to FSHR
1424 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1425 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1426
1427 // Lower non-constant shifts and leave zero shifts to the optimizer.
1428 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1429 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1430 LegalizerHelper::LegalizeResult::Legalized);
1431
1432 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1433
1434 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
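 // For example, on s32 a G_FSHL with a constant shift of 8 becomes a G_FSHR
 // with a shift of 24 (BitWidth - 8), so only the FSHR form needs selection
 // support for constant shift amounts.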
1435
1436 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1437 // in the range of 0 <-> BitWidth, it is legal
1438 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1439 VRegAndVal->Value.ult(BitWidth))
1440 return true;
1441
1442 // Cast the ShiftNumber to a 64-bit type
1443 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1444
1445 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1446 Observer.changingInstr(MI);
1447 MI.getOperand(3).setReg(Cast64.getReg(0));
1448 Observer.changedInstr(MI);
1449 }
1450 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1451 // instruction
1452 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1453 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1454 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1455 Cast64.getReg(0)});
1456 MI.eraseFromParent();
1457 }
1458 return true;
1459}
1460
1461bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1462 MachineRegisterInfo &MRI,
1463 MachineIRBuilder &MIRBuilder) const {
1464 Register DstReg = MI.getOperand(0).getReg();
1465 Register SrcReg1 = MI.getOperand(2).getReg();
1466 Register SrcReg2 = MI.getOperand(3).getReg();
1467 LLT DstTy = MRI.getType(DstReg);
1468 LLT SrcTy = MRI.getType(SrcReg1);
1469
1470 // Check the vector types are legal
1471 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1472 DstTy.getNumElements() != SrcTy.getNumElements() ||
1473 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1474 return false;
1475
1476 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1477 // following passes
1478 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1479 if (Pred != CmpInst::ICMP_NE)
1480 return true;
1481 Register CmpReg =
1482 MIRBuilder
1483 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1484 .getReg(0);
1485 MIRBuilder.buildNot(DstReg, CmpReg);
1486
1487 MI.eraseFromParent();
1488 return true;
1489}
1490
1491bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1492 MachineRegisterInfo &MRI,
1493 LegalizerHelper &Helper) const {
1494 // To allow for imported patterns to match, we ensure that the rotate amount
1495 // is 64b with an extension.
1496 Register AmtReg = MI.getOperand(2).getReg();
1497 LLT AmtTy = MRI.getType(AmtReg);
1498 (void)AmtTy;
1499 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1500 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1501 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1502 Helper.Observer.changingInstr(MI);
1503 MI.getOperand(2).setReg(NewAmt.getReg(0));
1504 Helper.Observer.changedInstr(MI);
1505 return true;
1506}
1507
1508bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1509 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1510 GISelChangeObserver &Observer) const {
1511 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1512 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1513 // G_ADD_LOW instructions.
1514 // By splitting this here, we can optimize accesses in the small code model by
1515 // folding in the G_ADD_LOW into the load/store offset.
1516 auto &GlobalOp = MI.getOperand(1);
1517 // Don't modify an intrinsic call.
1518 if (GlobalOp.isSymbol())
1519 return true;
1520 const auto* GV = GlobalOp.getGlobal();
1521 if (GV->isThreadLocal())
1522 return true; // Don't want to modify TLS vars.
1523
1524 auto &TM = ST->getTargetLowering()->getTargetMachine();
1525 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1526
1527 if (OpFlags & AArch64II::MO_GOT)
1528 return true;
1529
1530 auto Offset = GlobalOp.getOffset();
1531 Register DstReg = MI.getOperand(0).getReg();
1532 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1533 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1534 // Set the regclass on the dest reg too.
1535 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1536
1537 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1538 // by creating a MOVK that sets bits 48-63 of the register to (global address
1539 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1540 // prevent an incorrect tag being generated during relocation when the
1541 // global appears before the code section. Without the offset, a global at
1542 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1543 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1544 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1545 // instead of `0xf`.
1546 // This assumes that we're in the small code model so we can assume a binary
1547 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1548 // binary must also be loaded into address range [0, 2^48). Both of these
1549 // properties need to be ensured at runtime when using tagged addresses.
1550 if (OpFlags & AArch64II::MO_TAGGED) {
1551 assert(!Offset &&
1552 "Should not have folded in an offset for a tagged global!");
1553 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1554 .addGlobalAddress(GV, 0x100000000,
1555 AArch64II::MO_PREL | AArch64II::MO_G3)
1556 .addImm(48);
1557 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1558 }
1559
1560 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1561 .addGlobalAddress(GV, Offset,
1562 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1563 MI.eraseFromParent();
1564 return true;
1565}
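// In the small code model the result is roughly "adrp xN, sym" followed by an
// add of the low 12 bits, kept as G_ADD_LOW so later passes can fold it into
// load/store addressing modes.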
1566
1567bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1568 MachineInstr &MI) const {
1569 auto LowerBinOp = [&MI](unsigned Opcode) {
1570 MachineIRBuilder MIB(MI);
1571 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1572 {MI.getOperand(2), MI.getOperand(3)});
1573 MI.eraseFromParent();
1574 return true;
1575 };
1576
1577 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1578 switch (IntrinsicID) {
1579 case Intrinsic::vacopy: {
1580 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1581 unsigned VaListSize =
1582 (ST->isTargetDarwin() || ST->isTargetWindows())
1583 ? PtrSize
1584 : ST->isTargetILP32() ? 20 : 32;
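 // AAPCS64 ELF targets use a 32-byte va_list struct (20 bytes for ILP32),
 // while Darwin and Windows use a plain pointer, so only that many bytes need
 // to be copied.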
1585
1586 MachineFunction &MF = *MI.getMF();
1587 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1588 LLT::scalar(VaListSize * 8));
1589 MachineIRBuilder MIB(MI);
1590 MIB.buildLoad(Val, MI.getOperand(2),
1591 *MF.getMachineMemOperand(MachinePointerInfo(),
1592 MachineMemOperand::MOLoad,
1593 VaListSize, Align(PtrSize)));
1594 MIB.buildStore(Val, MI.getOperand(1),
1595 *MF.getMachineMemOperand(MachinePointerInfo(),
1596 MachineMemOperand::MOStore,
1597 VaListSize, Align(PtrSize)));
1598 MI.eraseFromParent();
1599 return true;
1600 }
1601 case Intrinsic::get_dynamic_area_offset: {
1602 MachineIRBuilder &MIB = Helper.MIRBuilder;
1603 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1604 MI.eraseFromParent();
1605 return true;
1606 }
1607 case Intrinsic::aarch64_mops_memset_tag: {
1608 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1609 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1610 // the instruction).
1611 MachineIRBuilder MIB(MI);
1612 auto &Value = MI.getOperand(3);
1613 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1614 Value.setReg(ExtValueReg);
1615 return true;
1616 }
1617 case Intrinsic::aarch64_prefetch: {
1618 MachineIRBuilder MIB(MI);
1619 auto &AddrVal = MI.getOperand(1);
1620
1621 int64_t IsWrite = MI.getOperand(2).getImm();
1622 int64_t Target = MI.getOperand(3).getImm();
1623 int64_t IsStream = MI.getOperand(4).getImm();
1624 int64_t IsData = MI.getOperand(5).getImm();
1625
1626 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1627 (!IsData << 3) | // IsDataCache bit
1628 (Target << 1) | // Cache level bits
1629 (unsigned)IsStream; // Stream bit
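 // For example, a write prefetch of data into L1 without streaming
 // (IsWrite=1, Target=0, IsStream=0, IsData=1) gives PrfOp 0b10000, which
 // should correspond to PSTL1KEEP.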
1630
1631 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1632 MI.eraseFromParent();
1633 return true;
1634 }
1635 case Intrinsic::aarch64_neon_uaddv:
1636 case Intrinsic::aarch64_neon_saddv:
1637 case Intrinsic::aarch64_neon_umaxv:
1638 case Intrinsic::aarch64_neon_smaxv:
1639 case Intrinsic::aarch64_neon_uminv:
1640 case Intrinsic::aarch64_neon_sminv: {
1641 MachineIRBuilder MIB(MI);
1642 MachineRegisterInfo &MRI = *MIB.getMRI();
1643 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1644 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1645 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1646
1647 auto OldDst = MI.getOperand(0).getReg();
1648 auto OldDstTy = MRI.getType(OldDst);
1649 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1650 if (OldDstTy == NewDstTy)
1651 return true;
1652
1653 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1654
1655 Helper.Observer.changingInstr(MI);
1656 MI.getOperand(0).setReg(NewDst);
1657 Helper.Observer.changedInstr(MI);
1658
1659 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1660 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1661 OldDst, NewDst);
1662
1663 return true;
1664 }
1665 case Intrinsic::aarch64_neon_uaddlp:
1666 case Intrinsic::aarch64_neon_saddlp: {
1667 MachineIRBuilder MIB(MI);
1668
1669 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1670 ? AArch64::G_UADDLP
1671 : AArch64::G_SADDLP;
1672 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1673 MI.eraseFromParent();
1674
1675 return true;
1676 }
1677 case Intrinsic::aarch64_neon_uaddlv:
1678 case Intrinsic::aarch64_neon_saddlv: {
1679 MachineIRBuilder MIB(MI);
1680 MachineRegisterInfo &MRI = *MIB.getMRI();
1681
1682 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1683 ? AArch64::G_UADDLV
1684 : AArch64::G_SADDLV;
1685 Register DstReg = MI.getOperand(0).getReg();
1686 Register SrcReg = MI.getOperand(2).getReg();
1687 LLT DstTy = MRI.getType(DstReg);
1688
1689 LLT MidTy, ExtTy;
1690 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1691 MidTy = LLT::fixed_vector(4, 32);
1692 ExtTy = LLT::scalar(32);
1693 } else {
1694 MidTy = LLT::fixed_vector(2, 64);
1695 ExtTy = LLT::scalar(64);
1696 }
1697
1698 Register MidReg =
1699 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1700 Register ZeroReg =
1701 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1702 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1703 {MidReg, ZeroReg})
1704 .getReg(0);
1705
1706 if (DstTy.getScalarSizeInBits() < 32)
1707 MIB.buildTrunc(DstReg, ExtReg);
1708 else
1709 MIB.buildCopy(DstReg, ExtReg);
1710
1711 MI.eraseFromParent();
1712
1713 return true;
1714 }
1715 case Intrinsic::aarch64_neon_smax:
1716 return LowerBinOp(TargetOpcode::G_SMAX);
1717 case Intrinsic::aarch64_neon_smin:
1718 return LowerBinOp(TargetOpcode::G_SMIN);
1719 case Intrinsic::aarch64_neon_umax:
1720 return LowerBinOp(TargetOpcode::G_UMAX);
1721 case Intrinsic::aarch64_neon_umin:
1722 return LowerBinOp(TargetOpcode::G_UMIN);
1723 case Intrinsic::aarch64_neon_fmax:
1724 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1725 case Intrinsic::aarch64_neon_fmin:
1726 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1727 case Intrinsic::aarch64_neon_fmaxnm:
1728 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1729 case Intrinsic::aarch64_neon_fminnm:
1730 return LowerBinOp(TargetOpcode::G_FMINNUM);
1731 case Intrinsic::aarch64_neon_smull:
1732 return LowerBinOp(AArch64::G_SMULL);
1733 case Intrinsic::aarch64_neon_umull:
1734 return LowerBinOp(AArch64::G_UMULL);
1735 case Intrinsic::aarch64_neon_abs: {
1736 // Lower the intrinsic to G_ABS.
1737 MachineIRBuilder MIB(MI);
1738 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1739 MI.eraseFromParent();
1740 return true;
1741 }
1742
1743 case Intrinsic::vector_reverse:
1744 // TODO: Add support for vector_reverse
1745 return false;
1746 }
1747
1748 return true;
1749}
1750
1751 bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1752 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1753 GISelChangeObserver &Observer) const {
1754 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1755 MI.getOpcode() == TargetOpcode::G_LSHR ||
1756 MI.getOpcode() == TargetOpcode::G_SHL);
1757 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1758 // imported patterns can select it later. Either way, it will be legal.
1759 Register AmtReg = MI.getOperand(2).getReg();
1760 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1761 if (!VRegAndVal)
1762 return true;
1763 // Check the shift amount is in range for an immediate form.
1764 int64_t Amount = VRegAndVal->Value.getSExtValue();
1765 if (Amount > 31)
1766 return true; // This will have to remain a register variant.
1767 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1768 Observer.changingInstr(MI);
1769 MI.getOperand(2).setReg(ExtCst.getReg(0));
1770 Observer.changedInstr(MI);
1771 return true;
1772}
1773
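// Fold a G_PTR_ADD with a constant offset into the LDP/STP addressing mode
// when the offset fits the signed 7-bit, 8-byte-scaled immediate field (hence
// isShiftedInt<7, 3>); otherwise keep the original pointer with offset 0.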
1774 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1775 MachineRegisterInfo &MRI) {
1776 Base = Root;
1777 Offset = 0;
1778
1779 Register NewBase;
1780 int64_t NewOffset;
1781 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1782 isShiftedInt<7, 3>(NewOffset)) {
1783 Base = NewBase;
1784 Offset = NewOffset;
1785 }
1786}
1787
1788// FIXME: This should be removed and replaced with the generic bitcast legalize
1789// action.
1790 bool AArch64LegalizerInfo::legalizeLoadStore(
1791 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1792 GISelChangeObserver &Observer) const {
1793 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1794 MI.getOpcode() == TargetOpcode::G_LOAD);
1795 // Here we just try to handle vector loads/stores where our value type might
1796 // have pointer elements, which the SelectionDAG importer can't handle. To
1797 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1798 // the value to use s64 types.
1799
1800 // Custom legalization requires that the instruction, if not deleted, be fully
1801 // legalized. To allow further legalization of the instruction, we create a
1802 // new instruction and erase the existing one.
1803
1804 Register ValReg = MI.getOperand(0).getReg();
1805 const LLT ValTy = MRI.getType(ValReg);
1806
1807 if (ValTy == LLT::scalar(128)) {
1808
1809 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1810 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1811 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1812 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1813 bool IsRcpC3 =
1814 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1815
1816 LLT s64 = LLT::scalar(64);
1817
1818 unsigned Opcode;
1819 if (IsRcpC3) {
1820 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1821 } else {
1822 // For LSE2, loads/stores should have been converted to monotonic and had
1823 // a fence inserted after them.
1824 assert(Ordering == AtomicOrdering::Monotonic ||
1825 Ordering == AtomicOrdering::Unordered);
1826 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1827
1828 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1829 }
1830
1831 MachineInstrBuilder NewI;
1832 if (IsLoad) {
1833 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1834 MIRBuilder.buildMergeLikeInstr(
1835 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1836 } else {
1837 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1838 NewI = MIRBuilder.buildInstr(
1839 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1840 }
1841
1842 if (IsRcpC3) {
1843 NewI.addUse(MI.getOperand(1).getReg());
1844 } else {
1845 Register Base;
1846 int Offset;
1847 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1848 NewI.addUse(Base);
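// The LDP/STP immediate is scaled by the size of a single register (8 bytes),
// so the byte offset is divided by 8 below.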
1849 NewI.addImm(Offset / 8);
1850 }
1851
1852 NewI.cloneMemRefs(MI);
1853 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1854 *MRI.getTargetRegisterInfo(),
1855 *ST->getRegBankInfo());
1856 MI.eraseFromParent();
1857 return true;
1858 }
1859
1860 if (!ValTy.isPointerVector() ||
1861 ValTy.getElementType().getAddressSpace() != 0) {
1862 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1863 return false;
1864 }
1865
1866 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1867 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1868 auto &MMO = **MI.memoperands_begin();
1869 MMO.setType(NewTy);
1870
1871 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1872 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1873 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1874 } else {
1875 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1876 MIRBuilder.buildBitcast(ValReg, NewLoad);
1877 }
1878 MI.eraseFromParent();
1879 return true;
1880}
1881
1882bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1883 MachineRegisterInfo &MRI,
1884 MachineIRBuilder &MIRBuilder) const {
1885 MachineFunction &MF = MIRBuilder.getMF();
1886 Align Alignment(MI.getOperand(2).getImm());
1887 Register Dst = MI.getOperand(0).getReg();
1888 Register ListPtr = MI.getOperand(1).getReg();
1889
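// Sketch of the lowering below: the va_list is treated as a single pointer to
// the next argument slot. Load that pointer, realign it if the value needs
// more than pointer alignment, load the value, then store back the pointer
// advanced past the pointer-aligned value size.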
1890 LLT PtrTy = MRI.getType(ListPtr);
1891 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1892
1893 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1894 const Align PtrAlign = Align(PtrSize);
1895 auto List = MIRBuilder.buildLoad(
1896 PtrTy, ListPtr,
1897 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1898 PtrTy, PtrAlign));
1899
1900 MachineInstrBuilder DstPtr;
1901 if (Alignment > PtrAlign) {
1902 // Realign the list to the actual required alignment.
1903 auto AlignMinus1 =
1904 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1905 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1906 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1907 } else
1908 DstPtr = List;
1909
1910 LLT ValTy = MRI.getType(Dst);
1911 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1912 MIRBuilder.buildLoad(
1913 Dst, DstPtr,
1914 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1915 ValTy, std::max(Alignment, PtrAlign)));
1916
1917 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1918
1919 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1920
1921 MIRBuilder.buildStore(NewList, ListPtr,
1922 *MF.getMachineMemOperand(MachinePointerInfo(),
1923 MachineMemOperand::MOStore,
1924 PtrTy, PtrAlign));
1925
1926 MI.eraseFromParent();
1927 return true;
1928}
1929
1930bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1931 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1932 // Only legal if we can select immediate forms.
1933 // TODO: Lower this otherwise.
1934 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1935 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1936}
1937
1938bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1939 MachineRegisterInfo &MRI,
1940 LegalizerHelper &Helper) const {
1941 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1942 // it can be more efficiently lowered to the following sequence that uses
1943 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1944 // registers are cheap.
1945 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1946 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1947 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1948 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1949 //
1950 // For 128 bit vector popcounts, we lower to the following sequence:
1951 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1952 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1953 // uaddlp.4s v0, v0 // v4s32, v2s64
1954 // uaddlp.2d v0, v0 // v2s64
1955 //
1956 // For 64 bit vector popcounts, we lower to the following sequence:
1957 // cnt.8b v0, v0 // v4s16, v2s32
1958 // uaddlp.4h v0, v0 // v4s16, v2s32
1959 // uaddlp.2s v0, v0 // v2s32
1960
1961 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1962 Register Dst = MI.getOperand(0).getReg();
1963 Register Val = MI.getOperand(1).getReg();
1964 LLT Ty = MRI.getType(Val);
1965 unsigned Size = Ty.getSizeInBits();
1966
1967 assert(Ty == MRI.getType(Dst) &&
1968 "Expected src and dst to have the same type!");
1969
1970 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1971 LLT s64 = LLT::scalar(64);
1972
1973 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1974 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1975 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1976 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1977
1978 MIRBuilder.buildZExt(Dst, Add);
1979 MI.eraseFromParent();
1980 return true;
1981 }
1982
1983 if (!ST->hasNEON() ||
1984 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1985 // Use generic lowering when custom lowering is not possible.
1986 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1987 Helper.lowerBitCount(MI) ==
1988 LegalizerHelper::LegalizeResult::Legalized;
1989 }
1990
1991 // Pre-conditioning: widen Val up to the nearest vector type.
1992 // s32,s64,v4s16,v2s32 -> v8i8
1993 // v8s16,v4s32,v2s64 -> v16i8
1994 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1995 if (Ty.isScalar()) {
1996 assert((Size == 32 || Size == 64 || Size == 128) &&
1997 "Expected only 32, 64, or 128 bit scalars!");
1997 if (Size == 32) {
1998 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1999 }
2000 }
2001 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2002
2003 // Count bits in each byte-sized lane.
2004 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2005
2006 // Sum across lanes.
2007
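// With FEAT_DotProd, a UDOT against an all-ones vector adds each group of four
// byte counts into a 32-bit lane in one instruction, replacing most of the
// G_UADDLP chain used on the fallback path below.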
2008 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2009 Ty.getScalarSizeInBits() != 16) {
2010 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2011 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2012 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2013 MachineInstrBuilder Sum;
2014
2015 if (Ty == LLT::fixed_vector(2, 64)) {
2016 auto UDOT =
2017 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2018 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2019 } else if (Ty == LLT::fixed_vector(4, 32)) {
2020 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2021 } else if (Ty == LLT::fixed_vector(2, 32)) {
2022 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2023 } else {
2024 llvm_unreachable("unexpected vector shape");
2025 }
2026
2027 Sum->getOperand(0).setReg(Dst);
2028 MI.eraseFromParent();
2029 return true;
2030 }
2031
2032 Register HSum = CTPOP.getReg(0);
2033 unsigned Opc;
2034 SmallVector<LLT> HAddTys;
2035 if (Ty.isScalar()) {
2036 Opc = Intrinsic::aarch64_neon_uaddlv;
2037 HAddTys.push_back(LLT::scalar(32));
2038 } else if (Ty == LLT::fixed_vector(8, 16)) {
2039 Opc = Intrinsic::aarch64_neon_uaddlp;
2040 HAddTys.push_back(LLT::fixed_vector(8, 16));
2041 } else if (Ty == LLT::fixed_vector(4, 32)) {
2042 Opc = Intrinsic::aarch64_neon_uaddlp;
2043 HAddTys.push_back(LLT::fixed_vector(8, 16));
2044 HAddTys.push_back(LLT::fixed_vector(4, 32));
2045 } else if (Ty == LLT::fixed_vector(2, 64)) {
2046 Opc = Intrinsic::aarch64_neon_uaddlp;
2047 HAddTys.push_back(LLT::fixed_vector(8, 16));
2048 HAddTys.push_back(LLT::fixed_vector(4, 32));
2049 HAddTys.push_back(LLT::fixed_vector(2, 64));
2050 } else if (Ty == LLT::fixed_vector(4, 16)) {
2051 Opc = Intrinsic::aarch64_neon_uaddlp;
2052 HAddTys.push_back(LLT::fixed_vector(4, 16));
2053 } else if (Ty == LLT::fixed_vector(2, 32)) {
2054 Opc = Intrinsic::aarch64_neon_uaddlp;
2055 HAddTys.push_back(LLT::fixed_vector(4, 16));
2056 HAddTys.push_back(LLT::fixed_vector(2, 32));
2057 } else
2058 llvm_unreachable("unexpected vector shape");
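// Otherwise chain pairwise widening adds (uaddlp) until the element width
// matches the original type; for scalar inputs a single uaddlv reduces all of
// the byte counts to one 32-bit sum.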
2059 MachineInstrBuilder UADD;
2060 for (LLT HTy : HAddTys) {
2061 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2062 HSum = UADD.getReg(0);
2063 }
2064
2065 // Post-conditioning.
2066 if (Ty.isScalar() && (Size == 64 || Size == 128))
2067 MIRBuilder.buildZExt(Dst, UADD);
2068 else
2069 UADD->getOperand(0).setReg(Dst);
2070 MI.eraseFromParent();
2071 return true;
2072}
2073
2074bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2075 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2076 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2077 LLT s64 = LLT::scalar(64);
2078 auto Addr = MI.getOperand(1).getReg();
2079 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2080 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2081 auto DstLo = MRI.createGenericVirtualRegister(s64);
2082 auto DstHi = MRI.createGenericVirtualRegister(s64);
2083
2084 MachineInstrBuilder CAS;
2085 if (ST->hasLSE()) {
2086 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2087 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2088 // the rest of the MIR so we must reassemble the extracted registers into a
2089 // 128-bit known-regclass one with code like this:
2090 //
2091 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2092 // %out = CASP %in1, ...
2093 // %OldLo = G_EXTRACT %out, 0
2094 // %OldHi = G_EXTRACT %out, 64
2095 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2096 unsigned Opcode;
2097 switch (Ordering) {
2098 case AtomicOrdering::Acquire:
2099 Opcode = AArch64::CASPAX;
2100 break;
2101 case AtomicOrdering::Release:
2102 Opcode = AArch64::CASPLX;
2103 break;
2104 case AtomicOrdering::AcquireRelease:
2105 case AtomicOrdering::SequentiallyConsistent:
2106 Opcode = AArch64::CASPALX;
2107 break;
2108 default:
2109 Opcode = AArch64::CASPX;
2110 break;
2111 }
2112
2113 LLT s128 = LLT::scalar(128);
2114 auto CASDst = MRI.createGenericVirtualRegister(s128);
2115 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2116 auto CASNew = MRI.createGenericVirtualRegister(s128);
2117 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2118 .addUse(DesiredI->getOperand(0).getReg())
2119 .addImm(AArch64::sube64)
2120 .addUse(DesiredI->getOperand(1).getReg())
2121 .addImm(AArch64::subo64);
2122 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2123 .addUse(NewI->getOperand(0).getReg())
2124 .addImm(AArch64::sube64)
2125 .addUse(NewI->getOperand(1).getReg())
2126 .addImm(AArch64::subo64);
2127
2128 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2129
2130 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2131 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2132 } else {
2133 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2134 // can take arbitrary registers so it just has the normal GPR64 operands the
2135 // rest of AArch64 is expecting.
2136 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2137 unsigned Opcode;
2138 switch (Ordering) {
2139 case AtomicOrdering::Acquire:
2140 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2141 break;
2142 case AtomicOrdering::Release:
2143 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2144 break;
2145 case AtomicOrdering::AcquireRelease:
2146 case AtomicOrdering::SequentiallyConsistent:
2147 Opcode = AArch64::CMP_SWAP_128;
2148 break;
2149 default:
2150 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2151 break;
2152 }
2153
2154 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2155 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2156 {Addr, DesiredI->getOperand(0),
2157 DesiredI->getOperand(1), NewI->getOperand(0),
2158 NewI->getOperand(1)});
2159 }
2160
2161 CAS.cloneMemRefs(MI);
2162 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2163 *MRI.getTargetRegisterInfo(),
2164 *ST->getRegBankInfo());
2165
2166 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2167 MI.eraseFromParent();
2168 return true;
2169}
2170
2171bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2172 LegalizerHelper &Helper) const {
2173 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2174 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2175 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
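// AArch64 has no CTTZ instruction, but cttz(x) == ctlz(bitreverse(x)) and both
// RBIT and CLZ are available, so lower via G_BITREVERSE + G_CTLZ.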
2176 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2177 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2178 MI.eraseFromParent();
2179 return true;
2180}
2181
2182bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2183 LegalizerHelper &Helper) const {
2184 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2185
2186 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2187 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2188 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2189 // the instruction).
2190 auto &Value = MI.getOperand(1);
2191 Register ExtValueReg =
2192 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2193 Value.setReg(ExtValueReg);
2194 return true;
2195 }
2196
2197 return false;
2198}
2199
2200bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2201 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2202 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2203 auto VRegAndVal =
2204 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2205 if (VRegAndVal)
2206 return true;
2207 LLT VecTy = MRI.getType(Element->getVectorReg());
2208 if (VecTy.isScalableVector())
2209 return true;
2210 return Helper.lowerExtractInsertVectorElt(MI) !=
2211 LegalizerHelper::LegalizeResult::UnableToLegalize;
2212}
2213
2214bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2215 MachineInstr &MI, LegalizerHelper &Helper) const {
2216 MachineFunction &MF = *MI.getParent()->getParent();
2217 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2218 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2219
2220 // If stack probing is not enabled for this function, use the default
2221 // lowering.
2222 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2223 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2224 "inline-asm") {
2225 Helper.lowerDynStackAlloc(MI);
2226 return true;
2227 }
2228
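// Stack-probing path: compute the new stack pointer with the generic helper,
// emit the PROBED_STACKALLOC_DYN pseudo to probe and update SP, then copy the
// resulting SP into the destination register.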
2229 Register Dst = MI.getOperand(0).getReg();
2230 Register AllocSize = MI.getOperand(1).getReg();
2231 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2232
2233 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2234 "Unexpected type for dynamic alloca");
2235 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2236 "Unexpected type for dynamic alloca");
2237
2238 LLT PtrTy = MRI.getType(Dst);
2239 Register SPReg =
2240 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2241 Register SPTmp =
2242 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2243 auto NewMI =
2244 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2245 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2246 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2247 MIRBuilder.buildCopy(Dst, SPTmp);
2248
2249 MI.eraseFromParent();
2250 return true;
2251}
2252
2253bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2254 LegalizerHelper &Helper) const {
2255 MachineIRBuilder &MIB = Helper.MIRBuilder;
2256 auto &AddrVal = MI.getOperand(0);
2257
2258 int64_t IsWrite = MI.getOperand(1).getImm();
2259 int64_t Locality = MI.getOperand(2).getImm();
2260 int64_t IsData = MI.getOperand(3).getImm();
2261
2262 bool IsStream = Locality == 0;
2263 if (Locality != 0) {
2264 assert(Locality <= 3 && "Prefetch locality out-of-range");
2265 // The locality degree is the opposite of the cache speed.
2266 // Put the number the other way around.
2267 // The encoding starts at 0 for level 1
2268 Locality = 3 - Locality;
2269 }
2270
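// PrfOp packs the PRFM prfop field: bit 4 is set for write (PST) prefetches,
// bit 3 for instruction (PLI) prefetches, bits 2:1 encode the target cache
// level and bit 0 selects streaming. For example, a data write prefetch with
// locality 3 gives (1 << 4) | (0 << 3) | (0 << 1) | 0 == 16, i.e. PSTL1KEEP.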
2271 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2272
2273 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2274 MI.eraseFromParent();
2275 return true;
2276}