1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
65 v16s8, v8s16, v4s32,
66 v2s64, v2p0,
67 /* End 128bit types */
68 /* Begin 64bit types */
69 v8s8, v4s16, v2s32};
70 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
71 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
72 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
73
74 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
75
76 // FIXME: support subtargets which have neon/fp-armv8 disabled.
77 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
79 return;
80 }
81
82 // Some instructions only support s16 if the subtarget has full 16-bit FP
83 // support.
84 const bool HasFP16 = ST.hasFullFP16();
85 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
86
87 const bool HasCSSC = ST.hasCSSC();
88 const bool HasRCPC3 = ST.hasRCPC3();
89
90 getActionDefinitionsBuilder(
91 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
92 .legalFor({p0, s8, s16, s32, s64})
93 .legalFor(PackedVectorAllTypeList)
95 .clampScalar(0, s8, s64)
98 .clampNumElements(0, v8s8, v16s8)
99 .clampNumElements(0, v4s16, v8s16)
100 .clampNumElements(0, v2s32, v4s32)
101 .clampNumElements(0, v2s64, v2s64);
102
104 .legalFor({p0, s16, s32, s64})
105 .legalFor(PackedVectorAllTypeList)
107 .clampScalar(0, s16, s64)
108 // Maximum: sN * k = 128
109 .clampMaxNumElements(0, s8, 16)
110 .clampMaxNumElements(0, s16, 8)
111 .clampMaxNumElements(0, s32, 4)
112 .clampMaxNumElements(0, s64, 2)
113 .clampMaxNumElements(0, p0, 2);
114
116 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
118 .clampScalar(0, s32, s64)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampNumElements(0, v2s64, v2s64)
122 .moreElementsToNextPow2(0);
123
124 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
125 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
126 .widenScalarToNextPow2(0)
127 .clampScalar(0, s32, s64)
128 .clampMaxNumElements(0, s8, 16)
129 .clampMaxNumElements(0, s16, 8)
130 .clampNumElements(0, v2s32, v4s32)
131 .clampNumElements(0, v2s64, v2s64)
133 [=](const LegalityQuery &Query) {
134 return Query.Types[0].getNumElements() <= 2;
135 },
136 0, s32)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 4;
140 },
141 0, s16)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 16;
145 },
146 0, s8)
148
149 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
150 .customIf([=](const LegalityQuery &Query) {
151 const auto &SrcTy = Query.Types[0];
152 const auto &AmtTy = Query.Types[1];
153 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
154 AmtTy.getSizeInBits() == 32;
155 })
156 .legalFor({
157 {s32, s32},
158 {s32, s64},
159 {s64, s64},
160 {v8s8, v8s8},
161 {v16s8, v16s8},
162 {v4s16, v4s16},
163 {v8s16, v8s16},
164 {v2s32, v2s32},
165 {v4s32, v4s32},
166 {v2s64, v2s64},
167 })
168 .widenScalarToNextPow2(0)
169 .clampScalar(1, s32, s64)
170 .clampScalar(0, s32, s64)
171 .clampNumElements(0, v8s8, v16s8)
172 .clampNumElements(0, v4s16, v8s16)
173 .clampNumElements(0, v2s32, v4s32)
174 .clampNumElements(0, v2s64, v2s64)
176 .minScalarSameAs(1, 0);
177
179 .legalFor({{p0, s64}, {v2p0, v2s64}})
180 .clampScalar(1, s64, s64);
181
182 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
183
184 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
185 .legalFor({s32, s64})
186 .libcallFor({s128})
187 .clampScalar(0, s32, s64)
189 .scalarize(0);
190
191 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
192 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
194 .clampScalarOrElt(0, s32, s64)
195 .clampNumElements(0, v2s32, v4s32)
196 .clampNumElements(0, v2s64, v2s64)
197 .moreElementsToNextPow2(0);
198
199
200 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
201 .widenScalarToNextPow2(0, /*Min = */ 32)
202 .clampScalar(0, s32, s64)
203 .lower();
204
205 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
206 .legalFor({s64, v8s16, v16s8, v4s32})
207 .lower();
208
209 auto &MinMaxActions = getActionDefinitionsBuilder(
210 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
211 if (HasCSSC)
212 MinMaxActions
213 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
214 // Make clamping conditional on the CSSC extension: without legal types we
215 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
216 // when we detect a type smaller than 32 bits.
217 .minScalar(0, s32);
218 else
219 MinMaxActions
220 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
221 MinMaxActions
222 .clampNumElements(0, v8s8, v16s8)
223 .clampNumElements(0, v4s16, v8s16)
224 .clampNumElements(0, v2s32, v4s32)
225 // FIXME: This shouldn't be needed as v2s64 types are going to
226 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
227 .clampNumElements(0, v2s64, v2s64)
228 .lower();
229
230 getActionDefinitionsBuilder(
231 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
232 .legalFor({{s32, s32}, {s64, s32}})
233 .clampScalar(0, s32, s64)
234 .clampScalar(1, s32, s64)
236
237 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
238 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
239 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
240 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
241 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
242 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
243 .legalIf([=](const LegalityQuery &Query) {
244 const auto &Ty = Query.Types[0];
245 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
246 })
247 .libcallFor({s128})
248 .minScalarOrElt(0, MinFPScalar)
249 .clampNumElements(0, v4s16, v8s16)
250 .clampNumElements(0, v2s32, v4s32)
251 .clampNumElements(0, v2s64, v2s64)
253
255 .libcallFor({s32, s64})
256 .minScalar(0, s32)
257 .scalarize(0);
258
259 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
260 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
261 .libcallFor({{s64, s128}})
262 .minScalarOrElt(1, MinFPScalar);
263
264 getActionDefinitionsBuilder(
265 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
266 G_FEXP, G_FEXP2, G_FEXP10})
267 // We need a call for these, so we always need to scalarize.
268 .scalarize(0)
269 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
270 .minScalar(0, s32)
271 .libcallFor({s32, s64});
273 .scalarize(0)
274 .minScalar(0, s32)
275 .libcallFor({{s32, s32}, {s64, s32}});
276
278 .legalIf(all(typeInSet(0, {s32, s64, p0}),
279 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
281 .clampScalar(0, s32, s64)
283 .minScalar(1, s8)
284 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
285 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
286
288 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
289 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
291 .clampScalar(1, s32, s128)
293 .minScalar(0, s16)
294 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
295 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
296 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
297
298
299 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
300 auto &Actions = getActionDefinitionsBuilder(Op);
301
302 if (Op == G_SEXTLOAD)
304
305 // Atomics have zero extending behavior.
306 Actions
307 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
308 {s32, p0, s16, 8},
309 {s32, p0, s32, 8},
310 {s64, p0, s8, 2},
311 {s64, p0, s16, 2},
312 {s64, p0, s32, 4},
313 {s64, p0, s64, 8},
314 {p0, p0, s64, 8},
315 {v2s32, p0, s64, 8}})
316 .widenScalarToNextPow2(0)
317 .clampScalar(0, s32, s64)
318 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
319 // how to do that yet.
320 .unsupportedIfMemSizeNotPow2()
321 // Lower anything left over into G_*EXT and G_LOAD
322 .lower();
323 }
324
325 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
326 const LLT &ValTy = Query.Types[0];
327 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
328 };
329
331 .customIf([=](const LegalityQuery &Query) {
332 return HasRCPC3 && Query.Types[0] == s128 &&
333 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
334 })
335 .customIf([=](const LegalityQuery &Query) {
336 return Query.Types[0] == s128 &&
337 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
338 })
339 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
340 {s16, p0, s16, 8},
341 {s32, p0, s32, 8},
342 {s64, p0, s64, 8},
343 {p0, p0, s64, 8},
344 {s128, p0, s128, 8},
345 {v8s8, p0, s64, 8},
346 {v16s8, p0, s128, 8},
347 {v4s16, p0, s64, 8},
348 {v8s16, p0, s128, 8},
349 {v2s32, p0, s64, 8},
350 {v4s32, p0, s128, 8},
351 {v2s64, p0, s128, 8}})
352 // These extends are also legal
353 .legalForTypesWithMemDesc(
354 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
355 .widenScalarToNextPow2(0, /* MinSize = */ 8)
356 .clampMaxNumElements(0, s8, 16)
357 .clampMaxNumElements(0, s16, 8)
358 .clampMaxNumElements(0, s32, 4)
359 .clampMaxNumElements(0, s64, 2)
360 .clampMaxNumElements(0, p0, 2)
362 .clampScalar(0, s8, s64)
364 [=](const LegalityQuery &Query) {
365 // Clamp extending load results to 32-bits.
366 return Query.Types[0].isScalar() &&
367 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
368 Query.Types[0].getSizeInBits() > 32;
369 },
370 changeTo(0, s32))
371 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
372 .bitcastIf(typeInSet(0, {v4s8}),
373 [=](const LegalityQuery &Query) {
374 const LLT VecTy = Query.Types[0];
375 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
376 })
377 .customIf(IsPtrVecPred)
378 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
379
381 .customIf([=](const LegalityQuery &Query) {
382 return HasRCPC3 && Query.Types[0] == s128 &&
383 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
384 })
385 .customIf([=](const LegalityQuery &Query) {
386 return Query.Types[0] == s128 &&
387 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
388 })
389 .legalForTypesWithMemDesc(
390 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
391 {s32, p0, s8, 8}, // truncstorei8 from s32
392 {s64, p0, s8, 8}, // truncstorei8 from s64
393 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
394 {s64, p0, s16, 8}, // truncstorei16 from s64
395 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
396 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
397 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
398 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
399 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
400 .clampScalar(0, s8, s64)
401 .lowerIf([=](const LegalityQuery &Query) {
402 return Query.Types[0].isScalar() &&
403 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
404 })
405 // Maximum: sN * k = 128
406 .clampMaxNumElements(0, s8, 16)
407 .clampMaxNumElements(0, s16, 8)
408 .clampMaxNumElements(0, s32, 4)
409 .clampMaxNumElements(0, s64, 2)
410 .clampMaxNumElements(0, p0, 2)
412 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
413 .bitcastIf(typeInSet(0, {v4s8}),
414 [=](const LegalityQuery &Query) {
415 const LLT VecTy = Query.Types[0];
416 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
417 })
418 .customIf(IsPtrVecPred)
419 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
420
421 getActionDefinitionsBuilder(G_INDEXED_STORE)
422 // Idx 0 == Ptr, Idx 1 == Val
423 // TODO: we can implement legalizations but as of now these are
424 // generated in a very specific way.
426 {p0, s8, s8, 8},
427 {p0, s16, s16, 8},
428 {p0, s32, s8, 8},
429 {p0, s32, s16, 8},
430 {p0, s32, s32, 8},
431 {p0, s64, s64, 8},
432 {p0, p0, p0, 8},
433 {p0, v8s8, v8s8, 8},
434 {p0, v16s8, v16s8, 8},
435 {p0, v4s16, v4s16, 8},
436 {p0, v8s16, v8s16, 8},
437 {p0, v2s32, v2s32, 8},
438 {p0, v4s32, v4s32, 8},
439 {p0, v2s64, v2s64, 8},
440 {p0, v2p0, v2p0, 8},
441 {p0, s128, s128, 8},
442 })
443 .unsupported();
444
445 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
446 LLT LdTy = Query.Types[0];
447 LLT PtrTy = Query.Types[1];
448 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
449 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
450 return false;
451 if (PtrTy != p0)
452 return false;
453 return true;
454 };
455 getActionDefinitionsBuilder(G_INDEXED_LOAD)
458 .legalIf(IndexedLoadBasicPred)
459 .unsupported();
460 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
461 .unsupportedIf(
463 .legalIf(all(typeInSet(0, {s16, s32, s64}),
464 LegalityPredicate([=](const LegalityQuery &Q) {
465 LLT LdTy = Q.Types[0];
466 LLT PtrTy = Q.Types[1];
467 LLT MemTy = Q.MMODescrs[0].MemoryTy;
468 if (PtrTy != p0)
469 return false;
470 if (LdTy == s16)
471 return MemTy == s8;
472 if (LdTy == s32)
473 return MemTy == s8 || MemTy == s16;
474 if (LdTy == s64)
475 return MemTy == s8 || MemTy == s16 || MemTy == s32;
476 return false;
477 })))
478 .unsupported();
479
480 // Constants
482 .legalFor({p0, s8, s16, s32, s64})
483 .widenScalarToNextPow2(0)
484 .clampScalar(0, s8, s64);
485 getActionDefinitionsBuilder(G_FCONSTANT)
486 .legalIf([=](const LegalityQuery &Query) {
487 const auto &Ty = Query.Types[0];
488 if (HasFP16 && Ty == s16)
489 return true;
490 return Ty == s32 || Ty == s64 || Ty == s128;
491 })
492 .clampScalar(0, MinFPScalar, s128);
493
494 // FIXME: fix moreElementsToNextPow2
496 .legalFor({{s32, s32},
497 {s32, s64},
498 {s32, p0},
499 {v4s32, v4s32},
500 {v2s32, v2s32},
501 {v2s64, v2s64},
502 {v2s64, v2p0},
503 {v4s16, v4s16},
504 {v8s16, v8s16},
505 {v8s8, v8s8},
506 {v16s8, v16s8}})
508 .clampScalar(1, s32, s64)
509 .clampScalar(0, s32, s32)
510 .minScalarEltSameAsIf(
511 [=](const LegalityQuery &Query) {
512 const LLT &Ty = Query.Types[0];
513 const LLT &SrcTy = Query.Types[1];
514 return Ty.isVector() && !SrcTy.isPointerVector() &&
515 Ty.getElementType() != SrcTy.getElementType();
516 },
517 0, 1)
518 .minScalarOrEltIf(
519 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
520 1, s32)
521 .minScalarOrEltIf(
522 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
523 s64)
525 .clampNumElements(1, v8s8, v16s8)
526 .clampNumElements(1, v4s16, v8s16)
527 .clampNumElements(1, v2s32, v4s32)
528 .clampNumElements(1, v2s64, v2s64);
529
531 .legalFor({{s32, MinFPScalar},
532 {s32, s32},
533 {s32, s64},
534 {v4s32, v4s32},
535 {v2s32, v2s32},
536 {v2s64, v2s64}})
537 .legalIf([=](const LegalityQuery &Query) {
538 const auto &Ty = Query.Types[1];
539 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
540 })
542 .clampScalar(0, s32, s32)
543 .clampScalarOrElt(1, MinFPScalar, s64)
544 .minScalarEltSameAsIf(
545 [=](const LegalityQuery &Query) {
546 const LLT &Ty = Query.Types[0];
547 const LLT &SrcTy = Query.Types[1];
548 return Ty.isVector() && !SrcTy.isPointerVector() &&
549 Ty.getElementType() != SrcTy.getElementType();
550 },
551 0, 1)
552 .clampNumElements(1, v4s16, v8s16)
553 .clampNumElements(1, v2s32, v4s32)
554 .clampMaxNumElements(1, s64, 2)
555 .moreElementsToNextPow2(1);
556
557 // Extensions
558 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
559 unsigned DstSize = Query.Types[0].getSizeInBits();
560
561 // Handle legal vectors using legalFor
562 if (Query.Types[0].isVector())
563 return false;
564
565 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
566 return false; // Extending to a scalar s128 needs narrowing.
567
568 const LLT &SrcTy = Query.Types[1];
569
570 // Make sure we fit in a register otherwise. Don't bother checking that
571 // the source type is below 128 bits. We shouldn't be allowing anything
572 // through which is wider than the destination in the first place.
573 unsigned SrcSize = SrcTy.getSizeInBits();
574 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
575 return false;
576
577 return true;
578 };
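  // For example, this predicate accepts s64 <- s32 or s32 <- s8 extends, but
  // rejects any extend producing s128 (DstSize >= 128); those are narrowed by
  // the clampScalar(0, s64, s64) rule below.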
579 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
580 .legalIf(ExtLegalFunc)
581 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
582 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
584 .clampMaxNumElements(1, s8, 8)
585 .clampMaxNumElements(1, s16, 4)
586 .clampMaxNumElements(1, s32, 2)
587 // Tries to convert a large EXTEND into two smaller EXTENDs
588 .lowerIf([=](const LegalityQuery &Query) {
589 return (Query.Types[0].getScalarSizeInBits() >
590 Query.Types[1].getScalarSizeInBits() * 2) &&
591 Query.Types[0].isVector() &&
592 (Query.Types[1].getScalarSizeInBits() == 8 ||
593 Query.Types[1].getScalarSizeInBits() == 16);
594 })
595 .clampMinNumElements(1, s8, 8)
596 .clampMinNumElements(1, s16, 4);
597
599 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
601 .clampMaxNumElements(0, s8, 8)
602 .clampMaxNumElements(0, s16, 4)
603 .clampMaxNumElements(0, s32, 2)
604 .minScalarOrEltIf(
605 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
606 0, s8)
607 .lowerIf([=](const LegalityQuery &Query) {
608 LLT DstTy = Query.Types[0];
609 LLT SrcTy = Query.Types[1];
610 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
611 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
612 })
613 .clampMinNumElements(0, s8, 8)
614 .clampMinNumElements(0, s16, 4)
615 .alwaysLegal();
616
617 getActionDefinitionsBuilder(G_SEXT_INREG)
618 .legalFor({s32, s64})
619 .legalFor(PackedVectorAllTypeList)
620 .maxScalar(0, s64)
621 .clampNumElements(0, v8s8, v16s8)
622 .clampNumElements(0, v4s16, v8s16)
623 .clampNumElements(0, v2s32, v4s32)
624 .clampMaxNumElements(0, s64, 2)
625 .lower();
626
627 // FP conversions
629 .legalFor(
630 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
631 .clampNumElements(0, v4s16, v4s16)
632 .clampNumElements(0, v2s32, v2s32)
633 .scalarize(0);
634
636 .legalFor(
637 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
638 .clampNumElements(0, v4s32, v4s32)
639 .clampNumElements(0, v2s64, v2s64)
640 .scalarize(0);
641
642 // Conversions
643 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
644 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
645 .legalIf([=](const LegalityQuery &Query) {
646 return HasFP16 &&
647 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
648 Query.Types[1] == v8s16) &&
649 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
650 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
651 })
652 .widenScalarToNextPow2(0)
653 .clampScalar(0, s32, s64)
655 .clampScalarOrElt(1, MinFPScalar, s64)
658 [=](const LegalityQuery &Query) {
659 return Query.Types[0].getScalarSizeInBits() >
660 Query.Types[1].getScalarSizeInBits();
661 },
663 .widenScalarIf(
664 [=](const LegalityQuery &Query) {
665 return Query.Types[0].getScalarSizeInBits() <
666 Query.Types[1].getScalarSizeInBits();
667 },
669 .clampNumElements(0, v4s16, v8s16)
670 .clampNumElements(0, v2s32, v4s32)
671 .clampMaxNumElements(0, s64, 2);
672
673 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
674 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
675 .legalIf([=](const LegalityQuery &Query) {
676 return HasFP16 &&
677 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
678 Query.Types[0] == v8s16) &&
679 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
680 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
681 })
682 .widenScalarToNextPow2(1)
683 .clampScalar(1, s32, s64)
685 .clampScalarOrElt(0, MinFPScalar, s64)
688 [=](const LegalityQuery &Query) {
689 return Query.Types[0].getScalarSizeInBits() <
690 Query.Types[1].getScalarSizeInBits();
691 },
693 .widenScalarIf(
694 [=](const LegalityQuery &Query) {
695 return Query.Types[0].getScalarSizeInBits() >
696 Query.Types[1].getScalarSizeInBits();
697 },
699 .clampNumElements(0, v4s16, v8s16)
700 .clampNumElements(0, v2s32, v4s32)
701 .clampMaxNumElements(0, s64, 2);
702
703 // Control-flow
705 .legalFor({s32})
706 .clampScalar(0, s32, s32);
707 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
708
710 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
711 .widenScalarToNextPow2(0)
712 .clampScalar(0, s32, s64)
713 .clampScalar(1, s32, s32)
715 .lowerIf(isVector(0));
716
717 // Pointer-handling
718 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
719
720 if (TM.getCodeModel() == CodeModel::Small)
721 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
722 else
723 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
724
726 .legalFor({{s64, p0}, {v2s64, v2p0}})
727 .widenScalarToNextPow2(0, 64)
728 .clampScalar(0, s64, s64);
729
731 .unsupportedIf([&](const LegalityQuery &Query) {
732 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
733 })
734 .legalFor({{p0, s64}, {v2p0, v2s64}});
735
736 // Casts for 32 and 64-bit width type are just copies.
737 // Same for 128-bit width type, except they are on the FPR bank.
739 // Keeping 32-bit instructions legal to prevent regression in some tests
740 .legalForCartesianProduct({s32, v2s16, v4s8})
741 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
742 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
744 .clampNumElements(0, v8s8, v16s8)
745 .clampNumElements(0, v4s16, v8s16)
746 .clampNumElements(0, v2s32, v4s32)
747 .lower();
748
749 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
750
751 // va_list must be a pointer, but most sized types are pretty easy to handle
752 // as the destination.
754 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
755 .clampScalar(0, s8, s64)
756 .widenScalarToNextPow2(0, /*Min*/ 8);
757
758 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
759 .lowerIf(
760 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
761
762 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
763 return ST.outlineAtomics() && !ST.hasLSE();
764 };
765
766 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
767 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
768 predNot(UseOutlineAtomics)))
769 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
770 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
771 return Query.Types[0].getSizeInBits() == 128 &&
772 !UseOutlineAtomics(Query);
773 })
774 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
775 UseOutlineAtomics))
776 .clampScalar(0, s32, s64);
777
778 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
779 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
780 G_ATOMICRMW_XOR})
781 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
782 predNot(UseOutlineAtomics)))
783 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
784 UseOutlineAtomics))
785 .clampScalar(0, s32, s64);
786
787 // Do not outline these atomics operations, as per comment in
788 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
789 getActionDefinitionsBuilder(
790 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
791 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
792 .clampScalar(0, s32, s64);
793
794 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
795
796 // Merge/Unmerge
797 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
798 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
799 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
800 getActionDefinitionsBuilder(Op)
801 .widenScalarToNextPow2(LitTyIdx, 8)
802 .widenScalarToNextPow2(BigTyIdx, 32)
803 .clampScalar(LitTyIdx, s8, s64)
804 .clampScalar(BigTyIdx, s32, s128)
805 .legalIf([=](const LegalityQuery &Q) {
806 switch (Q.Types[BigTyIdx].getSizeInBits()) {
807 case 32:
808 case 64:
809 case 128:
810 break;
811 default:
812 return false;
813 }
814 switch (Q.Types[LitTyIdx].getSizeInBits()) {
815 case 8:
816 case 16:
817 case 32:
818 case 64:
819 return true;
820 default:
821 return false;
822 }
823 });
824 }
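  // Illustrative effect of the rules above: an s64 merged from two s32 pieces
  // is already legal, while an odd-sized piece such as s6 is first widened to
  // the next power of two (s8) and clamped before the size check runs.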
825
826 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
827 .unsupportedIf([=](const LegalityQuery &Query) {
828 const LLT &EltTy = Query.Types[1].getElementType();
829 return Query.Types[0] != EltTy;
830 })
831 .minScalar(2, s64)
832 .customIf([=](const LegalityQuery &Query) {
833 const LLT &VecTy = Query.Types[1];
834 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
835 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
836 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
837 })
838 .minScalarOrEltIf(
839 [=](const LegalityQuery &Query) {
840 // We want to promote <M x s1> to <M x s64> if that wouldn't
841 // cause the total vector size to exceed 128 bits.
842 return Query.Types[1].getNumElements() <= 2;
843 },
844 0, s64)
845 .minScalarOrEltIf(
846 [=](const LegalityQuery &Query) {
847 return Query.Types[1].getNumElements() <= 4;
848 },
849 0, s32)
850 .minScalarOrEltIf(
851 [=](const LegalityQuery &Query) {
852 return Query.Types[1].getNumElements() <= 8;
853 },
854 0, s16)
855 .minScalarOrEltIf(
856 [=](const LegalityQuery &Query) {
857 return Query.Types[1].getNumElements() <= 16;
858 },
859 0, s8)
860 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
862 .clampMaxNumElements(1, s64, 2)
863 .clampMaxNumElements(1, s32, 4)
864 .clampMaxNumElements(1, s16, 8)
865 .clampMaxNumElements(1, s8, 16)
866 .clampMaxNumElements(1, p0, 2);
867
868 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
869 .legalIf(
870 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
873 .clampNumElements(0, v8s8, v16s8)
874 .clampNumElements(0, v4s16, v8s16)
875 .clampNumElements(0, v2s32, v4s32)
876 .clampMaxNumElements(0, s64, 2)
877 .clampMaxNumElements(0, p0, 2);
878
879 getActionDefinitionsBuilder(G_BUILD_VECTOR)
880 .legalFor({{v8s8, s8},
881 {v16s8, s8},
882 {v4s16, s16},
883 {v8s16, s16},
884 {v2s32, s32},
885 {v4s32, s32},
886 {v2p0, p0},
887 {v2s64, s64}})
888 .clampNumElements(0, v4s32, v4s32)
889 .clampNumElements(0, v2s64, v2s64)
890 .minScalarOrElt(0, s8)
892 .minScalarSameAs(1, 0);
893
894 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
895
898 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
899 .scalarize(1)
900 .widenScalarToNextPow2(1, /*Min=*/32)
901 .clampScalar(1, s32, s64)
902 .scalarSameSizeAs(0, 1);
903 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
904
905 // TODO: Custom lowering for v2s32, v4s32, v2s64.
906 getActionDefinitionsBuilder(G_BITREVERSE)
907 .legalFor({s32, s64, v8s8, v16s8})
908 .widenScalarToNextPow2(0, /*Min = */ 32)
909 .clampScalar(0, s32, s64);
910
911 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
912
914 .lowerIf(isVector(0))
915 .widenScalarToNextPow2(1, /*Min=*/32)
916 .clampScalar(1, s32, s64)
917 .scalarSameSizeAs(0, 1)
918 .legalIf([=](const LegalityQuery &Query) {
919 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
920 })
921 .customIf([=](const LegalityQuery &Query) {
922 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
923 });
924
925 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
926 .legalIf([=](const LegalityQuery &Query) {
927 const LLT &DstTy = Query.Types[0];
928 const LLT &SrcTy = Query.Types[1];
929 // For now just support the TBL2 variant which needs the source vectors
930 // to be the same size as the dest.
931 if (DstTy != SrcTy)
932 return false;
933 return llvm::is_contained(
934 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
935 })
936 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
937 // just want those lowered into G_BUILD_VECTOR.
938 .lowerIf([=](const LegalityQuery &Query) {
939 return !Query.Types[1].isVector();
940 })
941 .moreElementsIf(
942 [](const LegalityQuery &Query) {
943 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
944 Query.Types[0].getNumElements() >
945 Query.Types[1].getNumElements();
946 },
947 changeTo(1, 0))
949 .moreElementsIf(
950 [](const LegalityQuery &Query) {
951 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
952 Query.Types[0].getNumElements() <
953 Query.Types[1].getNumElements();
954 },
955 changeTo(0, 1))
956 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
957 .clampNumElements(0, v8s8, v16s8)
958 .clampNumElements(0, v4s16, v8s16)
959 .clampNumElements(0, v4s32, v4s32)
960 .clampNumElements(0, v2s64, v2s64);
961
962 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
963 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
964
965 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
966
967 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
968
969 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
970
971 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
972
973 if (ST.hasMOPS()) {
974 // G_BZERO is not supported. Currently it is only emitted by
975 // PreLegalizerCombiner for G_MEMSET with zero constant.
977
979 .legalForCartesianProduct({p0}, {s64}, {s64})
980 .customForCartesianProduct({p0}, {s8}, {s64})
981 .immIdx(0); // Inform verifier imm idx 0 is handled.
982
983 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
984 .legalForCartesianProduct({p0}, {p0}, {s64})
985 .immIdx(0); // Inform verifier imm idx 0 is handled.
986
987 // G_MEMCPY_INLINE does not have a tailcall immediate
988 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
989 .legalForCartesianProduct({p0}, {p0}, {s64});
990
991 } else {
992 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
993 .libcall();
994 }
995
996 // FIXME: Legal vector types are only legal with NEON.
997 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
998 if (HasCSSC)
999 ABSActions
1000 .legalFor({s32, s64});
1001 ABSActions.legalFor(PackedVectorAllTypeList)
1002 .customIf([=](const LegalityQuery &Q) {
1003 // TODO: Fix suboptimal codegen for 128+ bit types.
1004 LLT SrcTy = Q.Types[0];
1005 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1006 })
1007 .widenScalarIf(
1008 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1009 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1010 .widenScalarIf(
1011 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1012 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1013 .clampNumElements(0, v8s8, v16s8)
1014 .clampNumElements(0, v4s16, v8s16)
1015 .clampNumElements(0, v2s32, v4s32)
1016 .clampNumElements(0, v2s64, v2s64)
1017 .moreElementsToNextPow2(0)
1018 .lower();
1019
1020 // For fadd reductions we have pairwise operations available. We treat the
1021 // usual legal types as legal and handle the lowering to pairwise instructions
1022 // later.
1023 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1024 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1025 .legalIf([=](const LegalityQuery &Query) {
1026 const auto &Ty = Query.Types[1];
1027 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1028 })
1029 .minScalarOrElt(0, MinFPScalar)
1030 .clampMaxNumElements(1, s64, 2)
1031 .clampMaxNumElements(1, s32, 4)
1032 .clampMaxNumElements(1, s16, 8)
1033 .lower();
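  // e.g. a G_VECREDUCE_FADD of v4s32 is kept legal here; the expectation is
  // that instruction selection turns it into a short chain of pairwise FADDP
  // operations rather than a serial reduction.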
1034
1035 // For fmul reductions we need to split up into individual operations. We
1036 // clamp to 128-bit vectors, then to 64-bit vectors, to produce a cascade of
1037 // smaller types, followed by scalarizing what remains.
1038 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1039 .minScalarOrElt(0, MinFPScalar)
1040 .clampMaxNumElements(1, s64, 2)
1041 .clampMaxNumElements(1, s32, 4)
1042 .clampMaxNumElements(1, s16, 8)
1043 .clampMaxNumElements(1, s32, 2)
1044 .clampMaxNumElements(1, s16, 4)
1045 .scalarize(1)
1046 .lower();
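  // Roughly, for a v8s32 source: the clamps step the vector down towards
  // v2s32 pieces, scalarize(1) breaks those into scalars, and lower() leaves a
  // sequence of plain scalar G_FMULs producing the result.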
1047
1048 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1049 .scalarize(2)
1050 .lower();
1051
1052 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1053 .legalFor({{s8, v16s8},
1054 {s8, v8s8},
1055 {s16, v8s16},
1056 {s16, v4s16},
1057 {s32, v4s32},
1058 {s32, v2s32},
1059 {s64, v2s64}})
1060 .clampMaxNumElements(1, s64, 2)
1061 .clampMaxNumElements(1, s32, 4)
1062 .clampMaxNumElements(1, s16, 8)
1063 .clampMaxNumElements(1, s8, 16)
1064 .lower();
1065
1066 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1067 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1068 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1069 .legalIf([=](const LegalityQuery &Query) {
1070 const auto &Ty = Query.Types[1];
1071 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1072 })
1073 .minScalarOrElt(0, MinFPScalar)
1074 .clampMaxNumElements(1, s64, 2)
1075 .clampMaxNumElements(1, s32, 4)
1076 .clampMaxNumElements(1, s16, 8)
1077 .lower();
1078
1079 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1080 .clampMaxNumElements(1, s32, 2)
1081 .clampMaxNumElements(1, s16, 4)
1082 .clampMaxNumElements(1, s8, 8)
1083 .scalarize(1)
1084 .lower();
1085
1086 getActionDefinitionsBuilder(
1087 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1088 .legalFor({{s8, v8s8},
1089 {s8, v16s8},
1090 {s16, v4s16},
1091 {s16, v8s16},
1092 {s32, v2s32},
1093 {s32, v4s32}})
1094 .moreElementsIf(
1095 [=](const LegalityQuery &Query) {
1096 return Query.Types[1].isVector() &&
1097 Query.Types[1].getElementType() != s8 &&
1098 Query.Types[1].getNumElements() & 1;
1099 },
1101 .clampMaxNumElements(1, s64, 2)
1102 .clampMaxNumElements(1, s32, 4)
1103 .clampMaxNumElements(1, s16, 8)
1104 .clampMaxNumElements(1, s8, 16)
1105 .scalarize(1)
1106 .lower();
1107
1108 getActionDefinitionsBuilder(
1109 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1110 // Try to break down into smaller vectors as long as they're at least 64
1111 // bits. This lets us use vector operations for some parts of the
1112 // reduction.
1113 .fewerElementsIf(
1114 [=](const LegalityQuery &Q) {
1115 LLT SrcTy = Q.Types[1];
1116 if (SrcTy.isScalar())
1117 return false;
1118 if (!isPowerOf2_32(SrcTy.getNumElements()))
1119 return false;
1120 // We can usually perform 64b vector operations.
1121 return SrcTy.getSizeInBits() > 64;
1122 },
1123 [=](const LegalityQuery &Q) {
1124 LLT SrcTy = Q.Types[1];
1125 return std::make_pair(1, SrcTy.divide(2));
1126 })
1127 .scalarize(1)
1128 .lower();
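  // Roughly, a G_VECREDUCE_OR of v4s32 (128 bits) is first split so the two
  // v2s32 halves can be combined with a single 64-bit vector G_OR; only the
  // remaining v2s32 reduction is then scalarized and lowered.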
1129
1130 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1131 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1132 .lower();
1133
1135 .legalFor({{s32, s64}, {s64, s64}})
1136 .customIf([=](const LegalityQuery &Q) {
1137 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1138 })
1139 .lower();
1141
1142 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1143 .customFor({{s32, s32}, {s64, s64}});
1144
1145 auto always = [=](const LegalityQuery &Q) { return true; };
1146 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1147 if (HasCSSC)
1148 CTPOPActions
1149 .legalFor({{s32, s32},
1150 {s64, s64},
1151 {v8s8, v8s8},
1152 {v16s8, v16s8}})
1153 .customFor({{s128, s128},
1154 {v2s64, v2s64},
1155 {v2s32, v2s32},
1156 {v4s32, v4s32},
1157 {v4s16, v4s16},
1158 {v8s16, v8s16}});
1159 else
1160 CTPOPActions
1161 .legalFor({{v8s8, v8s8},
1162 {v16s8, v16s8}})
1163 .customFor({{s32, s32},
1164 {s64, s64},
1165 {s128, s128},
1166 {v2s64, v2s64},
1167 {v2s32, v2s32},
1168 {v4s32, v4s32},
1169 {v4s16, v4s16},
1170 {v8s16, v8s16}});
1171 CTPOPActions
1172 .clampScalar(0, s32, s128)
1173 .widenScalarToNextPow2(0)
1174 .minScalarEltSameAsIf(always, 1, 0)
1175 .maxScalarEltSameAsIf(always, 1, 0);
1176
1177 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1178 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1179 .clampNumElements(0, v8s8, v16s8)
1180 .clampNumElements(0, v4s16, v8s16)
1181 .clampNumElements(0, v2s32, v4s32)
1182 .clampMaxNumElements(0, s64, 2)
1184 .lower();
1185
1186 // TODO: Libcall support for s128.
1187 // TODO: s16 should be legal with full FP16 support.
1188 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1189 .legalFor({{s64, s32}, {s64, s64}});
1190
1191 // TODO: Custom legalization for mismatched types.
1192 getActionDefinitionsBuilder(G_FCOPYSIGN)
1194 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1195 [=](const LegalityQuery &Query) {
1196 const LLT Ty = Query.Types[0];
1197 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1198 })
1199 .lower();
1200
1202
1203 // Access to floating-point environment.
1204 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1205 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1206 .libcall();
1207
1208 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1209
1210 getActionDefinitionsBuilder(G_PREFETCH).custom();
1211
1213 verify(*ST.getInstrInfo());
1214}
1215
1218 LostDebugLocObserver &LocObserver) const {
1219 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1220 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1221 GISelChangeObserver &Observer = Helper.Observer;
1222 switch (MI.getOpcode()) {
1223 default:
1224 // No idea what to do.
1225 return false;
1226 case TargetOpcode::G_VAARG:
1227 return legalizeVaArg(MI, MRI, MIRBuilder);
1228 case TargetOpcode::G_LOAD:
1229 case TargetOpcode::G_STORE:
1230 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1231 case TargetOpcode::G_SHL:
1232 case TargetOpcode::G_ASHR:
1233 case TargetOpcode::G_LSHR:
1234 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1235 case TargetOpcode::G_GLOBAL_VALUE:
1236 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1237 case TargetOpcode::G_SBFX:
1238 case TargetOpcode::G_UBFX:
1239 return legalizeBitfieldExtract(MI, MRI, Helper);
1240 case TargetOpcode::G_FSHL:
1241 case TargetOpcode::G_FSHR:
1242 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1243 case TargetOpcode::G_ROTR:
1244 return legalizeRotate(MI, MRI, Helper);
1245 case TargetOpcode::G_CTPOP:
1246 return legalizeCTPOP(MI, MRI, Helper);
1247 case TargetOpcode::G_ATOMIC_CMPXCHG:
1248 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1249 case TargetOpcode::G_CTTZ:
1250 return legalizeCTTZ(MI, Helper);
1251 case TargetOpcode::G_BZERO:
1252 case TargetOpcode::G_MEMCPY:
1253 case TargetOpcode::G_MEMMOVE:
1254 case TargetOpcode::G_MEMSET:
1255 return legalizeMemOps(MI, Helper);
1256 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1257 return legalizeExtractVectorElt(MI, MRI, Helper);
1258 case TargetOpcode::G_DYN_STACKALLOC:
1259 return legalizeDynStackAlloc(MI, Helper);
1260 case TargetOpcode::G_PREFETCH:
1261 return legalizePrefetch(MI, Helper);
1262 case TargetOpcode::G_ABS:
1263 return Helper.lowerAbsToCNeg(MI);
1264 }
1265
1266 llvm_unreachable("expected switch to return");
1267}
1268
1269bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1271 MachineIRBuilder &MIRBuilder,
1272 GISelChangeObserver &Observer,
1273 LegalizerHelper &Helper) const {
1274 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1275 MI.getOpcode() == TargetOpcode::G_FSHR);
1276
1277 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1278 // lowering
1279 Register ShiftNo = MI.getOperand(3).getReg();
1280 LLT ShiftTy = MRI.getType(ShiftNo);
1281 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1282
1283 // Adjust shift amount according to Opcode (FSHL/FSHR)
1284 // Convert FSHL to FSHR
1285 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1286 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1287
1288 // Lower non-constant shifts and leave zero shifts to the optimizer.
1289 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1290 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1292
1293 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1294
1295 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
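  // For example, with s32 operands a G_FSHL by 8 is the same funnel shift as a
  // G_FSHR by 24 (BitWidth - 8), so only the amount needs rewriting.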
1296
1297 // If the instruction is G_FSHR and has a 64-bit G_CONSTANT shift amount
1298 // in the range [0, BitWidth), it is legal.
1299 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1300 VRegAndVal->Value.ult(BitWidth))
1301 return true;
1302
1303 // Cast the ShiftNumber to a 64-bit type
1304 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1305
1306 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1307 Observer.changingInstr(MI);
1308 MI.getOperand(3).setReg(Cast64.getReg(0));
1309 Observer.changedInstr(MI);
1310 }
1311 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1312 // instruction
1313 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1314 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1315 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1316 Cast64.getReg(0)});
1317 MI.eraseFromParent();
1318 }
1319 return true;
1320}
1321
1322bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1324 LegalizerHelper &Helper) const {
1325 // To allow for imported patterns to match, we ensure that the rotate amount
1326 // is 64b with an extension.
1327 Register AmtReg = MI.getOperand(2).getReg();
1328 LLT AmtTy = MRI.getType(AmtReg);
1329 (void)AmtTy;
1330 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1331 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1332 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1333 Helper.Observer.changingInstr(MI);
1334 MI.getOperand(2).setReg(NewAmt.getReg(0));
1335 Helper.Observer.changedInstr(MI);
1336 return true;
1337}
1338
1339bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1341 GISelChangeObserver &Observer) const {
1342 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1343 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1344 // G_ADD_LOW instructions.
1345 // By splitting this here, we can optimize accesses in the small code model by
1346 // folding in the G_ADD_LOW into the load/store offset.
1347 auto &GlobalOp = MI.getOperand(1);
1348 // Don't modify an intrinsic call.
1349 if (GlobalOp.isSymbol())
1350 return true;
1351 const auto* GV = GlobalOp.getGlobal();
1352 if (GV->isThreadLocal())
1353 return true; // Don't want to modify TLS vars.
1354
1355 auto &TM = ST->getTargetLowering()->getTargetMachine();
1356 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1357
1358 if (OpFlags & AArch64II::MO_GOT)
1359 return true;
1360
1361 auto Offset = GlobalOp.getOffset();
1362 Register DstReg = MI.getOperand(0).getReg();
1363 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1364 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1365 // Set the regclass on the dest reg too.
1366 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1367
1368 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1369 // by creating a MOVK that sets bits 48-63 of the register to (global address
1370 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1371 // prevent an incorrect tag being generated during relocation when the
1372 // global appears before the code section. Without the offset, a global at
1373 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1374 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1375 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1376 // instead of `0xf`.
1377 // This assumes that we're in the small code model so we can assume a binary
1378 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1379 // binary must also be loaded into address range [0, 2^48). Both of these
1380 // properties need to be ensured at runtime when using tagged addresses.
1381 if (OpFlags & AArch64II::MO_TAGGED) {
1382 assert(!Offset &&
1383 "Should not have folded in an offset for a tagged global!");
1384 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1385 .addGlobalAddress(GV, 0x100000000,
1387 .addImm(48);
1388 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1389 }
1390
1391 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1392 .addGlobalAddress(GV, Offset,
1394 MI.eraseFromParent();
1395 return true;
1396}
1397
1399 MachineInstr &MI) const {
1400 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1401 switch (IntrinsicID) {
1402 case Intrinsic::vacopy: {
1403 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1404 unsigned VaListSize =
1405 (ST->isTargetDarwin() || ST->isTargetWindows())
1406 ? PtrSize
1407 : ST->isTargetILP32() ? 20 : 32;
1408
1409 MachineFunction &MF = *MI.getMF();
1411 LLT::scalar(VaListSize * 8));
1412 MachineIRBuilder MIB(MI);
1413 MIB.buildLoad(Val, MI.getOperand(2),
1416 VaListSize, Align(PtrSize)));
1417 MIB.buildStore(Val, MI.getOperand(1),
1420 VaListSize, Align(PtrSize)));
1421 MI.eraseFromParent();
1422 return true;
1423 }
1424 case Intrinsic::get_dynamic_area_offset: {
1425 MachineIRBuilder &MIB = Helper.MIRBuilder;
1426 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1427 MI.eraseFromParent();
1428 return true;
1429 }
1430 case Intrinsic::aarch64_mops_memset_tag: {
1431 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1432 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1433 // the instruction).
1434 MachineIRBuilder MIB(MI);
1435 auto &Value = MI.getOperand(3);
1436 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1437 Value.setReg(ExtValueReg);
1438 return true;
1439 }
1440 case Intrinsic::aarch64_prefetch: {
1441 MachineIRBuilder MIB(MI);
1442 auto &AddrVal = MI.getOperand(1);
1443
1444 int64_t IsWrite = MI.getOperand(2).getImm();
1445 int64_t Target = MI.getOperand(3).getImm();
1446 int64_t IsStream = MI.getOperand(4).getImm();
1447 int64_t IsData = MI.getOperand(5).getImm();
1448
1449 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1450 (!IsData << 3) | // IsDataCache bit
1451 (Target << 1) | // Cache level bits
1452 (unsigned)IsStream; // Stream bit
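  // e.g. a plain data prefetch for load into L1 with the keep policy
  // (IsWrite = 0, Target = 0, IsStream = 0, IsData = 1) packs to PrfOp = 0,
  // which corresponds to PLDL1KEEP.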
1453
1454 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1455 MI.eraseFromParent();
1456 return true;
1457 }
1458 case Intrinsic::aarch64_neon_uaddv:
1459 case Intrinsic::aarch64_neon_saddv:
1460 case Intrinsic::aarch64_neon_umaxv:
1461 case Intrinsic::aarch64_neon_smaxv:
1462 case Intrinsic::aarch64_neon_uminv:
1463 case Intrinsic::aarch64_neon_sminv: {
1464 MachineIRBuilder MIB(MI);
1465 MachineRegisterInfo &MRI = *MIB.getMRI();
1466 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1467 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1468 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1469
1470 auto OldDst = MI.getOperand(0).getReg();
1471 auto OldDstTy = MRI.getType(OldDst);
1472 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1473 if (OldDstTy == NewDstTy)
1474 return true;
1475
1476 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1477
1478 Helper.Observer.changingInstr(MI);
1479 MI.getOperand(0).setReg(NewDst);
1480 Helper.Observer.changedInstr(MI);
1481
1482 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1483 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1484 OldDst, NewDst);
1485
1486 return true;
1487 }
1488 case Intrinsic::aarch64_neon_uaddlp:
1489 case Intrinsic::aarch64_neon_saddlp: {
1490 MachineIRBuilder MIB(MI);
1491
1492 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1493 ? AArch64::G_UADDLP
1494 : AArch64::G_SADDLP;
1495 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1496 MI.eraseFromParent();
1497
1498 return true;
1499 }
1500 case Intrinsic::aarch64_neon_uaddlv:
1501 case Intrinsic::aarch64_neon_saddlv: {
1502 MachineIRBuilder MIB(MI);
1503 MachineRegisterInfo &MRI = *MIB.getMRI();
1504
1505 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1506 ? AArch64::G_UADDLV
1507 : AArch64::G_SADDLV;
1508 Register DstReg = MI.getOperand(0).getReg();
1509 Register SrcReg = MI.getOperand(2).getReg();
1510 LLT DstTy = MRI.getType(DstReg);
1511
1512 LLT MidTy, ExtTy;
1513 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1514 MidTy = LLT::fixed_vector(4, 32);
1515 ExtTy = LLT::scalar(32);
1516 } else {
1517 MidTy = LLT::fixed_vector(2, 64);
1518 ExtTy = LLT::scalar(64);
1519 }
1520
1521 Register MidReg =
1522 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1523 Register ZeroReg =
1524 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1525 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1526 {MidReg, ZeroReg})
1527 .getReg(0);
1528
1529 if (DstTy.getScalarSizeInBits() < 32)
1530 MIB.buildTrunc(DstReg, ExtReg);
1531 else
1532 MIB.buildCopy(DstReg, ExtReg);
1533
1534 MI.eraseFromParent();
1535
1536 return true;
1537 }
1538 case Intrinsic::aarch64_neon_smax:
1539 case Intrinsic::aarch64_neon_smin:
1540 case Intrinsic::aarch64_neon_umax:
1541 case Intrinsic::aarch64_neon_umin:
1542 case Intrinsic::aarch64_neon_fmax:
1543 case Intrinsic::aarch64_neon_fmin:
1544 case Intrinsic::aarch64_neon_fmaxnm:
1545 case Intrinsic::aarch64_neon_fminnm: {
1546 MachineIRBuilder MIB(MI);
1547 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1548 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1549 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1550 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1551 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1552 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1553 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1554 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1555 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1556 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1557 {MI.getOperand(2), MI.getOperand(3)});
1558 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1559 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1560 {MI.getOperand(2), MI.getOperand(3)});
1561 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1562 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1563 {MI.getOperand(2), MI.getOperand(3)});
1564 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1565 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1566 {MI.getOperand(2), MI.getOperand(3)});
1567 MI.eraseFromParent();
1568 return true;
1569 }
1570 case Intrinsic::experimental_vector_reverse:
1571 // TODO: Add support for vector_reverse
1572 return false;
1573 }
1574
1575 return true;
1576}
1577
1578bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1580 GISelChangeObserver &Observer) const {
1581 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1582 MI.getOpcode() == TargetOpcode::G_LSHR ||
1583 MI.getOpcode() == TargetOpcode::G_SHL);
1584 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1585 // imported patterns can select it later. Either way, it will be legal.
1586 Register AmtReg = MI.getOperand(2).getReg();
1587 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1588 if (!VRegAndVal)
1589 return true;
1590 // Check the shift amount is in range for an immediate form.
1591 int64_t Amount = VRegAndVal->Value.getSExtValue();
1592 if (Amount > 31)
1593 return true; // This will have to remain a register variant.
1594 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1595 Observer.changingInstr(MI);
1596 MI.getOperand(2).setReg(ExtCst.getReg(0));
1597 Observer.changedInstr(MI);
1598 return true;
1599}
1600
1603 Base = Root;
1604 Offset = 0;
1605
1606 Register NewBase;
1607 int64_t NewOffset;
1608 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1609 isShiftedInt<7, 3>(NewOffset)) {
1610 Base = NewBase;
1611 Offset = NewOffset;
1612 }
1613}
1614
1615// FIXME: This should be removed and replaced with the generic bitcast legalize
1616// action.
1617bool AArch64LegalizerInfo::legalizeLoadStore(
1619 GISelChangeObserver &Observer) const {
1620 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1621 MI.getOpcode() == TargetOpcode::G_LOAD);
1622 // Here we just try to handle vector loads/stores where our value type might
1623 // have pointer elements, which the SelectionDAG importer can't handle. To
1624 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1625 // the value to use s64 types.
1626
1627 // Custom legalization requires that the instruction, if not deleted, be fully
1628 // legalized. To allow further legalization of the instruction, we create
1629 // a new instruction and erase the existing one.
1630
1631 Register ValReg = MI.getOperand(0).getReg();
1632 const LLT ValTy = MRI.getType(ValReg);
1633
1634 if (ValTy == LLT::scalar(128)) {
1635
1636 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1637 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1638 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1639 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1640 bool IsRcpC3 =
1641 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1642
1643 LLT s64 = LLT::scalar(64);
1644
1645 unsigned Opcode;
1646 if (IsRcpC3) {
1647 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1648 } else {
1649 // For LSE2, loads/stores should have been converted to monotonic and had
1650 // a fence inserted after them.
1651 assert(Ordering == AtomicOrdering::Monotonic ||
1652 Ordering == AtomicOrdering::Unordered);
1653 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1654
1655 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1656 }
1657
1659 if (IsLoad) {
1660 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1661 MIRBuilder.buildMergeLikeInstr(
1662 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1663 } else {
1664 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1665 NewI = MIRBuilder.buildInstr(
1666 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1667 }
1668
1669 if (IsRcpC3) {
1670 NewI.addUse(MI.getOperand(1).getReg());
1671 } else {
1672 Register Base;
1673 int Offset;
1674 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1675 NewI.addUse(Base);
1676 NewI.addImm(Offset / 8);
1677 }
1678
1679 NewI.cloneMemRefs(MI);
1681 *MRI.getTargetRegisterInfo(),
1682 *ST->getRegBankInfo());
1683 MI.eraseFromParent();
1684 return true;
1685 }
1686
1687 if (!ValTy.isPointerVector() ||
1688 ValTy.getElementType().getAddressSpace() != 0) {
1689 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1690 return false;
1691 }
1692
1693 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1694 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1695 auto &MMO = **MI.memoperands_begin();
1696 MMO.setType(NewTy);
1697
1698 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1699 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1700 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1701 } else {
1702 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1703 MIRBuilder.buildBitcast(ValReg, NewLoad);
1704 }
1705 MI.eraseFromParent();
1706 return true;
1707}
1708
1709bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1711 MachineIRBuilder &MIRBuilder) const {
1712 MachineFunction &MF = MIRBuilder.getMF();
1713 Align Alignment(MI.getOperand(2).getImm());
1714 Register Dst = MI.getOperand(0).getReg();
1715 Register ListPtr = MI.getOperand(1).getReg();
1716
1717 LLT PtrTy = MRI.getType(ListPtr);
1718 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1719
1720 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1721 const Align PtrAlign = Align(PtrSize);
1722 auto List = MIRBuilder.buildLoad(
1723 PtrTy, ListPtr,
1724 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1725 PtrTy, PtrAlign));
1726
1727 MachineInstrBuilder DstPtr;
1728 if (Alignment > PtrAlign) {
1729 // Realign the list to the actual required alignment.
1730 auto AlignMinus1 =
1731 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1732 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1733 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1734 } else
1735 DstPtr = List;
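// For example, with 8-byte pointers and a 16-byte aligned va_arg slot the
// code above computes DstPtr = (List + 15) & ~15 via G_PTR_ADD + G_PTRMASK.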
1736
1737 LLT ValTy = MRI.getType(Dst);
1738 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1739 MIRBuilder.buildLoad(
1740 Dst, DstPtr,
1741 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1742 ValTy, std::max(Alignment, PtrAlign)));
1743
1744 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1745
1746 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1747
1748 MIRBuilder.buildStore(NewList, ListPtr,
1749 *MF.getMachineMemOperand(MachinePointerInfo(),
1750 MachineMemOperand::MOStore,
1751 PtrTy, PtrAlign));
1752
1753 MI.eraseFromParent();
1754 return true;
1755}
1756
1757 bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1758 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1759 // Only legal if we can select immediate forms.
1760 // TODO: Lower this otherwise.
1761 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1762 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1763}
1764
1765 bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1766 MachineRegisterInfo &MRI,
1767 LegalizerHelper &Helper) const {
1768 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1769 // it can be more efficiently lowered to the following sequence that uses
1770 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1771 // registers are cheap.
1772 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1773 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1774 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1775 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1776 //
1777 // For 128 bit vector popcounts, we lower to the following sequence:
1778 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1779 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1780 // uaddlp.4s v0, v0 // v4s32, v2s64
1781 // uaddlp.2d v0, v0 // v2s64
1782 //
1783 // For 64 bit vector popcounts, we lower to the following sequence:
1784 // cnt.8b v0, v0 // v4s16, v2s32
1785 // uaddlp.4h v0, v0 // v4s16, v2s32
1786 // uaddlp.2s v0, v0 // v2s32
1787
1788 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1789 Register Dst = MI.getOperand(0).getReg();
1790 Register Val = MI.getOperand(1).getReg();
1791 LLT Ty = MRI.getType(Val);
1792 unsigned Size = Ty.getSizeInBits();
1793
1794 assert(Ty == MRI.getType(Dst) &&
1795 "Expected src and dst to have the same type!");
1796
1797 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
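// With CSSC the scalar CNT instruction covers 32/64-bit popcounts directly,
// so an s128 popcount is split into two s64 G_CTPOPs whose results are
// added and zero-extended.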
1798 LLT s64 = LLT::scalar(64);
1799
1800 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1801 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1802 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1803 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1804
1805 MIRBuilder.buildZExt(Dst, Add);
1806 MI.eraseFromParent();
1807 return true;
1808 }
1809
1810 if (!ST->hasNEON() ||
1811 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1812 // Use generic lowering when custom lowering is not possible.
1813 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1814 Helper.lowerBitCount(MI) ==
1815 LegalizerHelper::LegalizeResult::Legalized;
1816 }
1817
1818 // Pre-conditioning: widen Val up to the nearest vector type.
1819 // s32,s64,v4s16,v2s32 -> v8i8
1820 // v8s16,v4s32,v2s64 -> v16i8
1821 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1822 if (Ty.isScalar()) {
1823 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1824 if (Size == 32) {
1825 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1826 }
1827 }
1828 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1829
1830 // Count bits in each byte-sized lane.
1831 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1832
1833 // Sum across lanes.
1834 Register HSum = CTPOP.getReg(0);
1835 unsigned Opc;
1836 SmallVector<LLT> HAddTys;
1837 if (Ty.isScalar()) {
1838 Opc = Intrinsic::aarch64_neon_uaddlv;
1839 HAddTys.push_back(LLT::scalar(32));
1840 } else if (Ty == LLT::fixed_vector(8, 16)) {
1841 Opc = Intrinsic::aarch64_neon_uaddlp;
1842 HAddTys.push_back(LLT::fixed_vector(8, 16));
1843 } else if (Ty == LLT::fixed_vector(4, 32)) {
1844 Opc = Intrinsic::aarch64_neon_uaddlp;
1845 HAddTys.push_back(LLT::fixed_vector(8, 16));
1846 HAddTys.push_back(LLT::fixed_vector(4, 32));
1847 } else if (Ty == LLT::fixed_vector(2, 64)) {
1848 Opc = Intrinsic::aarch64_neon_uaddlp;
1849 HAddTys.push_back(LLT::fixed_vector(8, 16));
1850 HAddTys.push_back(LLT::fixed_vector(4, 32));
1851 HAddTys.push_back(LLT::fixed_vector(2, 64));
1852 } else if (Ty == LLT::fixed_vector(4, 16)) {
1853 Opc = Intrinsic::aarch64_neon_uaddlp;
1854 HAddTys.push_back(LLT::fixed_vector(4, 16));
1855 } else if (Ty == LLT::fixed_vector(2, 32)) {
1856 Opc = Intrinsic::aarch64_neon_uaddlp;
1857 HAddTys.push_back(LLT::fixed_vector(4, 16));
1858 HAddTys.push_back(LLT::fixed_vector(2, 32));
1859 } else
1860 llvm_unreachable("unexpected vector shape");
1861 MachineInstrBuilder UADD;
1862 for (LLT HTy : HAddTys) {
1863 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1864 HSum = UADD.getReg(0);
1865 }
1866
1867 // Post-conditioning.
1868 if (Ty.isScalar() && (Size == 64 || Size == 128))
1869 MIRBuilder.buildZExt(Dst, UADD);
1870 else
1871 UADD->getOperand(0).setReg(Dst);
1872 MI.eraseFromParent();
1873 return true;
1874}
1875
1876 bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1877 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1878 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1879 LLT s64 = LLT::scalar(64);
1880 auto Addr = MI.getOperand(1).getReg();
1881 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1882 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1883 auto DstLo = MRI.createGenericVirtualRegister(s64);
1884 auto DstHi = MRI.createGenericVirtualRegister(s64);
1885
1886 MachineInstrBuilder CAS;
1887 if (ST->hasLSE()) {
1888 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1889 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1890 // the rest of the MIR so we must reassemble the extracted registers into a
1891 // 128-bit known-regclass one with code like this:
1892 //
1893 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1894 // %out = CASP %in1, ...
1895 // %OldLo = G_EXTRACT %out, 0
1896 // %OldHi = G_EXTRACT %out, 64
1897 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1898 unsigned Opcode;
1899 switch (Ordering) {
1900 case AtomicOrdering::Acquire:
1901 Opcode = AArch64::CASPAX;
1902 break;
1903 case AtomicOrdering::Release:
1904 Opcode = AArch64::CASPLX;
1905 break;
1906 case AtomicOrdering::AcquireRelease:
1907 case AtomicOrdering::SequentiallyConsistent:
1908 Opcode = AArch64::CASPALX;
1909 break;
1910 default:
1911 Opcode = AArch64::CASPX;
1912 break;
1913 }
1914
1915 LLT s128 = LLT::scalar(128);
1916 auto CASDst = MRI.createGenericVirtualRegister(s128);
1917 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1918 auto CASNew = MRI.createGenericVirtualRegister(s128);
1919 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1920 .addUse(DesiredI->getOperand(0).getReg())
1921 .addImm(AArch64::sube64)
1922 .addUse(DesiredI->getOperand(1).getReg())
1923 .addImm(AArch64::subo64);
1924 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1925 .addUse(NewI->getOperand(0).getReg())
1926 .addImm(AArch64::sube64)
1927 .addUse(NewI->getOperand(1).getReg())
1928 .addImm(AArch64::subo64);
1929
1930 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1931
1932 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1933 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1934 } else {
1935 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1936 // can take arbitrary registers so it just has the normal GPR64 operands the
1937 // rest of AArch64 is expecting.
1938 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1939 unsigned Opcode;
1940 switch (Ordering) {
1941 case AtomicOrdering::Acquire:
1942 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1943 break;
1944 case AtomicOrdering::Release:
1945 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1946 break;
1947 case AtomicOrdering::AcquireRelease:
1948 case AtomicOrdering::SequentiallyConsistent:
1949 Opcode = AArch64::CMP_SWAP_128;
1950 break;
1951 default:
1952 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1953 break;
1954 }
1955
1956 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1957 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1958 {Addr, DesiredI->getOperand(0),
1959 DesiredI->getOperand(1), NewI->getOperand(0),
1960 NewI->getOperand(1)});
1961 }
1962
1963 CAS.cloneMemRefs(MI);
1964 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1965 *MRI.getTargetRegisterInfo(),
1966 *ST->getRegBankInfo());
1967
1968 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1969 MI.eraseFromParent();
1970 return true;
1971}
1972
1973bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1974 LegalizerHelper &Helper) const {
1975 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1976 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1977 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
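// AArch64 has RBIT and CLZ but no direct CTZ instruction, so count trailing
// zeros as ctlz(bitreverse(x)).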
1978 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1979 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1980 MI.eraseFromParent();
1981 return true;
1982}
1983
1984bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1985 LegalizerHelper &Helper) const {
1986 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1987
1988 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
1989 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1990 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1991 // the instruction).
1992 auto &Value = MI.getOperand(1);
1993 Register ExtValueReg =
1994 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1995 Value.setReg(ExtValueReg);
1996 return true;
1997 }
1998
1999 return false;
2000}
2001
2002 bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2003 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2004 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
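// A constant index means the extract can be selected as-is; otherwise fall
// back to the generic lowering, which spills the vector to a stack slot and
// reloads the requested element.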
2005 auto VRegAndVal =
2006 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2007 if (VRegAndVal)
2008 return true;
2009 return Helper.lowerExtractInsertVectorElt(MI) !=
2010 LegalizerHelper::LegalizeResult::UnableToLegalize;
2011 }
2012
2013bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2014 MachineInstr &MI, LegalizerHelper &Helper) const {
2015 MachineFunction &MF = *MI.getParent()->getParent();
2016 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2017 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2018
2019 // If stack probing is not enabled for this function, use the default
2020 // lowering.
2021 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2022 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2023 "inline-asm") {
2024 Helper.lowerDynStackAlloc(MI);
2025 return true;
2026 }
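// Otherwise stack probing was requested ("probe-stack"="inline-asm", e.g.
// from -fstack-clash-protection): compute the adjusted SP and emit the
// PROBED_STACKALLOC_DYN pseudo, which later expands into a loop that probes
// each newly allocated stack page.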
2027
2028 Register Dst = MI.getOperand(0).getReg();
2029 Register AllocSize = MI.getOperand(1).getReg();
2030 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2031
2032 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2033 "Unexpected type for dynamic alloca");
2034 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2035 "Unexpected type for dynamic alloca");
2036
2037 LLT PtrTy = MRI.getType(Dst);
2038 Register SPReg =
2039 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2040 Register SPTmp =
2041 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2042 auto NewMI =
2043 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2044 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2045 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2046 MIRBuilder.buildCopy(Dst, SPTmp);
2047
2048 MI.eraseFromParent();
2049 return true;
2050}
2051
2052bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2053 LegalizerHelper &Helper) const {
2054 MachineIRBuilder &MIB = Helper.MIRBuilder;
2055 auto &AddrVal = MI.getOperand(0);
2056
2057 int64_t IsWrite = MI.getOperand(1).getImm();
2058 int64_t Locality = MI.getOperand(2).getImm();
2059 int64_t IsData = MI.getOperand(3).getImm();
2060
2061 bool IsStream = Locality == 0;
2062 if (Locality != 0) {
2063 assert(Locality <= 3 && "Prefetch locality out-of-range");
2064 // The intrinsic's locality degree runs opposite to the PRFM cache-level
2065 // encoding, which starts at 0 for L1, so invert it:
2066 // locality 3 -> L1 (0), locality 2 -> L2 (1), locality 1 -> L3 (2).
2067 Locality = 3 - Locality;
2068 }
2069
2070 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
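// For example, the common read prefetch (IsWrite=0, IsData=1, Locality=3)
// yields PrfOp = 0, i.e. PLDL1KEEP, while a streaming data write
// (IsWrite=1, IsData=1, Locality=0) yields 0b10001, i.e. PSTL1STRM.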
2071
2072 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2073 MI.eraseFromParent();
2074 return true;
2075}