1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
65 v16s8, v8s16, v4s32,
66 v2s64, v2p0,
67 /* End 128bit types */
68 /* Begin 64bit types */
69 v8s8, v4s16, v2s32};
70 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
71 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
72 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
73
74 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
75
76 // FIXME: support subtargets which have neon/fp-armv8 disabled.
77 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
79 return;
80 }
81
82 // Some instructions only support s16 if the subtarget has full 16-bit FP
83 // support.
84 const bool HasFP16 = ST.hasFullFP16();
85 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
86
87 const bool HasCSSC = ST.hasCSSC();
88 const bool HasRCPC3 = ST.hasRCPC3();
89
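  // (Added commentary, not in upstream.) The rule sets below use the
  // legalizer's rule-building DSL: legalFor() marks the listed types legal,
  // clampScalar(TypeIdx, Min, Max) widens/narrows a scalar type index into
  // [Min, Max], clampNumElements()/clampMaxNumElements() do the same for
  // vector lengths, lower()/libcallFor() expand to generic code or a runtime
  // call, and custom()/customIf() defer to legalizeCustom() later in this
  // file. For example, under clampScalar(0, s8, s64) an s1 operand is
  // widened to s8 and an s128 operand is narrowed to s64 pieces.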
91 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
92 .legalFor({p0, s8, s16, s32, s64})
93 .legalFor(PackedVectorAllTypeList)
95 .clampScalar(0, s8, s64)
98 .clampNumElements(0, v8s8, v16s8)
99 .clampNumElements(0, v4s16, v8s16)
100 .clampNumElements(0, v2s32, v4s32)
101 .clampNumElements(0, v2s64, v2s64);
102
104 .legalFor({p0, s16, s32, s64})
105 .legalFor(PackedVectorAllTypeList)
107 .clampScalar(0, s16, s64)
108 // Maximum: sN * k = 128
109 .clampMaxNumElements(0, s8, 16)
110 .clampMaxNumElements(0, s16, 8)
111 .clampMaxNumElements(0, s32, 4)
112 .clampMaxNumElements(0, s64, 2)
113 .clampMaxNumElements(0, p0, 2);
114
116 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
118 .clampScalar(0, s32, s64)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampNumElements(0, v2s64, v2s64)
122 .moreElementsToNextPow2(0);
123
124 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
125 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
126 .widenScalarToNextPow2(0)
127 .clampScalar(0, s32, s64)
128 .clampMaxNumElements(0, s8, 16)
129 .clampMaxNumElements(0, s16, 8)
130 .clampNumElements(0, v2s32, v4s32)
131 .clampNumElements(0, v2s64, v2s64)
133 [=](const LegalityQuery &Query) {
134 return Query.Types[0].getNumElements() <= 2;
135 },
136 0, s32)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 4;
140 },
141 0, s16)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 16;
145 },
146 0, s8)
148
149 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
150 .customIf([=](const LegalityQuery &Query) {
151 const auto &SrcTy = Query.Types[0];
152 const auto &AmtTy = Query.Types[1];
153 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
154 AmtTy.getSizeInBits() == 32;
155 })
156 .legalFor({
157 {s32, s32},
158 {s32, s64},
159 {s64, s64},
160 {v8s8, v8s8},
161 {v16s8, v16s8},
162 {v4s16, v4s16},
163 {v8s16, v8s16},
164 {v2s32, v2s32},
165 {v4s32, v4s32},
166 {v2s64, v2s64},
167 })
168 .widenScalarToNextPow2(0)
169 .clampScalar(1, s32, s64)
170 .clampScalar(0, s32, s64)
171 .clampNumElements(0, v8s8, v16s8)
172 .clampNumElements(0, v4s16, v8s16)
173 .clampNumElements(0, v2s32, v4s32)
174 .clampNumElements(0, v2s64, v2s64)
176 .minScalarSameAs(1, 0);
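  // (Added commentary.) The customIf above catches 32-bit scalar shifts with
  // a 32-bit amount; legalizeShlAshrLshr() later in this file promotes
  // constant shift amounts to 64-bit G_CONSTANTs so the imported selection
  // patterns can match them.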
177
179 .legalFor({{p0, s64}, {v2p0, v2s64}})
180 .clampScalar(1, s64, s64);
181
182 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
183
184 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
185 .legalFor({s32, s64})
186 .libcallFor({s128})
187 .clampScalar(0, s32, s64)
189 .scalarize(0);
190
191 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
192 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
194 .clampScalarOrElt(0, s32, s64)
195 .clampNumElements(0, v2s32, v4s32)
196 .clampNumElements(0, v2s64, v2s64)
197 .moreElementsToNextPow2(0);
198
199
200 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
201 .widenScalarToNextPow2(0, /*Min = */ 32)
202 .clampScalar(0, s32, s64)
203 .lower();
204
205 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
206 .legalFor({s64, v8s16, v16s8, v4s32})
207 .lower();
208
209 auto &MinMaxActions = getActionDefinitionsBuilder(
210 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
211 if (HasCSSC)
212 MinMaxActions
213 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
214 // Make clamping conditional on the CSSC extension: without legal types we
215 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
216 // if we detect a type smaller than 32-bit.
217 .minScalar(0, s32);
218 else
219 MinMaxActions
220 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
221 MinMaxActions
222 .clampNumElements(0, v8s8, v16s8)
223 .clampNumElements(0, v4s16, v8s16)
224 .clampNumElements(0, v2s32, v4s32)
225 // FIXME: This shouldn't be needed, as v2s64 types are going to
226 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
227 .clampNumElements(0, v2s64, v2s64)
228 .lower();
229
231 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
232 .legalFor({{s32, s32}, {s64, s32}})
233 .clampScalar(0, s32, s64)
234 .clampScalar(1, s32, s64)
236
237 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
238 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
239 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
240 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
241 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
242 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
243 .legalIf([=](const LegalityQuery &Query) {
244 const auto &Ty = Query.Types[0];
245 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
246 })
247 .libcallFor({s128})
248 .minScalarOrElt(0, MinFPScalar)
249 .clampNumElements(0, v4s16, v8s16)
250 .clampNumElements(0, v2s32, v4s32)
251 .clampNumElements(0, v2s64, v2s64)
253
255 .libcallFor({s32, s64})
256 .minScalar(0, s32)
257 .scalarize(0);
258
259 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
260 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
261 .libcallFor({{s64, s128}})
262 .minScalarOrElt(1, MinFPScalar);
263
265 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
266 G_FEXP, G_FEXP2, G_FEXP10})
267 // We need a call for these, so we always need to scalarize.
268 .scalarize(0)
269 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
270 .minScalar(0, s32)
271 .libcallFor({s32, s64});
273 .scalarize(0)
274 .minScalar(0, s32)
275 .libcallFor({{s32, s32}, {s64, s32}});
276
278 .legalIf(all(typeInSet(0, {s32, s64, p0}),
279 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
281 .clampScalar(0, s32, s64)
283 .minScalar(1, s8)
284 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
285 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
286
288 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
289 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
291 .clampScalar(1, s32, s128)
293 .minScalar(0, s16)
294 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
295 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
296 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
297
298
299 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
300 auto &Actions = getActionDefinitionsBuilder(Op);
301
302 if (Op == G_SEXTLOAD)
304
305 // Atomics have zero extending behavior.
306 Actions
307 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
308 {s32, p0, s16, 8},
309 {s32, p0, s32, 8},
310 {s64, p0, s8, 2},
311 {s64, p0, s16, 2},
312 {s64, p0, s32, 4},
313 {s64, p0, s64, 8},
314 {p0, p0, s64, 8},
315 {v2s32, p0, s64, 8}})
316 .widenScalarToNextPow2(0)
317 .clampScalar(0, s32, s64)
318 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
319 // how to do that yet.
320 .unsupportedIfMemSizeNotPow2()
321 // Lower anything left over into G_*EXT and G_LOAD
322 .lower();
323 }
324
325 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
326 const LLT &ValTy = Query.Types[0];
327 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
328 };
329
331 .customIf([=](const LegalityQuery &Query) {
332 return HasRCPC3 && Query.Types[0] == s128 &&
333 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
334 })
335 .customIf([=](const LegalityQuery &Query) {
336 return Query.Types[0] == s128 &&
337 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
338 })
339 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
340 {s16, p0, s16, 8},
341 {s32, p0, s32, 8},
342 {s64, p0, s64, 8},
343 {p0, p0, s64, 8},
344 {s128, p0, s128, 8},
345 {v8s8, p0, s64, 8},
346 {v16s8, p0, s128, 8},
347 {v4s16, p0, s64, 8},
348 {v8s16, p0, s128, 8},
349 {v2s32, p0, s64, 8},
350 {v4s32, p0, s128, 8},
351 {v2s64, p0, s128, 8}})
352 // These extends are also legal
353 .legalForTypesWithMemDesc(
354 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
355 .widenScalarToNextPow2(0, /* MinSize = */ 8)
356 .clampMaxNumElements(0, s8, 16)
357 .clampMaxNumElements(0, s16, 8)
358 .clampMaxNumElements(0, s32, 4)
359 .clampMaxNumElements(0, s64, 2)
360 .clampMaxNumElements(0, p0, 2)
362 .clampScalar(0, s8, s64)
364 [=](const LegalityQuery &Query) {
365 // Clamp extending load results to 32-bits.
366 return Query.Types[0].isScalar() &&
367 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
368 Query.Types[0].getSizeInBits() > 32;
369 },
370 changeTo(0, s32))
371 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
372 .bitcastIf(typeInSet(0, {v4s8}),
373 [=](const LegalityQuery &Query) {
374 const LLT VecTy = Query.Types[0];
375 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
376 })
377 .customIf(IsPtrVecPred)
378 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
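  // (Added commentary.) The customIf clauses above route 128-bit atomic and
  // RCPC3 acquire loads to legalizeLoadStore() below, which emits the paired
  // 64-bit forms (LDPXi or LDIAPPX) and reassembles the s128 value with a
  // merge.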
379
381 .customIf([=](const LegalityQuery &Query) {
382 return HasRCPC3 && Query.Types[0] == s128 &&
383 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
384 })
385 .customIf([=](const LegalityQuery &Query) {
386 return Query.Types[0] == s128 &&
387 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
388 })
389 .legalForTypesWithMemDesc(
390 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
391 {s32, p0, s8, 8}, // truncstorei8 from s32
392 {s64, p0, s8, 8}, // truncstorei8 from s64
393 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
394 {s64, p0, s16, 8}, // truncstorei16 from s64
395 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
396 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
397 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
398 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
399 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
400 .clampScalar(0, s8, s64)
401 .lowerIf([=](const LegalityQuery &Query) {
402 return Query.Types[0].isScalar() &&
403 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
404 })
405 // Maximum: sN * k = 128
406 .clampMaxNumElements(0, s8, 16)
407 .clampMaxNumElements(0, s16, 8)
408 .clampMaxNumElements(0, s32, 4)
409 .clampMaxNumElements(0, s64, 2)
410 .clampMaxNumElements(0, p0, 2)
412 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
413 .bitcastIf(typeInSet(0, {v4s8}),
414 [=](const LegalityQuery &Query) {
415 const LLT VecTy = Query.Types[0];
416 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
417 })
418 .customIf(IsPtrVecPred)
419 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
420
421 getActionDefinitionsBuilder(G_INDEXED_STORE)
422 // Idx 0 == Ptr, Idx 1 == Val
423 // TODO: we can implement legalizations but as of now these are
424 // generated in a very specific way.
426 {p0, s8, s8, 8},
427 {p0, s16, s16, 8},
428 {p0, s32, s8, 8},
429 {p0, s32, s16, 8},
430 {p0, s32, s32, 8},
431 {p0, s64, s64, 8},
432 {p0, p0, p0, 8},
433 {p0, v8s8, v8s8, 8},
434 {p0, v16s8, v16s8, 8},
435 {p0, v4s16, v4s16, 8},
436 {p0, v8s16, v8s16, 8},
437 {p0, v2s32, v2s32, 8},
438 {p0, v4s32, v4s32, 8},
439 {p0, v2s64, v2s64, 8},
440 {p0, v2p0, v2p0, 8},
441 {p0, s128, s128, 8},
442 })
443 .unsupported();
444
445 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
446 LLT LdTy = Query.Types[0];
447 LLT PtrTy = Query.Types[1];
448 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
449 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
450 return false;
451 if (PtrTy != p0)
452 return false;
453 return true;
454 };
455 getActionDefinitionsBuilder(G_INDEXED_LOAD)
458 .legalIf(IndexedLoadBasicPred)
459 .unsupported();
460 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
461 .unsupportedIf(
463 .legalIf(all(typeInSet(0, {s16, s32, s64}),
464 LegalityPredicate([=](const LegalityQuery &Q) {
465 LLT LdTy = Q.Types[0];
466 LLT PtrTy = Q.Types[1];
467 LLT MemTy = Q.MMODescrs[0].MemoryTy;
468 if (PtrTy != p0)
469 return false;
470 if (LdTy == s16)
471 return MemTy == s8;
472 if (LdTy == s32)
473 return MemTy == s8 || MemTy == s16;
474 if (LdTy == s64)
475 return MemTy == s8 || MemTy == s16 || MemTy == s32;
476 return false;
477 })))
478 .unsupported();
479
480 // Constants
482 .legalFor({p0, s8, s16, s32, s64})
483 .widenScalarToNextPow2(0)
484 .clampScalar(0, s8, s64);
485 getActionDefinitionsBuilder(G_FCONSTANT)
486 .legalIf([=](const LegalityQuery &Query) {
487 const auto &Ty = Query.Types[0];
488 if (HasFP16 && Ty == s16)
489 return true;
490 return Ty == s32 || Ty == s64 || Ty == s128;
491 })
492 .clampScalar(0, MinFPScalar, s128);
493
494 // FIXME: fix moreElementsToNextPow2
496 .legalFor({{s32, s32},
497 {s32, s64},
498 {s32, p0},
499 {v4s32, v4s32},
500 {v2s32, v2s32},
501 {v2s64, v2s64},
502 {v2s64, v2p0},
503 {v4s16, v4s16},
504 {v8s16, v8s16},
505 {v8s8, v8s8},
506 {v16s8, v16s8}})
508 .clampScalar(1, s32, s64)
509 .clampScalar(0, s32, s32)
510 .minScalarEltSameAsIf(
511 [=](const LegalityQuery &Query) {
512 const LLT &Ty = Query.Types[0];
513 const LLT &SrcTy = Query.Types[1];
514 return Ty.isVector() && !SrcTy.isPointerVector() &&
515 Ty.getElementType() != SrcTy.getElementType();
516 },
517 0, 1)
518 .minScalarOrEltIf(
519 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
520 1, s32)
521 .minScalarOrEltIf(
522 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
523 s64)
525 .clampNumElements(1, v8s8, v16s8)
526 .clampNumElements(1, v4s16, v8s16)
527 .clampNumElements(1, v2s32, v4s32)
528 .clampNumElements(1, v2s64, v2s64);
529
531 .legalFor({{s32, MinFPScalar},
532 {s32, s32},
533 {s32, s64},
534 {v4s32, v4s32},
535 {v2s32, v2s32},
536 {v2s64, v2s64}})
537 .legalIf([=](const LegalityQuery &Query) {
538 const auto &Ty = Query.Types[1];
539 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
540 })
542 .clampScalar(0, s32, s32)
543 .clampScalarOrElt(1, MinFPScalar, s64)
544 .minScalarEltSameAsIf(
545 [=](const LegalityQuery &Query) {
546 const LLT &Ty = Query.Types[0];
547 const LLT &SrcTy = Query.Types[1];
548 return Ty.isVector() && !SrcTy.isPointerVector() &&
549 Ty.getElementType() != SrcTy.getElementType();
550 },
551 0, 1)
552 .clampNumElements(1, v4s16, v8s16)
553 .clampNumElements(1, v2s32, v4s32)
554 .clampMaxNumElements(1, s64, 2)
555 .moreElementsToNextPow2(1);
556
557 // Extensions
558 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
559 unsigned DstSize = Query.Types[0].getSizeInBits();
560
561 // Handle legal vectors using legalFor
562 if (Query.Types[0].isVector())
563 return false;
564
565 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
566 return false; // Extending to a scalar s128 needs narrowing.
567
568 const LLT &SrcTy = Query.Types[1];
569
570 // Make sure we fit in a register otherwise. Don't bother checking that
571 // the source type is below 128 bits. We shouldn't be allowing anything
572 // through which is wider than the destination in the first place.
573 unsigned SrcSize = SrcTy.getSizeInBits();
574 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
575 return false;
576
577 return true;
578 };
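  // (Added commentary.) ExtLegalFunc accepts scalar extends to power-of-two
  // destinations in [s8, s64] from power-of-two sources of at least 8 bits,
  // e.g. {s64, s32}; an s128 destination is rejected here and narrowed by the
  // clampScalar below.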
579 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
580 .legalIf(ExtLegalFunc)
581 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
582 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
584 .clampMaxNumElements(1, s8, 8)
585 .clampMaxNumElements(1, s16, 4)
586 .clampMaxNumElements(1, s32, 2)
587 // Tries to convert a large EXTEND into two smaller EXTENDs
588 .lowerIf([=](const LegalityQuery &Query) {
589 return (Query.Types[0].getScalarSizeInBits() >
590 Query.Types[1].getScalarSizeInBits() * 2) &&
591 Query.Types[0].isVector() &&
592 (Query.Types[1].getScalarSizeInBits() == 8 ||
593 Query.Types[1].getScalarSizeInBits() == 16);
594 })
595 .clampMinNumElements(1, s8, 8)
596 .clampMinNumElements(1, s16, 4);
597
599 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
601 .clampMaxNumElements(0, s8, 8)
602 .clampMaxNumElements(0, s16, 4)
603 .clampMaxNumElements(0, s32, 2)
604 .minScalarOrEltIf(
605 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
606 0, s8)
607 .lowerIf([=](const LegalityQuery &Query) {
608 LLT DstTy = Query.Types[0];
609 LLT SrcTy = Query.Types[1];
610 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
611 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
612 })
613 .clampMinNumElements(0, s8, 8)
614 .clampMinNumElements(0, s16, 4)
615 .alwaysLegal();
616
617 getActionDefinitionsBuilder(G_SEXT_INREG)
618 .legalFor({s32, s64})
619 .legalFor(PackedVectorAllTypeList)
620 .maxScalar(0, s64)
621 .clampNumElements(0, v8s8, v16s8)
622 .clampNumElements(0, v4s16, v8s16)
623 .clampNumElements(0, v2s32, v4s32)
624 .clampMaxNumElements(0, s64, 2)
625 .lower();
626
627 // FP conversions
629 .legalFor(
630 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
631 .clampNumElements(0, v4s16, v4s16)
632 .clampNumElements(0, v2s32, v2s32)
633 .scalarize(0);
634
636 .legalFor(
637 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
638 .clampNumElements(0, v4s32, v4s32)
639 .clampNumElements(0, v2s64, v2s64)
640 .scalarize(0);
641
642 // Conversions
643 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
644 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
645 .legalIf([=](const LegalityQuery &Query) {
646 return HasFP16 &&
647 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
648 Query.Types[1] == v8s16) &&
649 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
650 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
651 })
652 .widenScalarToNextPow2(0)
653 .clampScalar(0, s32, s64)
655 .clampScalarOrElt(1, MinFPScalar, s64)
658 [=](const LegalityQuery &Query) {
659 return Query.Types[0].getScalarSizeInBits() >
660 Query.Types[1].getScalarSizeInBits();
661 },
663 .widenScalarIf(
664 [=](const LegalityQuery &Query) {
665 return Query.Types[0].getScalarSizeInBits() <
666 Query.Types[1].getScalarSizeInBits();
667 },
669 .clampNumElements(0, v4s16, v8s16)
670 .clampNumElements(0, v2s32, v4s32)
671 .clampMaxNumElements(0, s64, 2);
672
673 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
674 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
675 .legalIf([=](const LegalityQuery &Query) {
676 return HasFP16 &&
677 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
678 Query.Types[0] == v8s16) &&
679 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
680 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
681 })
682 .widenScalarToNextPow2(1)
683 .clampScalar(1, s32, s64)
685 .clampScalarOrElt(0, MinFPScalar, s64)
688 [=](const LegalityQuery &Query) {
689 return Query.Types[0].getScalarSizeInBits() <
690 Query.Types[1].getScalarSizeInBits();
691 },
693 .widenScalarIf(
694 [=](const LegalityQuery &Query) {
695 return Query.Types[0].getScalarSizeInBits() >
696 Query.Types[1].getScalarSizeInBits();
697 },
699 .clampNumElements(0, v4s16, v8s16)
700 .clampNumElements(0, v2s32, v4s32)
701 .clampMaxNumElements(0, s64, 2);
702
703 // Control-flow
705 .legalFor({s32})
706 .clampScalar(0, s32, s32);
707 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
708
710 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
711 .widenScalarToNextPow2(0)
712 .clampScalar(0, s32, s64)
713 .clampScalar(1, s32, s32)
715 .lowerIf(isVector(0));
716
717 // Pointer-handling
718 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
719
720 if (TM.getCodeModel() == CodeModel::Small)
721 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
722 else
723 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
724
726 .legalFor({{s64, p0}, {v2s64, v2p0}})
727 .widenScalarToNextPow2(0, 64)
728 .clampScalar(0, s64, s64);
729
731 .unsupportedIf([&](const LegalityQuery &Query) {
732 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
733 })
734 .legalFor({{p0, s64}, {v2p0, v2s64}});
735
736 // Casts for 32 and 64-bit width type are just copies.
737 // Same for 128-bit width type, except they are on the FPR bank.
739 // Keeping 32-bit instructions legal to prevent regression in some tests
740 .legalForCartesianProduct({s32, v2s16, v4s8})
741 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
742 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
743 .lowerIf([=](const LegalityQuery &Query) {
744 return Query.Types[0].isVector() != Query.Types[1].isVector();
745 })
747 .clampNumElements(0, v8s8, v16s8)
748 .clampNumElements(0, v4s16, v8s16)
749 .clampNumElements(0, v2s32, v4s32)
750 .lower();
751
752 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
753
754 // va_list must be a pointer, but most sized types are pretty easy to handle
755 // as the destination.
757 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
758 .clampScalar(0, s8, s64)
759 .widenScalarToNextPow2(0, /*Min*/ 8);
760
761 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
762 .lowerIf(
763 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
764
765 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
766 return ST.outlineAtomics() && !ST.hasLSE();
767 };
768
769 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
770 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
771 predNot(UseOutlineAtomics)))
772 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
773 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
774 return Query.Types[0].getSizeInBits() == 128 &&
775 !UseOutlineAtomics(Query);
776 })
777 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
778 UseOutlineAtomics))
779 .clampScalar(0, s32, s64);
780
781 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
782 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
783 G_ATOMICRMW_XOR})
784 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
785 predNot(UseOutlineAtomics)))
786 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
787 UseOutlineAtomics))
788 .clampScalar(0, s32, s64);
789
790 // Do not outline these atomic operations, as per the comment in
791 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
793 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
794 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
795 .clampScalar(0, s32, s64);
796
797 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
798
799 // Merge/Unmerge
800 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
801 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
802 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
804 .widenScalarToNextPow2(LitTyIdx, 8)
805 .widenScalarToNextPow2(BigTyIdx, 32)
806 .clampScalar(LitTyIdx, s8, s64)
807 .clampScalar(BigTyIdx, s32, s128)
808 .legalIf([=](const LegalityQuery &Q) {
809 switch (Q.Types[BigTyIdx].getSizeInBits()) {
810 case 32:
811 case 64:
812 case 128:
813 break;
814 default:
815 return false;
816 }
817 switch (Q.Types[LitTyIdx].getSizeInBits()) {
818 case 8:
819 case 16:
820 case 32:
821 case 64:
822 return true;
823 default:
824 return false;
825 }
826 });
827 }
828
829 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
830 .unsupportedIf([=](const LegalityQuery &Query) {
831 const LLT &EltTy = Query.Types[1].getElementType();
832 return Query.Types[0] != EltTy;
833 })
834 .minScalar(2, s64)
835 .customIf([=](const LegalityQuery &Query) {
836 const LLT &VecTy = Query.Types[1];
837 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
838 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
839 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
840 })
841 .minScalarOrEltIf(
842 [=](const LegalityQuery &Query) {
843 // We want to promote to <M x s1> to <M x s64> if that wouldn't
844 // cause the total vec size to be > 128b.
845 return Query.Types[1].getNumElements() <= 2;
846 },
847 0, s64)
848 .minScalarOrEltIf(
849 [=](const LegalityQuery &Query) {
850 return Query.Types[1].getNumElements() <= 4;
851 },
852 0, s32)
853 .minScalarOrEltIf(
854 [=](const LegalityQuery &Query) {
855 return Query.Types[1].getNumElements() <= 8;
856 },
857 0, s16)
858 .minScalarOrEltIf(
859 [=](const LegalityQuery &Query) {
860 return Query.Types[1].getNumElements() <= 16;
861 },
862 0, s8)
863 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
865 .clampMaxNumElements(1, s64, 2)
866 .clampMaxNumElements(1, s32, 4)
867 .clampMaxNumElements(1, s16, 8)
868 .clampMaxNumElements(1, s8, 16)
869 .clampMaxNumElements(1, p0, 2);
870
871 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
872 .legalIf(
873 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
876 .clampNumElements(0, v8s8, v16s8)
877 .clampNumElements(0, v4s16, v8s16)
878 .clampNumElements(0, v2s32, v4s32)
879 .clampMaxNumElements(0, s64, 2)
880 .clampMaxNumElements(0, p0, 2);
881
882 getActionDefinitionsBuilder(G_BUILD_VECTOR)
883 .legalFor({{v8s8, s8},
884 {v16s8, s8},
885 {v4s16, s16},
886 {v8s16, s16},
887 {v2s32, s32},
888 {v4s32, s32},
889 {v2p0, p0},
890 {v2s64, s64}})
891 .clampNumElements(0, v4s32, v4s32)
892 .clampNumElements(0, v2s64, v2s64)
893 .minScalarOrElt(0, s8)
895 .minScalarSameAs(1, 0);
896
897 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
898
901 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
902 .scalarize(1)
903 .widenScalarToNextPow2(1, /*Min=*/32)
904 .clampScalar(1, s32, s64)
905 .scalarSameSizeAs(0, 1);
906 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
907
908 // TODO: Custom lowering for v2s32, v4s32, v2s64.
909 getActionDefinitionsBuilder(G_BITREVERSE)
910 .legalFor({s32, s64, v8s8, v16s8})
911 .widenScalarToNextPow2(0, /*Min = */ 32)
912 .clampScalar(0, s32, s64);
913
914 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
915
917 .lowerIf(isVector(0))
918 .widenScalarToNextPow2(1, /*Min=*/32)
919 .clampScalar(1, s32, s64)
920 .scalarSameSizeAs(0, 1)
921 .legalIf([=](const LegalityQuery &Query) {
922 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
923 })
924 .customIf([=](const LegalityQuery &Query) {
925 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
926 });
927
928 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
929 .legalIf([=](const LegalityQuery &Query) {
930 const LLT &DstTy = Query.Types[0];
931 const LLT &SrcTy = Query.Types[1];
932 // For now just support the TBL2 variant which needs the source vectors
933 // to be the same size as the dest.
934 if (DstTy != SrcTy)
935 return false;
936 return llvm::is_contained(
937 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
938 })
939 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
940 // just want those lowered into G_BUILD_VECTOR.
941 .lowerIf([=](const LegalityQuery &Query) {
942 return !Query.Types[1].isVector();
943 })
944 .moreElementsIf(
945 [](const LegalityQuery &Query) {
946 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
947 Query.Types[0].getNumElements() >
948 Query.Types[1].getNumElements();
949 },
950 changeTo(1, 0))
952 .moreElementsIf(
953 [](const LegalityQuery &Query) {
954 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
955 Query.Types[0].getNumElements() <
956 Query.Types[1].getNumElements();
957 },
958 changeTo(0, 1))
959 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
960 .clampNumElements(0, v8s8, v16s8)
961 .clampNumElements(0, v4s16, v8s16)
962 .clampNumElements(0, v4s32, v4s32)
963 .clampNumElements(0, v2s64, v2s64);
964
965 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
966 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
967
968 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
969
970 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
971
972 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
973
974 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
975
976 if (ST.hasMOPS()) {
977 // G_BZERO is not supported. Currently it is only emitted by
978 // PreLegalizerCombiner for G_MEMSET with zero constant.
980
982 .legalForCartesianProduct({p0}, {s64}, {s64})
983 .customForCartesianProduct({p0}, {s8}, {s64})
984 .immIdx(0); // Inform verifier imm idx 0 is handled.
985
986 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
987 .legalForCartesianProduct({p0}, {p0}, {s64})
988 .immIdx(0); // Inform verifier imm idx 0 is handled.
989
990 // G_MEMCPY_INLINE does not have a tailcall immediate
991 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
992 .legalForCartesianProduct({p0}, {p0}, {s64});
993
994 } else {
995 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
996 .libcall();
997 }
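  // (Added commentary; hedged.) With FEAT_MOPS the memory ops above stay
  // legal and are expected to select to the MOPS CPY*/SET* instruction
  // sequences; without it they become libcalls.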
998
999 // FIXME: Legal vector types are only legal with NEON.
1000 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1001 if (HasCSSC)
1002 ABSActions
1003 .legalFor({s32, s64});
1004 ABSActions.legalFor(PackedVectorAllTypeList)
1005 .customIf([=](const LegalityQuery &Q) {
1006 // TODO: Fix suboptimal codegen for 128+ bit types.
1007 LLT SrcTy = Q.Types[0];
1008 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1009 })
1010 .widenScalarIf(
1011 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1012 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1013 .widenScalarIf(
1014 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1015 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1016 .clampNumElements(0, v8s8, v16s8)
1017 .clampNumElements(0, v4s16, v8s16)
1018 .clampNumElements(0, v2s32, v4s32)
1019 .clampNumElements(0, v2s64, v2s64)
1020 .moreElementsToNextPow2(0)
1021 .lower();
1022
1023 // For fadd reductions we have pairwise operations available. We treat the
1024 // usual legal types as legal and handle the lowering to pairwise instructions
1025 // later.
1026 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1027 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1028 .legalIf([=](const LegalityQuery &Query) {
1029 const auto &Ty = Query.Types[1];
1030 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1031 })
1032 .minScalarOrElt(0, MinFPScalar)
1033 .clampMaxNumElements(1, s64, 2)
1034 .clampMaxNumElements(1, s32, 4)
1035 .clampMaxNumElements(1, s16, 8)
1036 .lower();
1037
1038 // For fmul reductions we need to split up into individual operations. We
1039 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1040 // smaller types, followed by scalarizing what remains.
1041 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1042 .minScalarOrElt(0, MinFPScalar)
1043 .clampMaxNumElements(1, s64, 2)
1044 .clampMaxNumElements(1, s32, 4)
1045 .clampMaxNumElements(1, s16, 8)
1046 .clampMaxNumElements(1, s32, 2)
1047 .clampMaxNumElements(1, s16, 4)
1048 .scalarize(1)
1049 .lower();
1050
1051 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1052 .scalarize(2)
1053 .lower();
1054
1055 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1056 .legalFor({{s8, v16s8},
1057 {s8, v8s8},
1058 {s16, v8s16},
1059 {s16, v4s16},
1060 {s32, v4s32},
1061 {s32, v2s32},
1062 {s64, v2s64}})
1063 .clampMaxNumElements(1, s64, 2)
1064 .clampMaxNumElements(1, s32, 4)
1065 .clampMaxNumElements(1, s16, 8)
1066 .clampMaxNumElements(1, s8, 16)
1067 .lower();
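  // (Added commentary; hedged.) The legal pairs above correspond to the NEON
  // across-lanes ADDV/ADDP-style reductions; wider inputs are first clamped
  // to 128-bit vectors by the clampMaxNumElements rules.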
1068
1069 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1070 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1071 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1072 .legalIf([=](const LegalityQuery &Query) {
1073 const auto &Ty = Query.Types[1];
1074 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1075 })
1076 .minScalarOrElt(0, MinFPScalar)
1077 .clampMaxNumElements(1, s64, 2)
1078 .clampMaxNumElements(1, s32, 4)
1079 .clampMaxNumElements(1, s16, 8)
1080 .lower();
1081
1082 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1083 .clampMaxNumElements(1, s32, 2)
1084 .clampMaxNumElements(1, s16, 4)
1085 .clampMaxNumElements(1, s8, 8)
1086 .scalarize(1)
1087 .lower();
1088
1090 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1091 .legalFor({{s8, v8s8},
1092 {s8, v16s8},
1093 {s16, v4s16},
1094 {s16, v8s16},
1095 {s32, v2s32},
1096 {s32, v4s32}})
1097 .moreElementsIf(
1098 [=](const LegalityQuery &Query) {
1099 return Query.Types[1].isVector() &&
1100 Query.Types[1].getElementType() != s8 &&
1101 Query.Types[1].getNumElements() & 1;
1102 },
1104 .clampMaxNumElements(1, s64, 2)
1105 .clampMaxNumElements(1, s32, 4)
1106 .clampMaxNumElements(1, s16, 8)
1107 .clampMaxNumElements(1, s8, 16)
1108 .scalarize(1)
1109 .lower();
1110
1112 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1113 // Try to break down into smaller vectors as long as they're at least 64
1114 // bits. This lets us use vector operations for some parts of the
1115 // reduction.
1116 .fewerElementsIf(
1117 [=](const LegalityQuery &Q) {
1118 LLT SrcTy = Q.Types[1];
1119 if (SrcTy.isScalar())
1120 return false;
1121 if (!isPowerOf2_32(SrcTy.getNumElements()))
1122 return false;
1123 // We can usually perform 64b vector operations.
1124 return SrcTy.getSizeInBits() > 64;
1125 },
1126 [=](const LegalityQuery &Q) {
1127 LLT SrcTy = Q.Types[1];
1128 return std::make_pair(1, SrcTy.divide(2));
1129 })
1130 .scalarize(1)
1131 .lower();
1132
1133 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1134 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1135 .lower();
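  // (Added commentary.) The customFor entries are handled by
  // legalizeFunnelShift() below, which rewrites constant-amount G_FSHL into
  // G_FSHR; non-constant amounts fall back to the shift-based lowering.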
1136
1138 .legalFor({{s32, s64}, {s64, s64}})
1139 .customIf([=](const LegalityQuery &Q) {
1140 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1141 })
1142 .lower();
1144
1145 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1146 .customFor({{s32, s32}, {s64, s64}});
1147
1148 auto always = [=](const LegalityQuery &Q) { return true; };
1149 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1150 if (HasCSSC)
1151 CTPOPActions
1152 .legalFor({{s32, s32},
1153 {s64, s64},
1154 {v8s8, v8s8},
1155 {v16s8, v16s8}})
1156 .customFor({{s128, s128},
1157 {v2s64, v2s64},
1158 {v2s32, v2s32},
1159 {v4s32, v4s32},
1160 {v4s16, v4s16},
1161 {v8s16, v8s16}});
1162 else
1163 CTPOPActions
1164 .legalFor({{v8s8, v8s8},
1165 {v16s8, v16s8}})
1166 .customFor({{s32, s32},
1167 {s64, s64},
1168 {s128, s128},
1169 {v2s64, v2s64},
1170 {v2s32, v2s32},
1171 {v4s32, v4s32},
1172 {v4s16, v4s16},
1173 {v8s16, v8s16}});
1174 CTPOPActions
1175 .clampScalar(0, s32, s128)
1176 .widenScalarToNextPow2(0)
1177 .minScalarEltSameAsIf(always, 1, 0)
1178 .maxScalarEltSameAsIf(always, 1, 0);
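  // (Added commentary.) The customFor entries are handled by legalizeCTPOP()
  // below, which lowers popcount through NEON CNT plus pairwise additions
  // when no scalar popcount instruction (FEAT_CSSC) is available.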
1179
1180 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1181 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1182 .clampNumElements(0, v8s8, v16s8)
1183 .clampNumElements(0, v4s16, v8s16)
1184 .clampNumElements(0, v2s32, v4s32)
1185 .clampMaxNumElements(0, s64, 2)
1187 .lower();
1188
1189 // TODO: Libcall support for s128.
1190 // TODO: s16 should be legal with full FP16 support.
1191 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1192 .legalFor({{s64, s32}, {s64, s64}});
1193
1194 // TODO: Custom legalization for mismatched types.
1195 getActionDefinitionsBuilder(G_FCOPYSIGN)
1197 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1198 [=](const LegalityQuery &Query) {
1199 const LLT Ty = Query.Types[0];
1200 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1201 })
1202 .lower();
1203
1205
1206 // Access to floating-point environment.
1207 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1208 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1209 .libcall();
1210
1211 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1212
1213 getActionDefinitionsBuilder(G_PREFETCH).custom();
1214
1216 verify(*ST.getInstrInfo());
1217}
1218
1219bool AArch64LegalizerInfo::legalizeCustom(
1220 LegalizerHelper &Helper, MachineInstr &MI,
1221 LostDebugLocObserver &LocObserver) const {
1222 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1223 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1224 GISelChangeObserver &Observer = Helper.Observer;
1225 switch (MI.getOpcode()) {
1226 default:
1227 // No idea what to do.
1228 return false;
1229 case TargetOpcode::G_VAARG:
1230 return legalizeVaArg(MI, MRI, MIRBuilder);
1231 case TargetOpcode::G_LOAD:
1232 case TargetOpcode::G_STORE:
1233 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1234 case TargetOpcode::G_SHL:
1235 case TargetOpcode::G_ASHR:
1236 case TargetOpcode::G_LSHR:
1237 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1238 case TargetOpcode::G_GLOBAL_VALUE:
1239 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1240 case TargetOpcode::G_SBFX:
1241 case TargetOpcode::G_UBFX:
1242 return legalizeBitfieldExtract(MI, MRI, Helper);
1243 case TargetOpcode::G_FSHL:
1244 case TargetOpcode::G_FSHR:
1245 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1246 case TargetOpcode::G_ROTR:
1247 return legalizeRotate(MI, MRI, Helper);
1248 case TargetOpcode::G_CTPOP:
1249 return legalizeCTPOP(MI, MRI, Helper);
1250 case TargetOpcode::G_ATOMIC_CMPXCHG:
1251 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1252 case TargetOpcode::G_CTTZ:
1253 return legalizeCTTZ(MI, Helper);
1254 case TargetOpcode::G_BZERO:
1255 case TargetOpcode::G_MEMCPY:
1256 case TargetOpcode::G_MEMMOVE:
1257 case TargetOpcode::G_MEMSET:
1258 return legalizeMemOps(MI, Helper);
1259 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1260 return legalizeExtractVectorElt(MI, MRI, Helper);
1261 case TargetOpcode::G_DYN_STACKALLOC:
1262 return legalizeDynStackAlloc(MI, Helper);
1263 case TargetOpcode::G_PREFETCH:
1264 return legalizePrefetch(MI, Helper);
1265 case TargetOpcode::G_ABS:
1266 return Helper.lowerAbsToCNeg(MI);
1267 }
1268
1269 llvm_unreachable("expected switch to return");
1270}
1271
1272bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1274 MachineIRBuilder &MIRBuilder,
1275 GISelChangeObserver &Observer,
1276 LegalizerHelper &Helper) const {
1277 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1278 MI.getOpcode() == TargetOpcode::G_FSHR);
1279
1280 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1281 // lowering
1282 Register ShiftNo = MI.getOperand(3).getReg();
1283 LLT ShiftTy = MRI.getType(ShiftNo);
1284 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1285
1286 // Adjust shift amount according to Opcode (FSHL/FSHR)
1287 // Convert FSHL to FSHR
1288 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1289 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1290
1291 // Lower non-constant shifts and leave zero shifts to the optimizer.
1292 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1293 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1295
1296 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1297
1298 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1299
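  // (Added worked example.) For W-bit operands, fshl(x, y, c) is equivalent
  // to fshr(x, y, W - c), so e.g. a 32-bit G_FSHL by 3 becomes a G_FSHR by 29
  // here.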
1300 // If the instruction is G_FSHR and has a 64-bit G_CONSTANT shift amount
1301 // in the range [0, BitWidth), it is already legal.
1302 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1303 VRegAndVal->Value.ult(BitWidth))
1304 return true;
1305
1306 // Materialize the adjusted shift amount as a 64-bit G_CONSTANT.
1307 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1308
1309 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1310 Observer.changingInstr(MI);
1311 MI.getOperand(3).setReg(Cast64.getReg(0));
1312 Observer.changedInstr(MI);
1313 }
1314 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1315 // instruction
1316 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1317 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1318 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1319 Cast64.getReg(0)});
1320 MI.eraseFromParent();
1321 }
1322 return true;
1323}
1324
1325bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1327 LegalizerHelper &Helper) const {
1328 // To allow for imported patterns to match, we ensure that the rotate amount
1329 // is 64b with an extension.
1330 Register AmtReg = MI.getOperand(2).getReg();
1331 LLT AmtTy = MRI.getType(AmtReg);
1332 (void)AmtTy;
1333 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1334 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1335 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1336 Helper.Observer.changingInstr(MI);
1337 MI.getOperand(2).setReg(NewAmt.getReg(0));
1338 Helper.Observer.changedInstr(MI);
1339 return true;
1340}
1341
1342bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1344 GISelChangeObserver &Observer) const {
1345 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1346 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1347 // G_ADD_LOW instructions.
1348 // By splitting this here, we can optimize accesses in the small code model by
1349 // folding in the G_ADD_LOW into the load/store offset.
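  // (Added illustration; roughly.) For a small-code-model global `g`, the
  // resulting ADRP + G_ADD_LOW pair selects on ELF targets to something like:
  //   adrp x8, g
  //   add  x8, x8, :lo12:g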
1350 auto &GlobalOp = MI.getOperand(1);
1351 // Don't modify an intrinsic call.
1352 if (GlobalOp.isSymbol())
1353 return true;
1354 const auto* GV = GlobalOp.getGlobal();
1355 if (GV->isThreadLocal())
1356 return true; // Don't want to modify TLS vars.
1357
1358 auto &TM = ST->getTargetLowering()->getTargetMachine();
1359 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1360
1361 if (OpFlags & AArch64II::MO_GOT)
1362 return true;
1363
1364 auto Offset = GlobalOp.getOffset();
1365 Register DstReg = MI.getOperand(0).getReg();
1366 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1367 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1368 // Set the regclass on the dest reg too.
1369 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1370
1371 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1372 // by creating a MOVK that sets bits 48-63 of the register to (global address
1373 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1374 // prevent an incorrect tag being generated during relocation when the
1375 // global appears before the code section. Without the offset, a global at
1376 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1377 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1378 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1379 // instead of `0xf`.
1380 // This assumes that we're in the small code model so we can assume a binary
1381 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1382 // binary must also be loaded into address range [0, 2^48). Both of these
1383 // properties need to be ensured at runtime when using tagged addresses.
1384 if (OpFlags & AArch64II::MO_TAGGED) {
1385 assert(!Offset &&
1386 "Should not have folded in an offset for a tagged global!");
1387 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1388 .addGlobalAddress(GV, 0x100000000,
1390 .addImm(48);
1391 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1392 }
1393
1394 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1395 .addGlobalAddress(GV, Offset,
1397 MI.eraseFromParent();
1398 return true;
1399}
1400
1401bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1402 MachineInstr &MI) const {
1403 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1404 switch (IntrinsicID) {
1405 case Intrinsic::vacopy: {
1406 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1407 unsigned VaListSize =
1408 (ST->isTargetDarwin() || ST->isTargetWindows())
1409 ? PtrSize
1410 : ST->isTargetILP32() ? 20 : 32;
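  // (Added background note.) These sizes follow the AAPCS64 va_list layout:
  // three pointers (__stack, __gr_top, __vr_top) plus two ints
  // (__gr_offs, __vr_offs), i.e. 3*8 + 2*4 = 32 bytes, or 3*4 + 2*4 = 20 on
  // ILP32; Darwin and Windows use a single pointer instead.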
1411
1412 MachineFunction &MF = *MI.getMF();
1414 LLT::scalar(VaListSize * 8));
1415 MachineIRBuilder MIB(MI);
1416 MIB.buildLoad(Val, MI.getOperand(2),
1419 VaListSize, Align(PtrSize)));
1420 MIB.buildStore(Val, MI.getOperand(1),
1423 VaListSize, Align(PtrSize)));
1424 MI.eraseFromParent();
1425 return true;
1426 }
1427 case Intrinsic::get_dynamic_area_offset: {
1428 MachineIRBuilder &MIB = Helper.MIRBuilder;
1429 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1430 MI.eraseFromParent();
1431 return true;
1432 }
1433 case Intrinsic::aarch64_mops_memset_tag: {
1434 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1435 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1436 // the instruction).
1437 MachineIRBuilder MIB(MI);
1438 auto &Value = MI.getOperand(3);
1439 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1440 Value.setReg(ExtValueReg);
1441 return true;
1442 }
1443 case Intrinsic::aarch64_prefetch: {
1444 MachineIRBuilder MIB(MI);
1445 auto &AddrVal = MI.getOperand(1);
1446
1447 int64_t IsWrite = MI.getOperand(2).getImm();
1448 int64_t Target = MI.getOperand(3).getImm();
1449 int64_t IsStream = MI.getOperand(4).getImm();
1450 int64_t IsData = MI.getOperand(5).getImm();
1451
1452 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1453 (!IsData << 3) | // IsDataCache bit
1454 (Target << 1) | // Cache level bits
1455 (unsigned)IsStream; // Stream bit
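  // (Added worked example.) IsWrite=1, Target=2 (L3), IsData=1, IsStream=0
  // yields PrfOp = 0b10100 = 20, i.e. the PSTL3KEEP prefetch operation.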
1456
1457 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1458 MI.eraseFromParent();
1459 return true;
1460 }
1461 case Intrinsic::aarch64_neon_uaddv:
1462 case Intrinsic::aarch64_neon_saddv:
1463 case Intrinsic::aarch64_neon_umaxv:
1464 case Intrinsic::aarch64_neon_smaxv:
1465 case Intrinsic::aarch64_neon_uminv:
1466 case Intrinsic::aarch64_neon_sminv: {
1467 MachineIRBuilder MIB(MI);
1468 MachineRegisterInfo &MRI = *MIB.getMRI();
1469 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1470 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1471 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1472
1473 auto OldDst = MI.getOperand(0).getReg();
1474 auto OldDstTy = MRI.getType(OldDst);
1475 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1476 if (OldDstTy == NewDstTy)
1477 return true;
1478
1479 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1480
1481 Helper.Observer.changingInstr(MI);
1482 MI.getOperand(0).setReg(NewDst);
1483 Helper.Observer.changedInstr(MI);
1484
1485 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1486 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1487 OldDst, NewDst);
1488
1489 return true;
1490 }
1491 case Intrinsic::aarch64_neon_uaddlp:
1492 case Intrinsic::aarch64_neon_saddlp: {
1493 MachineIRBuilder MIB(MI);
1494
1495 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1496 ? AArch64::G_UADDLP
1497 : AArch64::G_SADDLP;
1498 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1499 MI.eraseFromParent();
1500
1501 return true;
1502 }
1503 case Intrinsic::aarch64_neon_uaddlv:
1504 case Intrinsic::aarch64_neon_saddlv: {
1505 MachineIRBuilder MIB(MI);
1506 MachineRegisterInfo &MRI = *MIB.getMRI();
1507
1508 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1509 ? AArch64::G_UADDLV
1510 : AArch64::G_SADDLV;
1511 Register DstReg = MI.getOperand(0).getReg();
1512 Register SrcReg = MI.getOperand(2).getReg();
1513 LLT DstTy = MRI.getType(DstReg);
1514
1515 LLT MidTy, ExtTy;
1516 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1517 MidTy = LLT::fixed_vector(4, 32);
1518 ExtTy = LLT::scalar(32);
1519 } else {
1520 MidTy = LLT::fixed_vector(2, 64);
1521 ExtTy = LLT::scalar(64);
1522 }
1523
1524 Register MidReg =
1525 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1526 Register ZeroReg =
1527 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1528 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1529 {MidReg, ZeroReg})
1530 .getReg(0);
1531
1532 if (DstTy.getScalarSizeInBits() < 32)
1533 MIB.buildTrunc(DstReg, ExtReg);
1534 else
1535 MIB.buildCopy(DstReg, ExtReg);
1536
1537 MI.eraseFromParent();
1538
1539 return true;
1540 }
1541 case Intrinsic::aarch64_neon_smax:
1542 case Intrinsic::aarch64_neon_smin:
1543 case Intrinsic::aarch64_neon_umax:
1544 case Intrinsic::aarch64_neon_umin:
1545 case Intrinsic::aarch64_neon_fmax:
1546 case Intrinsic::aarch64_neon_fmin:
1547 case Intrinsic::aarch64_neon_fmaxnm:
1548 case Intrinsic::aarch64_neon_fminnm: {
1549 MachineIRBuilder MIB(MI);
1550 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1551 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1552 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1553 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1554 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1555 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1556 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1557 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1558 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1559 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1560 {MI.getOperand(2), MI.getOperand(3)});
1561 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1562 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1563 {MI.getOperand(2), MI.getOperand(3)});
1564 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1565 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1566 {MI.getOperand(2), MI.getOperand(3)});
1567 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1568 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1569 {MI.getOperand(2), MI.getOperand(3)});
1570 MI.eraseFromParent();
1571 return true;
1572 }
1573 case Intrinsic::vector_reverse:
1574 // TODO: Add support for vector_reverse
1575 return false;
1576 }
1577
1578 return true;
1579}
1580
1581bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1583 GISelChangeObserver &Observer) const {
1584 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1585 MI.getOpcode() == TargetOpcode::G_LSHR ||
1586 MI.getOpcode() == TargetOpcode::G_SHL);
1587 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1588 // imported patterns can select it later. Either way, it will be legal.
1589 Register AmtReg = MI.getOperand(2).getReg();
1590 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1591 if (!VRegAndVal)
1592 return true;
1593 // Check the shift amount is in range for an immediate form.
1594 int64_t Amount = VRegAndVal->Value.getSExtValue();
1595 if (Amount > 31)
1596 return true; // This will have to remain a register variant.
1597 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1598 Observer.changingInstr(MI);
1599 MI.getOperand(2).setReg(ExtCst.getReg(0));
1600 Observer.changedInstr(MI);
1601 return true;
1602}
1603
1606 Base = Root;
1607 Offset = 0;
1608
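  // (Added background note.) isShiftedInt<7, 3> accepts the signed 7-bit,
  // 8-byte-scaled immediates of 64-bit LDP/STP: byte offsets that are
  // multiples of 8 in [-512, 504].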
1609 Register NewBase;
1610 int64_t NewOffset;
1611 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1612 isShiftedInt<7, 3>(NewOffset)) {
1613 Base = NewBase;
1614 Offset = NewOffset;
1615 }
1616}
1617
1618// FIXME: This should be removed and replaced with the generic bitcast legalize
1619// action.
1620bool AArch64LegalizerInfo::legalizeLoadStore(
1622 GISelChangeObserver &Observer) const {
1623 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1624 MI.getOpcode() == TargetOpcode::G_LOAD);
1625 // Here we just try to handle vector loads/stores where our value type might
1626 // have pointer elements, which the SelectionDAG importer can't handle. To
1627 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1628 // the value to use s64 types.
1629
1630 // Custom legalization requires that the instruction, if not deleted, be fully
1631 // legalized. To allow further legalization of the instruction, we create
1632 // a new instruction and erase the existing one.
1633
1634 Register ValReg = MI.getOperand(0).getReg();
1635 const LLT ValTy = MRI.getType(ValReg);
1636
1637 if (ValTy == LLT::scalar(128)) {
1638
1639 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1640 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1641 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1642 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1643 bool IsRcpC3 =
1644 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1645
1646 LLT s64 = LLT::scalar(64);
1647
1648 unsigned Opcode;
1649 if (IsRcpC3) {
1650 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1651 } else {
1652 // For LSE2, loads/stores should have been converted to monotonic and had
1653 // a fence inserted after them.
1654 assert(Ordering == AtomicOrdering::Monotonic ||
1655 Ordering == AtomicOrdering::Unordered);
1656 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1657
1658 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1659 }
1660
1662 if (IsLoad) {
1663 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1664 MIRBuilder.buildMergeLikeInstr(
1665 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1666 } else {
1667 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1668 NewI = MIRBuilder.buildInstr(
1669 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1670 }
1671
1672 if (IsRcpC3) {
1673 NewI.addUse(MI.getOperand(1).getReg());
1674 } else {
1675 Register Base;
1676 int Offset;
1677 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1678 NewI.addUse(Base);
1679 NewI.addImm(Offset / 8);
1680 }
1681
1682 NewI.cloneMemRefs(MI);
1684 *MRI.getTargetRegisterInfo(),
1685 *ST->getRegBankInfo());
1686 MI.eraseFromParent();
1687 return true;
1688 }
1689
1690 if (!ValTy.isPointerVector() ||
1691 ValTy.getElementType().getAddressSpace() != 0) {
1692 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1693 return false;
1694 }
1695
1696 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1697 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1698 auto &MMO = **MI.memoperands_begin();
1699 MMO.setType(NewTy);
1700
1701 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1702 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1703 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1704 } else {
1705 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1706 MIRBuilder.buildBitcast(ValReg, NewLoad);
1707 }
1708 MI.eraseFromParent();
1709 return true;
1710}
1711
1712bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1714 MachineIRBuilder &MIRBuilder) const {
1715 MachineFunction &MF = MIRBuilder.getMF();
1716 Align Alignment(MI.getOperand(2).getImm());
1717 Register Dst = MI.getOperand(0).getReg();
1718 Register ListPtr = MI.getOperand(1).getReg();
1719
1720 LLT PtrTy = MRI.getType(ListPtr);
1721 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1722
1723 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1724 const Align PtrAlign = Align(PtrSize);
1725 auto List = MIRBuilder.buildLoad(
1726 PtrTy, ListPtr,
1727 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1728 PtrTy, PtrAlign));
1729
1730 MachineInstrBuilder DstPtr;
1731 if (Alignment > PtrAlign) {
1732 // Realign the list to the actual required alignment.
1733 auto AlignMinus1 =
1734 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1735 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1736 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1737 } else
1738 DstPtr = List;
1739
1740 LLT ValTy = MRI.getType(Dst);
1741 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1742 MIRBuilder.buildLoad(
1743 Dst, DstPtr,
1744 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1745 ValTy, std::max(Alignment, PtrAlign)));
1746
1747 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1748
1749 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1750
1751 MIRBuilder.buildStore(NewList, ListPtr,
1752 *MF.getMachineMemOperand(MachinePointerInfo(),
1753 MachineMemOperand::MOStore,
1754 PtrTy, PtrAlign));
1755
1756 MI.eraseFromParent();
1757 return true;
1758}
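// A rough sketch of the G_VAARG expansion above (not from the original source;
// register names are illustrative), for an 8-byte-aligned s64 argument:
//
//   %cur:_(p0)   = G_LOAD %list(p0)         ; current va_list pointer
//   %val:_(s64)  = G_LOAD %cur(p0)          ; the argument itself
//   %size:_(s64) = G_CONSTANT i64 8         ; alignTo(ValSize, PtrAlign)
//   %next:_(p0)  = G_PTR_ADD %cur, %size
//   G_STORE %next(p0), %list(p0)            ; bump the va_list pointer
//
// When the requested alignment exceeds the slot alignment, %cur is first
// realigned with a G_PTR_ADD of (align - 1) followed by G_PTRMASK.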
1759
1760bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1761 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1762 // Only legal if we can select immediate forms.
1763 // TODO: Lower this otherwise.
1764 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1765 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1766}
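// Illustrative note (not from the original source): G_UBFX/G_SBFX are kept
// legal above only when both the lsb and width operands are constants, e.g.
//
//   %lsb:_(s64)   = G_CONSTANT i64 8
//   %width:_(s64) = G_CONSTANT i64 16
//   %dst:_(s64)   = G_UBFX %src, %lsb(s64), %width(s64)   ; selectable as UBFM
//
// With non-constant operands this hook reports failure (see the TODO above).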
1767
1768bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1769 MachineRegisterInfo &MRI,
1770 LegalizerHelper &Helper) const {
1771 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1772 // it can be more efficiently lowered to the following sequence that uses
1773 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1774 // registers are cheap.
1775 // FMOV D0, X0 // copy 64-bit int to vector, high bits zeroed
1776 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1777 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1778 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1779 //
1780 // For 128 bit vector popcounts, we lower to the following sequence:
1781 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1782 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1783 // uaddlp.4s v0, v0 // v4s32, v2s64
1784 // uaddlp.2d v0, v0 // v2s64
1785 //
1786 // For 64 bit vector popcounts, we lower to the following sequence:
1787 // cnt.8b v0, v0 // v4s16, v2s32
1788 // uaddlp.4h v0, v0 // v4s16, v2s32
1789 // uaddlp.2s v0, v0 // v2s32
1790
1791 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1792 Register Dst = MI.getOperand(0).getReg();
1793 Register Val = MI.getOperand(1).getReg();
1794 LLT Ty = MRI.getType(Val);
1795 unsigned Size = Ty.getSizeInBits();
1796
1797 assert(Ty == MRI.getType(Dst) &&
1798 "Expected src and dst to have the same type!");
1799
1800 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1801 LLT s64 = LLT::scalar(64);
1802
1803 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1804 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1805 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1806 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1807
1808 MIRBuilder.buildZExt(Dst, Add);
1809 MI.eraseFromParent();
1810 return true;
1811 }
1812
1813 if (!ST->hasNEON() ||
1814 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1815 // Use generic lowering when custom lowering is not possible.
1816 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1817 Helper.lowerBitCount(MI) ==
1818 LegalizerHelper::LegalizeResult::Legalized;
1819 }
1820
1821 // Pre-conditioning: widen Val up to the nearest vector type.
1822 // s32,s64,v4s16,v2s32 -> v8i8
1823 // v8s16,v4s32,v2s64 -> v16i8
1824 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1825 if (Ty.isScalar()) {
1826 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1827 if (Size == 32) {
1828 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1829 }
1830 }
1831 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1832
1833 // Count bits in each byte-sized lane.
1834 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1835
1836 // Sum across lanes.
1837 Register HSum = CTPOP.getReg(0);
1838 unsigned Opc;
1839 SmallVector<LLT> HAddTys;
1840 if (Ty.isScalar()) {
1841 Opc = Intrinsic::aarch64_neon_uaddlv;
1842 HAddTys.push_back(LLT::scalar(32));
1843 } else if (Ty == LLT::fixed_vector(8, 16)) {
1844 Opc = Intrinsic::aarch64_neon_uaddlp;
1845 HAddTys.push_back(LLT::fixed_vector(8, 16));
1846 } else if (Ty == LLT::fixed_vector(4, 32)) {
1847 Opc = Intrinsic::aarch64_neon_uaddlp;
1848 HAddTys.push_back(LLT::fixed_vector(8, 16));
1849 HAddTys.push_back(LLT::fixed_vector(4, 32));
1850 } else if (Ty == LLT::fixed_vector(2, 64)) {
1851 Opc = Intrinsic::aarch64_neon_uaddlp;
1852 HAddTys.push_back(LLT::fixed_vector(8, 16));
1853 HAddTys.push_back(LLT::fixed_vector(4, 32));
1854 HAddTys.push_back(LLT::fixed_vector(2, 64));
1855 } else if (Ty == LLT::fixed_vector(4, 16)) {
1856 Opc = Intrinsic::aarch64_neon_uaddlp;
1857 HAddTys.push_back(LLT::fixed_vector(4, 16));
1858 } else if (Ty == LLT::fixed_vector(2, 32)) {
1859 Opc = Intrinsic::aarch64_neon_uaddlp;
1860 HAddTys.push_back(LLT::fixed_vector(4, 16));
1861 HAddTys.push_back(LLT::fixed_vector(2, 32));
1862 } else
1863 llvm_unreachable("unexpected vector shape");
1864 MachineInstrBuilder UADD;
1865 for (LLT HTy : HAddTys) {
1866 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1867 HSum = UADD.getReg(0);
1868 }
1869
1870 // Post-conditioning.
1871 if (Ty.isScalar() && (Size == 64 || Size == 128))
1872 MIRBuilder.buildZExt(Dst, UADD);
1873 else
1874 UADD->getOperand(0).setReg(Dst);
1875 MI.eraseFromParent();
1876 return true;
1877}
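// A rough sketch of the +cssc s128 path above (not from the original source;
// register names are illustrative): the 128-bit popcount is split into two
// s64 popcounts whose results are added and zero-extended back:
//
//   %lo:_(s64), %hi:_(s64) = G_UNMERGE_VALUES %val(s128)
//   %c0:_(s64)   = G_CTPOP %lo(s64)
//   %c1:_(s64)   = G_CTPOP %hi(s64)
//   %sum:_(s64)  = G_ADD %c0, %c1
//   %dst:_(s128) = G_ZEXT %sum(s64)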
1878
1879bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1880 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1881 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1882 LLT s64 = LLT::scalar(64);
1883 auto Addr = MI.getOperand(1).getReg();
1884 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1885 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1886 auto DstLo = MRI.createGenericVirtualRegister(s64);
1887 auto DstHi = MRI.createGenericVirtualRegister(s64);
1888
1889 MachineInstrBuilder CAS;
1890 if (ST->hasLSE()) {
1891 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1892 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1893 // the rest of the MIR so we must reassemble the extracted registers into a
1894 // 128-bit known-regclass one with code like this:
1895 //
1896 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1897 // %out = CASP %in1, ...
1898 // %OldLo = G_EXTRACT %out, 0
1899 // %OldHi = G_EXTRACT %out, 64
1900 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1901 unsigned Opcode;
1902 switch (Ordering) {
1903 case AtomicOrdering::Acquire:
1904 Opcode = AArch64::CASPAX;
1905 break;
1906 case AtomicOrdering::Release:
1907 Opcode = AArch64::CASPLX;
1908 break;
1909 case AtomicOrdering::AcquireRelease:
1910 case AtomicOrdering::SequentiallyConsistent:
1911 Opcode = AArch64::CASPALX;
1912 break;
1913 default:
1914 Opcode = AArch64::CASPX;
1915 break;
1916 }
1917
1918 LLT s128 = LLT::scalar(128);
1919 auto CASDst = MRI.createGenericVirtualRegister(s128);
1920 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1921 auto CASNew = MRI.createGenericVirtualRegister(s128);
1922 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1923 .addUse(DesiredI->getOperand(0).getReg())
1924 .addImm(AArch64::sube64)
1925 .addUse(DesiredI->getOperand(1).getReg())
1926 .addImm(AArch64::subo64);
1927 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1928 .addUse(NewI->getOperand(0).getReg())
1929 .addImm(AArch64::sube64)
1930 .addUse(NewI->getOperand(1).getReg())
1931 .addImm(AArch64::subo64);
1932
1933 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1934
1935 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1936 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1937 } else {
1938 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1939 // can take arbitrary registers, so it just has the normal GPR64 operands
1940 // that the rest of AArch64 expects.
1941 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1942 unsigned Opcode;
1943 switch (Ordering) {
1944 case AtomicOrdering::Acquire:
1945 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1946 break;
1947 case AtomicOrdering::Release:
1948 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1949 break;
1950 case AtomicOrdering::AcquireRelease:
1951 case AtomicOrdering::SequentiallyConsistent:
1952 Opcode = AArch64::CMP_SWAP_128;
1953 break;
1954 default:
1955 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1956 break;
1957 }
1958
1959 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1960 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1961 {Addr, DesiredI->getOperand(0),
1962 DesiredI->getOperand(1), NewI->getOperand(0),
1963 NewI->getOperand(1)});
1964 }
1965
1966 CAS.cloneMemRefs(MI);
1967 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1968 *MRI.getTargetRegisterInfo(),
1969 *ST->getRegBankInfo());
1970
1971 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1972 MI.eraseFromParent();
1973 return true;
1974}
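// A rough sketch of the non-LSE path above (not from the original source;
// register names are illustrative): the s128 compare-and-swap is fed to the
// CMP_SWAP_128_* pseudo on GPR64 halves and the result is re-merged:
//
//   %dlo:_(s64), %dhi:_(s64) = G_UNMERGE_VALUES %desired(s128)
//   %nlo:_(s64), %nhi:_(s64) = G_UNMERGE_VALUES %new(s128)
//   %olo, %ohi, %scratch = CMP_SWAP_128_* %addr, %dlo, %dhi, %nlo, %nhi
//   %old:_(s128) = G_MERGE_VALUES %olo(s64), %ohi(s64)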
1975
1976bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1977 LegalizerHelper &Helper) const {
1978 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1979 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1980 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1981 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1982 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1983 MI.eraseFromParent();
1984 return true;
1985}
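// Illustrative note (not from the original source): AArch64 has no
// count-trailing-zeros instruction, so the lowering above uses
// cttz(x) == ctlz(bitreverse(x)), which selects to RBIT + CLZ:
//
//   %rev:_(s64) = G_BITREVERSE %x(s64)
//   %dst:_(s64) = G_CTLZ %rev(s64)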
1986
1987bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1988 LegalizerHelper &Helper) const {
1989 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1990
1991 // The tagged version, MOPSMemorySetTagged, is legalised in legalizeIntrinsic.
1992 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1993 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1994 // the instruction).
1995 auto &Value = MI.getOperand(1);
1996 Register ExtValueReg =
1997 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1998 Value.setReg(ExtValueReg);
1999 return true;
2000 }
2001
2002 return false;
2003}
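// Illustrative note (not from the original source): for a MOPS G_MEMSET the
// value operand is rewritten in place, e.g. an s8 set value becomes
//
//   %v64:_(s64) = G_ANYEXT %val(s8)
//
// and the G_MEMSET then uses %v64; only the low 8 bits of that register are
// ever read.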
2004
2005bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2006 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2007 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2008 auto VRegAndVal =
2009 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2010 if (VRegAndVal)
2011 return true;
2012 return Helper.lowerExtractInsertVectorElt(MI) !=
2013 LegalizerHelper::LegalizeResult::UnableToLegalize;
2014 }
2015
2016bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2017 MachineInstr &MI, LegalizerHelper &Helper) const {
2018 MachineFunction &MF = *MI.getParent()->getParent();
2019 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2020 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2021
2022 // If stack probing is not enabled for this function, use the default
2023 // lowering.
2024 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2025 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2026 "inline-asm") {
2027 Helper.lowerDynStackAlloc(MI);
2028 return true;
2029 }
2030
2031 Register Dst = MI.getOperand(0).getReg();
2032 Register AllocSize = MI.getOperand(1).getReg();
2033 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2034
2035 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2036 "Unexpected type for dynamic alloca");
2037 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2038 "Unexpected type for dynamic alloca");
2039
2040 LLT PtrTy = MRI.getType(Dst);
2041 Register SPReg =
2042 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2043 Register SPTmp =
2044 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2045 auto NewMI =
2046 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2047 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2048 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2049 MIRBuilder.buildCopy(Dst, SPTmp);
2050
2051 MI.eraseFromParent();
2052 return true;
2053}
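// A rough sketch of the stack-probing path above (not from the original
// source; register names are illustrative), taken only when the function has
// "probe-stack"="inline-asm":
//
//   %sptmp:gpr64common(p0) = <SP adjusted down by AllocSize and realigned>
//   PROBED_STACKALLOC_DYN %sptmp        ; later expands to a probing loop
//   %dst:_(p0) = COPY %sptmp            ; result of the dynamic alloca
//
// Otherwise the generic lowerDynStackAlloc() lowering is used.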
2054
2055bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2056 LegalizerHelper &Helper) const {
2057 MachineIRBuilder &MIB = Helper.MIRBuilder;
2058 auto &AddrVal = MI.getOperand(0);
2059
2060 int64_t IsWrite = MI.getOperand(1).getImm();
2061 int64_t Locality = MI.getOperand(2).getImm();
2062 int64_t IsData = MI.getOperand(3).getImm();
2063
2064 bool IsStream = Locality == 0;
2065 if (Locality != 0) {
2066 assert(Locality <= 3 && "Prefetch locality out-of-range");
2067 // The locality degree is the inverse of the target cache level (higher
2068 // locality means a faster, lower-numbered cache), so flip the number.
2069 // The PRFM encoding starts at 0 for level 1 (see the example below).
2070 Locality = 3 - Locality;
2071 }
2072
2073 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2074
2075 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2076 MI.eraseFromParent();
2077 return true;
2078}
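// Worked example (not from the original source): a read data prefetch with
// locality 3 has IsWrite=0, IsData=1, IsStream=0 and Locality flipped to 0, so
//   PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0    // PLDL1KEEP
// whereas a streaming read (locality 0) keeps Locality=0 with IsStream=1:
//   PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 1 = 1    // PLDL1STRM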