1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
65 v16s8, v8s16, v4s32,
66 v2s64, v2p0,
67 /* End 128bit types */
68 /* Begin 64bit types */
69 v8s8, v4s16, v2s32};
70 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
71 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
72 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
73
74 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
75
76 // FIXME: support subtargets which have neon/fp-armv8 disabled.
77 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
79 return;
80 }
81
82 // Some instructions only support s16 if the subtarget has full 16-bit FP
83 // support.
84 const bool HasFP16 = ST.hasFullFP16();
85 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
86
87 const bool HasCSSC = ST.hasCSSC();
88 const bool HasRCPC3 = ST.hasRCPC3();
89
91 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
92 .legalFor({p0, s8, s16, s32, s64})
93 .legalFor(PackedVectorAllTypeList)
95 .clampScalar(0, s8, s64)
98 .clampNumElements(0, v8s8, v16s8)
99 .clampNumElements(0, v4s16, v8s16)
100 .clampNumElements(0, v2s32, v4s32)
101 .clampNumElements(0, v2s64, v2s64);
102
104 .legalFor({p0, s16, s32, s64})
105 .legalFor(PackedVectorAllTypeList)
107 .clampScalar(0, s16, s64)
108 // Maximum: sN * k = 128
109 .clampMaxNumElements(0, s8, 16)
110 .clampMaxNumElements(0, s16, 8)
111 .clampMaxNumElements(0, s32, 4)
112 .clampMaxNumElements(0, s64, 2)
113 .clampMaxNumElements(0, p0, 2);
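  // Illustrative note (not from the original source): the clamps above keep
  // any vector result within a single 128-bit NEON register, i.e.
  // element size * element count <= 128. For example, a hypothetical v4s64
  // (4 * 64 = 256 bits) would be clamped down to v2s64, while v8s16
  // (8 * 16 = 128 bits) is already the widest legal form for s16 elements.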
114
116 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
118 .clampScalar(0, s32, s64)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampNumElements(0, v2s64, v2s64)
122 .moreElementsToNextPow2(0);
123
124 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
125 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
126 .widenScalarToNextPow2(0)
127 .clampScalar(0, s32, s64)
128 .clampMaxNumElements(0, s8, 16)
129 .clampMaxNumElements(0, s16, 8)
130 .clampNumElements(0, v2s32, v4s32)
131 .clampNumElements(0, v2s64, v2s64)
133 [=](const LegalityQuery &Query) {
134 return Query.Types[0].getNumElements() <= 2;
135 },
136 0, s32)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 4;
140 },
141 0, s16)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 16;
145 },
146 0, s8)
148
149 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
150 .customIf([=](const LegalityQuery &Query) {
151 const auto &SrcTy = Query.Types[0];
152 const auto &AmtTy = Query.Types[1];
153 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
154 AmtTy.getSizeInBits() == 32;
155 })
156 .legalFor({
157 {s32, s32},
158 {s32, s64},
159 {s64, s64},
160 {v8s8, v8s8},
161 {v16s8, v16s8},
162 {v4s16, v4s16},
163 {v8s16, v8s16},
164 {v2s32, v2s32},
165 {v4s32, v4s32},
166 {v2s64, v2s64},
167 })
168 .widenScalarToNextPow2(0)
169 .clampScalar(1, s32, s64)
170 .clampScalar(0, s32, s64)
171 .clampNumElements(0, v8s8, v16s8)
172 .clampNumElements(0, v4s16, v8s16)
173 .clampNumElements(0, v2s32, v4s32)
174 .clampNumElements(0, v2s64, v2s64)
176 .minScalarSameAs(1, 0);
177
179 .legalFor({{p0, s64}, {v2p0, v2s64}})
180 .clampScalarOrElt(1, s64, s64)
181 .clampNumElements(0, v2p0, v2p0);
182
183 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
184
185 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
186 .legalFor({s32, s64})
187 .libcallFor({s128})
188 .clampScalar(0, s32, s64)
190 .scalarize(0);
191
192 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
193 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
195 .clampScalarOrElt(0, s32, s64)
196 .clampNumElements(0, v2s32, v4s32)
197 .clampNumElements(0, v2s64, v2s64)
198 .moreElementsToNextPow2(0);
199
200
201 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
202 .widenScalarToNextPow2(0, /*Min = */ 32)
203 .clampScalar(0, s32, s64)
204 .lower();
205
206 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
207 .legalFor({s64, v8s16, v16s8, v4s32})
208 .lower();
209
210 auto &MinMaxActions = getActionDefinitionsBuilder(
211 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
212 if (HasCSSC)
213 MinMaxActions
214 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
215 // Clamping is made conditional on the CSSC extension because, without legal
216 // types, we lower to CMP, which can fold one of the two sxtb's we'd
217 // otherwise need if we detect a type smaller than 32-bit.
218 .minScalar(0, s32);
219 else
220 MinMaxActions
221 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
222 MinMaxActions
223 .clampNumElements(0, v8s8, v16s8)
224 .clampNumElements(0, v4s16, v8s16)
225 .clampNumElements(0, v2s32, v4s32)
226 // FIXME: This shouldn't be needed as v2s64 types are going to
227 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
228 .clampNumElements(0, v2s64, v2s64)
229 .lower();
230
232 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
233 .legalFor({{s32, s32}, {s64, s32}})
234 .clampScalar(0, s32, s64)
235 .clampScalar(1, s32, s64)
237
238 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
239 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
240 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
241 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
242 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
243 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
244 .legalIf([=](const LegalityQuery &Query) {
245 const auto &Ty = Query.Types[0];
246 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
247 })
248 .libcallFor({s128})
249 .minScalarOrElt(0, MinFPScalar)
250 .clampNumElements(0, v4s16, v8s16)
251 .clampNumElements(0, v2s32, v4s32)
252 .clampNumElements(0, v2s64, v2s64)
254
256 .libcallFor({s32, s64})
257 .minScalar(0, s32)
258 .scalarize(0);
259
260 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
261 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
262 .libcallFor({{s64, s128}})
263 .minScalarOrElt(1, MinFPScalar);
264
266 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
267 G_FEXP, G_FEXP2, G_FEXP10})
268 // We need a call for these, so we always need to scalarize.
269 .scalarize(0)
270 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
271 .minScalar(0, s32)
272 .libcallFor({s32, s64});
274 .scalarize(0)
275 .minScalar(0, s32)
276 .libcallFor({{s32, s32}, {s64, s32}});
277
279 .legalIf(all(typeInSet(0, {s32, s64, p0}),
280 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
282 .clampScalar(0, s32, s64)
284 .minScalar(1, s8)
285 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
286 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
287
289 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
290 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
292 .clampScalar(1, s32, s128)
294 .minScalar(0, s16)
295 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
296 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
297 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
298
299
300 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
301 auto &Actions = getActionDefinitionsBuilder(Op);
302
303 if (Op == G_SEXTLOAD)
305
306 // Atomics have zero extending behavior.
307 Actions
308 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
309 {s32, p0, s16, 8},
310 {s32, p0, s32, 8},
311 {s64, p0, s8, 2},
312 {s64, p0, s16, 2},
313 {s64, p0, s32, 4},
314 {s64, p0, s64, 8},
315 {p0, p0, s64, 8},
316 {v2s32, p0, s64, 8}})
317 .widenScalarToNextPow2(0)
318 .clampScalar(0, s32, s64)
319 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
320 // how to do that yet.
321 .unsupportedIfMemSizeNotPow2()
322 // Lower anything left over into G_*EXT and G_LOAD
323 .lower();
324 }
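  // Editor's note (assumption, not in the original): each entry in the
  // legalForTypesWithMemDesc lists used above and below reads as
  //   {result type, pointer type, memory type, minimum alignment},
  // so e.g. {s32, p0, s8, 8} marks an extending load of an 8-bit memory
  // value through a p0 pointer into an s32 result as legal.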
325
326 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
327 const LLT &ValTy = Query.Types[0];
328 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
329 };
330
332 .customIf([=](const LegalityQuery &Query) {
333 return HasRCPC3 && Query.Types[0] == s128 &&
334 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
335 })
336 .customIf([=](const LegalityQuery &Query) {
337 return Query.Types[0] == s128 &&
338 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
339 })
340 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
341 {s16, p0, s16, 8},
342 {s32, p0, s32, 8},
343 {s64, p0, s64, 8},
344 {p0, p0, s64, 8},
345 {s128, p0, s128, 8},
346 {v8s8, p0, s64, 8},
347 {v16s8, p0, s128, 8},
348 {v4s16, p0, s64, 8},
349 {v8s16, p0, s128, 8},
350 {v2s32, p0, s64, 8},
351 {v4s32, p0, s128, 8},
352 {v2s64, p0, s128, 8}})
353 // These extends are also legal
354 .legalForTypesWithMemDesc(
355 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
356 .widenScalarToNextPow2(0, /* MinSize = */ 8)
357 .clampMaxNumElements(0, s8, 16)
358 .clampMaxNumElements(0, s16, 8)
359 .clampMaxNumElements(0, s32, 4)
360 .clampMaxNumElements(0, s64, 2)
361 .clampMaxNumElements(0, p0, 2)
363 .clampScalar(0, s8, s64)
365 [=](const LegalityQuery &Query) {
366 // Clamp extending load results to 32-bits.
367 return Query.Types[0].isScalar() &&
368 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
369 Query.Types[0].getSizeInBits() > 32;
370 },
371 changeTo(0, s32))
372 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
373 .bitcastIf(typeInSet(0, {v4s8}),
374 [=](const LegalityQuery &Query) {
375 const LLT VecTy = Query.Types[0];
376 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
377 })
378 .customIf(IsPtrVecPred)
379 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
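  // Sketch of the v4s8 bitcast rule above (illustrative, not from the
  // source): a G_LOAD producing <4 x s8> is rewritten to load a plain s32
  // of the same width and bitcast the result back, conceptually
  //   %v:_(<4 x s8>) = G_LOAD %p(p0)
  // becomes
  //   %s:_(s32) = G_LOAD %p(p0) ; %v:_(<4 x s8>) = G_BITCAST %s(s32)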
380
382 .customIf([=](const LegalityQuery &Query) {
383 return HasRCPC3 && Query.Types[0] == s128 &&
384 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
385 })
386 .customIf([=](const LegalityQuery &Query) {
387 return Query.Types[0] == s128 &&
388 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
389 })
390 .legalForTypesWithMemDesc(
391 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
392 {s32, p0, s8, 8}, // truncstorei8 from s32
393 {s64, p0, s8, 8}, // truncstorei8 from s64
394 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
395 {s64, p0, s16, 8}, // truncstorei16 from s64
396 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
397 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
398 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
399 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
400 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
401 .clampScalar(0, s8, s64)
402 .lowerIf([=](const LegalityQuery &Query) {
403 return Query.Types[0].isScalar() &&
404 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
405 })
406 // Maximum: sN * k = 128
407 .clampMaxNumElements(0, s8, 16)
408 .clampMaxNumElements(0, s16, 8)
409 .clampMaxNumElements(0, s32, 4)
410 .clampMaxNumElements(0, s64, 2)
411 .clampMaxNumElements(0, p0, 2)
413 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
414 .bitcastIf(typeInSet(0, {v4s8}),
415 [=](const LegalityQuery &Query) {
416 const LLT VecTy = Query.Types[0];
417 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
418 })
419 .customIf(IsPtrVecPred)
420 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
421
422 getActionDefinitionsBuilder(G_INDEXED_STORE)
423 // Idx 0 == Ptr, Idx 1 == Val
424 // TODO: we can implement legalizations but as of now these are
425 // generated in a very specific way.
427 {p0, s8, s8, 8},
428 {p0, s16, s16, 8},
429 {p0, s32, s8, 8},
430 {p0, s32, s16, 8},
431 {p0, s32, s32, 8},
432 {p0, s64, s64, 8},
433 {p0, p0, p0, 8},
434 {p0, v8s8, v8s8, 8},
435 {p0, v16s8, v16s8, 8},
436 {p0, v4s16, v4s16, 8},
437 {p0, v8s16, v8s16, 8},
438 {p0, v2s32, v2s32, 8},
439 {p0, v4s32, v4s32, 8},
440 {p0, v2s64, v2s64, 8},
441 {p0, v2p0, v2p0, 8},
442 {p0, s128, s128, 8},
443 })
444 .unsupported();
445
446 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
447 LLT LdTy = Query.Types[0];
448 LLT PtrTy = Query.Types[1];
449 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
450 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
451 return false;
452 if (PtrTy != p0)
453 return false;
454 return true;
455 };
456 getActionDefinitionsBuilder(G_INDEXED_LOAD)
459 .legalIf(IndexedLoadBasicPred)
460 .unsupported();
461 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
462 .unsupportedIf(
464 .legalIf(all(typeInSet(0, {s16, s32, s64}),
465 LegalityPredicate([=](const LegalityQuery &Q) {
466 LLT LdTy = Q.Types[0];
467 LLT PtrTy = Q.Types[1];
468 LLT MemTy = Q.MMODescrs[0].MemoryTy;
469 if (PtrTy != p0)
470 return false;
471 if (LdTy == s16)
472 return MemTy == s8;
473 if (LdTy == s32)
474 return MemTy == s8 || MemTy == s16;
475 if (LdTy == s64)
476 return MemTy == s8 || MemTy == s16 || MemTy == s32;
477 return false;
478 })))
479 .unsupported();
480
481 // Constants
483 .legalFor({p0, s8, s16, s32, s64})
484 .widenScalarToNextPow2(0)
485 .clampScalar(0, s8, s64);
486 getActionDefinitionsBuilder(G_FCONSTANT)
487 .legalIf([=](const LegalityQuery &Query) {
488 const auto &Ty = Query.Types[0];
489 if (HasFP16 && Ty == s16)
490 return true;
491 return Ty == s32 || Ty == s64 || Ty == s128;
492 })
493 .clampScalar(0, MinFPScalar, s128);
494
495 // FIXME: fix moreElementsToNextPow2
497 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
499 .clampScalar(1, s32, s64)
500 .clampScalar(0, s32, s32)
501 .minScalarEltSameAsIf(
502 [=](const LegalityQuery &Query) {
503 const LLT &Ty = Query.Types[0];
504 const LLT &SrcTy = Query.Types[1];
505 return Ty.isVector() && !SrcTy.isPointerVector() &&
506 Ty.getElementType() != SrcTy.getElementType();
507 },
508 0, 1)
509 .minScalarOrEltIf(
510 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
511 1, s32)
512 .minScalarOrEltIf(
513 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
514 s64)
516 .clampNumElements(1, v8s8, v16s8)
517 .clampNumElements(1, v4s16, v8s16)
518 .clampNumElements(1, v2s32, v4s32)
519 .clampNumElements(1, v2s64, v2s64)
520 .customIf(isVector(0));
521
523 .legalFor({{s32, MinFPScalar},
524 {s32, s32},
525 {s32, s64},
526 {v4s32, v4s32},
527 {v2s32, v2s32},
528 {v2s64, v2s64}})
529 .legalIf([=](const LegalityQuery &Query) {
530 const auto &Ty = Query.Types[1];
531 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
532 })
534 .clampScalar(0, s32, s32)
535 .clampScalarOrElt(1, MinFPScalar, s64)
536 .minScalarEltSameAsIf(
537 [=](const LegalityQuery &Query) {
538 const LLT &Ty = Query.Types[0];
539 const LLT &SrcTy = Query.Types[1];
540 return Ty.isVector() && !SrcTy.isPointerVector() &&
541 Ty.getElementType() != SrcTy.getElementType();
542 },
543 0, 1)
544 .clampNumElements(1, v4s16, v8s16)
545 .clampNumElements(1, v2s32, v4s32)
546 .clampMaxNumElements(1, s64, 2)
547 .moreElementsToNextPow2(1);
548
549 // Extensions
550 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
551 unsigned DstSize = Query.Types[0].getSizeInBits();
552
553 // Handle legal vectors using legalFor
554 if (Query.Types[0].isVector())
555 return false;
556
557 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
558 return false; // Extending to a scalar s128 needs narrowing.
559
560 const LLT &SrcTy = Query.Types[1];
561
562 // Make sure we fit in a register otherwise. Don't bother checking that
563 // the source type is below 128 bits. We shouldn't be allowing anything
564 // through which is wider than the destination in the first place.
565 unsigned SrcSize = SrcTy.getSizeInBits();
566 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
567 return false;
568
569 return true;
570 };
571 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
572 .legalIf(ExtLegalFunc)
573 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
574 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
576 .clampMaxNumElements(1, s8, 8)
577 .clampMaxNumElements(1, s16, 4)
578 .clampMaxNumElements(1, s32, 2)
579 // Tries to convert a large EXTEND into two smaller EXTENDs
580 .lowerIf([=](const LegalityQuery &Query) {
581 return (Query.Types[0].getScalarSizeInBits() >
582 Query.Types[1].getScalarSizeInBits() * 2) &&
583 Query.Types[0].isVector() &&
584 (Query.Types[1].getScalarSizeInBits() == 8 ||
585 Query.Types[1].getScalarSizeInBits() == 16);
586 })
587 .clampMinNumElements(1, s8, 8)
588 .clampMinNumElements(1, s16, 4);
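  // Illustrative example of the "large EXTEND" lowering above (my sketch,
  // not from the source): extending <4 x s8> straight to <4 x s32> is a 4x
  // widening, so the lowerIf rule splits it into two steps, conceptually
  // <4 x s8> -> <4 x s16> -> <4 x s32>, each of which is at most a 2x
  // widening that the remaining rules can handle.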
589
591 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
593 .clampMaxNumElements(0, s8, 8)
594 .clampMaxNumElements(0, s16, 4)
595 .clampMaxNumElements(0, s32, 2)
596 .minScalarOrEltIf(
597 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
598 0, s8)
599 .lowerIf([=](const LegalityQuery &Query) {
600 LLT DstTy = Query.Types[0];
601 LLT SrcTy = Query.Types[1];
602 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
603 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
604 })
605 .clampMinNumElements(0, s8, 8)
606 .clampMinNumElements(0, s16, 4)
607 .alwaysLegal();
608
609 getActionDefinitionsBuilder(G_SEXT_INREG)
610 .legalFor({s32, s64})
611 .legalFor(PackedVectorAllTypeList)
612 .maxScalar(0, s64)
613 .clampNumElements(0, v8s8, v16s8)
614 .clampNumElements(0, v4s16, v8s16)
615 .clampNumElements(0, v2s32, v4s32)
616 .clampMaxNumElements(0, s64, 2)
617 .lower();
618
619 // FP conversions
621 .legalFor(
622 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
623 .clampNumElements(0, v4s16, v4s16)
624 .clampNumElements(0, v2s32, v2s32)
625 .scalarize(0);
626
628 .legalFor(
629 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
630 .clampNumElements(0, v4s32, v4s32)
631 .clampNumElements(0, v2s64, v2s64)
632 .scalarize(0);
633
634 // Conversions
635 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
636 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
637 .legalIf([=](const LegalityQuery &Query) {
638 return HasFP16 &&
639 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
640 Query.Types[1] == v8s16) &&
641 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
642 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
643 })
644 .widenScalarToNextPow2(0)
645 .clampScalar(0, s32, s64)
647 .clampScalarOrElt(1, MinFPScalar, s64)
650 [=](const LegalityQuery &Query) {
651 return Query.Types[0].getScalarSizeInBits() >
652 Query.Types[1].getScalarSizeInBits();
653 },
655 .widenScalarIf(
656 [=](const LegalityQuery &Query) {
657 return Query.Types[0].getScalarSizeInBits() <
658 Query.Types[1].getScalarSizeInBits();
659 },
661 .clampNumElements(0, v4s16, v8s16)
662 .clampNumElements(0, v2s32, v4s32)
663 .clampMaxNumElements(0, s64, 2);
664
665 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
666 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
667 .legalIf([=](const LegalityQuery &Query) {
668 return HasFP16 &&
669 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
670 Query.Types[0] == v8s16) &&
671 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
672 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
673 })
674 .widenScalarToNextPow2(1)
675 .clampScalar(1, s32, s64)
677 .clampScalarOrElt(0, MinFPScalar, s64)
680 [=](const LegalityQuery &Query) {
681 return Query.Types[0].getScalarSizeInBits() <
682 Query.Types[1].getScalarSizeInBits();
683 },
685 .widenScalarIf(
686 [=](const LegalityQuery &Query) {
687 return Query.Types[0].getScalarSizeInBits() >
688 Query.Types[1].getScalarSizeInBits();
689 },
691 .clampNumElements(0, v4s16, v8s16)
692 .clampNumElements(0, v2s32, v4s32)
693 .clampMaxNumElements(0, s64, 2);
694
695 // Control-flow
697 .legalFor({s32})
698 .clampScalar(0, s32, s32);
699 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
700
702 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
703 .widenScalarToNextPow2(0)
704 .clampScalar(0, s32, s64)
705 .clampScalar(1, s32, s32)
707 .lowerIf(isVector(0));
708
709 // Pointer-handling
710 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
711
712 if (TM.getCodeModel() == CodeModel::Small)
713 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
714 else
715 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
716
718 .legalFor({{s64, p0}, {v2s64, v2p0}})
719 .widenScalarToNextPow2(0, 64)
720 .clampScalar(0, s64, s64);
721
723 .unsupportedIf([&](const LegalityQuery &Query) {
724 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
725 })
726 .legalFor({{p0, s64}, {v2p0, v2s64}});
727
728 // Casts for 32 and 64-bit width types are just copies.
729 // Same for 128-bit width types, except they are on the FPR bank.
731 // Keeping 32-bit instructions legal to prevent regression in some tests
732 .legalForCartesianProduct({s32, v2s16, v4s8})
733 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
734 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
735 .lowerIf([=](const LegalityQuery &Query) {
736 return Query.Types[0].isVector() != Query.Types[1].isVector();
737 })
739 .clampNumElements(0, v8s8, v16s8)
740 .clampNumElements(0, v4s16, v8s16)
741 .clampNumElements(0, v2s32, v4s32)
742 .lower();
743
744 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
745
746 // va_list must be a pointer, but most sized types are pretty easy to handle
747 // as the destination.
749 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
750 .clampScalar(0, s8, s64)
751 .widenScalarToNextPow2(0, /*Min*/ 8);
752
753 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
754 .lowerIf(
755 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
756
757 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
758 return ST.outlineAtomics() && !ST.hasLSE();
759 };
760
761 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
762 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
763 predNot(UseOutlineAtomics)))
764 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
765 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
766 return Query.Types[0].getSizeInBits() == 128 &&
767 !UseOutlineAtomics(Query);
768 })
769 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
770 UseOutlineAtomics))
771 .clampScalar(0, s32, s64);
772
773 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
774 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
775 G_ATOMICRMW_XOR})
776 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
777 predNot(UseOutlineAtomics)))
778 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
779 UseOutlineAtomics))
780 .clampScalar(0, s32, s64);
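  // Editor's note (assumption about the libcall path, not stated here): when
  // outlined atomics are in use without LSE, these RMW operations become
  // calls into the __aarch64_* outlined-atomics helpers provided by the
  // runtime (compiler-rt/libgcc) instead of inline LL/SC sequences.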
781
782 // Do not outline these atomics operations, as per comment in
783 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
785 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
786 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
787 .clampScalar(0, s32, s64);
788
789 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
790
791 // Merge/Unmerge
792 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
793 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
794 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
796 .widenScalarToNextPow2(LitTyIdx, 8)
797 .widenScalarToNextPow2(BigTyIdx, 32)
798 .clampScalar(LitTyIdx, s8, s64)
799 .clampScalar(BigTyIdx, s32, s128)
800 .legalIf([=](const LegalityQuery &Q) {
801 switch (Q.Types[BigTyIdx].getSizeInBits()) {
802 case 32:
803 case 64:
804 case 128:
805 break;
806 default:
807 return false;
808 }
809 switch (Q.Types[LitTyIdx].getSizeInBits()) {
810 case 8:
811 case 16:
812 case 32:
813 case 64:
814 return true;
815 default:
816 return false;
817 }
818 });
819 }
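  // Worked example for the predicate above (illustrative): merging two s32
  // values into an s64 (BigTy = 64, LitTy = 32) is accepted directly, while
  // merging two s8 values into an s16 is not (16 is not in {32, 64, 128});
  // the widenScalarToNextPow2/clampScalar steps first widen the result to
  // s32 so the query can become legal.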
820
821 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
822 .unsupportedIf([=](const LegalityQuery &Query) {
823 const LLT &EltTy = Query.Types[1].getElementType();
824 return Query.Types[0] != EltTy;
825 })
826 .minScalar(2, s64)
827 .customIf([=](const LegalityQuery &Query) {
828 const LLT &VecTy = Query.Types[1];
829 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
830 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
831 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
832 })
833 .minScalarOrEltIf(
834 [=](const LegalityQuery &Query) {
835 // We want to promote <M x s1> to <M x s64> if that wouldn't
836 // cause the total vec size to be > 128b.
837 return Query.Types[1].getNumElements() <= 2;
838 },
839 0, s64)
840 .minScalarOrEltIf(
841 [=](const LegalityQuery &Query) {
842 return Query.Types[1].getNumElements() <= 4;
843 },
844 0, s32)
845 .minScalarOrEltIf(
846 [=](const LegalityQuery &Query) {
847 return Query.Types[1].getNumElements() <= 8;
848 },
849 0, s16)
850 .minScalarOrEltIf(
851 [=](const LegalityQuery &Query) {
852 return Query.Types[1].getNumElements() <= 16;
853 },
854 0, s8)
855 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
857 .clampMaxNumElements(1, s64, 2)
858 .clampMaxNumElements(1, s32, 4)
859 .clampMaxNumElements(1, s16, 8)
860 .clampMaxNumElements(1, s8, 16)
861 .clampMaxNumElements(1, p0, 2);
862
863 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
864 .legalIf(
865 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
868 .clampNumElements(0, v8s8, v16s8)
869 .clampNumElements(0, v4s16, v8s16)
870 .clampNumElements(0, v2s32, v4s32)
871 .clampMaxNumElements(0, s64, 2)
872 .clampMaxNumElements(0, p0, 2);
873
874 getActionDefinitionsBuilder(G_BUILD_VECTOR)
875 .legalFor({{v8s8, s8},
876 {v16s8, s8},
877 {v4s16, s16},
878 {v8s16, s16},
879 {v2s32, s32},
880 {v4s32, s32},
881 {v2p0, p0},
882 {v2s64, s64}})
883 .clampNumElements(0, v4s32, v4s32)
884 .clampNumElements(0, v2s64, v2s64)
885 .minScalarOrElt(0, s8)
887 .minScalarSameAs(1, 0);
888
889 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
890
893 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
894 .scalarize(1)
895 .widenScalarToNextPow2(1, /*Min=*/32)
896 .clampScalar(1, s32, s64)
897 .scalarSameSizeAs(0, 1);
898 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
899
900 // TODO: Custom lowering for v2s32, v4s32, v2s64.
901 getActionDefinitionsBuilder(G_BITREVERSE)
902 .legalFor({s32, s64, v8s8, v16s8})
903 .widenScalarToNextPow2(0, /*Min = */ 32)
904 .clampScalar(0, s32, s64);
905
906 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
907
909 .lowerIf(isVector(0))
910 .widenScalarToNextPow2(1, /*Min=*/32)
911 .clampScalar(1, s32, s64)
912 .scalarSameSizeAs(0, 1)
913 .legalIf([=](const LegalityQuery &Query) {
914 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
915 })
916 .customIf([=](const LegalityQuery &Query) {
917 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
918 });
919
920 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
921 .legalIf([=](const LegalityQuery &Query) {
922 const LLT &DstTy = Query.Types[0];
923 const LLT &SrcTy = Query.Types[1];
924 // For now just support the TBL2 variant which needs the source vectors
925 // to be the same size as the dest.
926 if (DstTy != SrcTy)
927 return false;
928 return llvm::is_contained(
929 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
930 })
931 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
932 // just want those lowered into G_BUILD_VECTOR.
933 .lowerIf([=](const LegalityQuery &Query) {
934 return !Query.Types[1].isVector();
935 })
936 .moreElementsIf(
937 [](const LegalityQuery &Query) {
938 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
939 Query.Types[0].getNumElements() >
940 Query.Types[1].getNumElements();
941 },
942 changeTo(1, 0))
944 .moreElementsIf(
945 [](const LegalityQuery &Query) {
946 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
947 Query.Types[0].getNumElements() <
948 Query.Types[1].getNumElements();
949 },
950 changeTo(0, 1))
951 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
952 .clampNumElements(0, v8s8, v16s8)
953 .clampNumElements(0, v4s16, v8s16)
954 .clampNumElements(0, v4s32, v4s32)
955 .clampNumElements(0, v2s64, v2s64);
956
957 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
958 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
959
960 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
961
962 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
963
964 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
965
966 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
967
968 if (ST.hasMOPS()) {
969 // G_BZERO is not supported. Currently it is only emitted by
970 // PreLegalizerCombiner for G_MEMSET with zero constant.
972
974 .legalForCartesianProduct({p0}, {s64}, {s64})
975 .customForCartesianProduct({p0}, {s8}, {s64})
976 .immIdx(0); // Inform verifier imm idx 0 is handled.
977
978 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
979 .legalForCartesianProduct({p0}, {p0}, {s64})
980 .immIdx(0); // Inform verifier imm idx 0 is handled.
981
982 // G_MEMCPY_INLINE does not have a tailcall immediate
983 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
984 .legalForCartesianProduct({p0}, {p0}, {s64});
985
986 } else {
987 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
988 .libcall();
989 }
990
991 // FIXME: Legal vector types are only legal with NEON.
992 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
993 if (HasCSSC)
994 ABSActions
995 .legalFor({s32, s64});
996 ABSActions.legalFor(PackedVectorAllTypeList)
997 .customIf([=](const LegalityQuery &Q) {
998 // TODO: Fix suboptimal codegen for 128+ bit types.
999 LLT SrcTy = Q.Types[0];
1000 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1001 })
1002 .widenScalarIf(
1003 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1004 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1005 .widenScalarIf(
1006 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1007 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1008 .clampNumElements(0, v8s8, v16s8)
1009 .clampNumElements(0, v4s16, v8s16)
1010 .clampNumElements(0, v2s32, v4s32)
1011 .clampNumElements(0, v2s64, v2s64)
1012 .moreElementsToNextPow2(0)
1013 .lower();
1014
1015 // For fadd reductions we have pairwise operations available. We treat the
1016 // usual legal types as legal and handle the lowering to pairwise instructions
1017 // later.
1018 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1019 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1020 .legalIf([=](const LegalityQuery &Query) {
1021 const auto &Ty = Query.Types[1];
1022 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1023 })
1024 .minScalarOrElt(0, MinFPScalar)
1025 .clampMaxNumElements(1, s64, 2)
1026 .clampMaxNumElements(1, s32, 4)
1027 .clampMaxNumElements(1, s16, 8)
1028 .lower();
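  // Sketch (assumption about the later lowering, not part of this file): a
  // legal G_VECREDUCE_FADD such as s32 = reduce(<4 x s32>) is expected to
  // become a short pairwise FADDP chain, e.g. v4s32 -> v2s32 -> s32, rather
  // than a linear chain of scalar fadds.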
1029
1030 // For fmul reductions we need to split up into individual operations. We
1031 // clamp to 128-bit vectors then to 64-bit vectors to produce a cascade of
1032 // smaller types, followed by scalarizing what remains.
1033 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1034 .minScalarOrElt(0, MinFPScalar)
1035 .clampMaxNumElements(1, s64, 2)
1036 .clampMaxNumElements(1, s32, 4)
1037 .clampMaxNumElements(1, s16, 8)
1038 .clampMaxNumElements(1, s32, 2)
1039 .clampMaxNumElements(1, s16, 4)
1040 .scalarize(1)
1041 .lower();
1042
1043 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1044 .scalarize(2)
1045 .lower();
1046
1047 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1048 .legalFor({{s8, v16s8},
1049 {s8, v8s8},
1050 {s16, v8s16},
1051 {s16, v4s16},
1052 {s32, v4s32},
1053 {s32, v2s32},
1054 {s64, v2s64}})
1055 .clampMaxNumElements(1, s64, 2)
1056 .clampMaxNumElements(1, s32, 4)
1057 .clampMaxNumElements(1, s16, 8)
1058 .clampMaxNumElements(1, s8, 16)
1059 .lower();
1060
1061 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1062 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1063 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1064 .legalIf([=](const LegalityQuery &Query) {
1065 const auto &Ty = Query.Types[1];
1066 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1067 })
1068 .minScalarOrElt(0, MinFPScalar)
1069 .clampMaxNumElements(1, s64, 2)
1070 .clampMaxNumElements(1, s32, 4)
1071 .clampMaxNumElements(1, s16, 8)
1072 .lower();
1073
1074 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1075 .clampMaxNumElements(1, s32, 2)
1076 .clampMaxNumElements(1, s16, 4)
1077 .clampMaxNumElements(1, s8, 8)
1078 .scalarize(1)
1079 .lower();
1080
1082 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1083 .legalFor({{s8, v8s8},
1084 {s8, v16s8},
1085 {s16, v4s16},
1086 {s16, v8s16},
1087 {s32, v2s32},
1088 {s32, v4s32}})
1089 .moreElementsIf(
1090 [=](const LegalityQuery &Query) {
1091 return Query.Types[1].isVector() &&
1092 Query.Types[1].getElementType() != s8 &&
1093 Query.Types[1].getNumElements() & 1;
1094 },
1096 .clampMaxNumElements(1, s64, 2)
1097 .clampMaxNumElements(1, s32, 4)
1098 .clampMaxNumElements(1, s16, 8)
1099 .clampMaxNumElements(1, s8, 16)
1100 .scalarize(1)
1101 .lower();
1102
1104 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1105 // Try to break down into smaller vectors as long as they're at least 64
1106 // bits. This lets us use vector operations for some parts of the
1107 // reduction.
1108 .fewerElementsIf(
1109 [=](const LegalityQuery &Q) {
1110 LLT SrcTy = Q.Types[1];
1111 if (SrcTy.isScalar())
1112 return false;
1113 if (!isPowerOf2_32(SrcTy.getNumElements()))
1114 return false;
1115 // We can usually perform 64b vector operations.
1116 return SrcTy.getSizeInBits() > 64;
1117 },
1118 [=](const LegalityQuery &Q) {
1119 LLT SrcTy = Q.Types[1];
1120 return std::make_pair(1, SrcTy.divide(2));
1121 })
1122 .scalarize(1)
1123 .lower();
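  // Illustrative trace of the fewerElementsIf rule above (not from the
  // source): an OR reduction of <4 x s32> (128 bits) is split once into two
  // <2 x s32> halves that are OR'ed as a 64-bit vector op; the remaining
  // <2 x s32> reduction (64 bits) no longer matches the predicate and is
  // scalarized and lowered.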
1124
1125 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1126 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1127 .lower();
1128
1130 .legalFor({{s32, s64}, {s64, s64}})
1131 .customIf([=](const LegalityQuery &Q) {
1132 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1133 })
1134 .lower();
1136
1137 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1138 .customFor({{s32, s32}, {s64, s64}});
1139
1140 auto always = [=](const LegalityQuery &Q) { return true; };
1141 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1142 if (HasCSSC)
1143 CTPOPActions
1144 .legalFor({{s32, s32},
1145 {s64, s64},
1146 {v8s8, v8s8},
1147 {v16s8, v16s8}})
1148 .customFor({{s128, s128},
1149 {v2s64, v2s64},
1150 {v2s32, v2s32},
1151 {v4s32, v4s32},
1152 {v4s16, v4s16},
1153 {v8s16, v8s16}});
1154 else
1155 CTPOPActions
1156 .legalFor({{v8s8, v8s8},
1157 {v16s8, v16s8}})
1158 .customFor({{s32, s32},
1159 {s64, s64},
1160 {s128, s128},
1161 {v2s64, v2s64},
1162 {v2s32, v2s32},
1163 {v4s32, v4s32},
1164 {v4s16, v4s16},
1165 {v8s16, v8s16}});
1166 CTPOPActions
1167 .clampScalar(0, s32, s128)
1168 .widenScalarToNextPow2(0)
1169 .minScalarEltSameAsIf(always, 1, 0)
1170 .maxScalarEltSameAsIf(always, 1, 0);
1171
1172 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1173 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1174 .clampNumElements(0, v8s8, v16s8)
1175 .clampNumElements(0, v4s16, v8s16)
1176 .clampNumElements(0, v2s32, v4s32)
1177 .clampMaxNumElements(0, s64, 2)
1179 .lower();
1180
1181 // TODO: Libcall support for s128.
1182 // TODO: s16 should be legal with full FP16 support.
1183 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1184 .legalFor({{s64, s32}, {s64, s64}});
1185
1186 // TODO: Custom legalization for mismatched types.
1187 getActionDefinitionsBuilder(G_FCOPYSIGN)
1189 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1190 [=](const LegalityQuery &Query) {
1191 const LLT Ty = Query.Types[0];
1192 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1193 })
1194 .lower();
1195
1197
1198 // Access to floating-point environment.
1199 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1200 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1201 .libcall();
1202
1203 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1204
1205 getActionDefinitionsBuilder(G_PREFETCH).custom();
1206
1208 verify(*ST.getInstrInfo());
1209}
1210
1213 LostDebugLocObserver &LocObserver) const {
1214 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1215 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1216 GISelChangeObserver &Observer = Helper.Observer;
1217 switch (MI.getOpcode()) {
1218 default:
1219 // No idea what to do.
1220 return false;
1221 case TargetOpcode::G_VAARG:
1222 return legalizeVaArg(MI, MRI, MIRBuilder);
1223 case TargetOpcode::G_LOAD:
1224 case TargetOpcode::G_STORE:
1225 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1226 case TargetOpcode::G_SHL:
1227 case TargetOpcode::G_ASHR:
1228 case TargetOpcode::G_LSHR:
1229 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1230 case TargetOpcode::G_GLOBAL_VALUE:
1231 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1232 case TargetOpcode::G_SBFX:
1233 case TargetOpcode::G_UBFX:
1234 return legalizeBitfieldExtract(MI, MRI, Helper);
1235 case TargetOpcode::G_FSHL:
1236 case TargetOpcode::G_FSHR:
1237 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1238 case TargetOpcode::G_ROTR:
1239 return legalizeRotate(MI, MRI, Helper);
1240 case TargetOpcode::G_CTPOP:
1241 return legalizeCTPOP(MI, MRI, Helper);
1242 case TargetOpcode::G_ATOMIC_CMPXCHG:
1243 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1244 case TargetOpcode::G_CTTZ:
1245 return legalizeCTTZ(MI, Helper);
1246 case TargetOpcode::G_BZERO:
1247 case TargetOpcode::G_MEMCPY:
1248 case TargetOpcode::G_MEMMOVE:
1249 case TargetOpcode::G_MEMSET:
1250 return legalizeMemOps(MI, Helper);
1251 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1252 return legalizeExtractVectorElt(MI, MRI, Helper);
1253 case TargetOpcode::G_DYN_STACKALLOC:
1254 return legalizeDynStackAlloc(MI, Helper);
1255 case TargetOpcode::G_PREFETCH:
1256 return legalizePrefetch(MI, Helper);
1257 case TargetOpcode::G_ABS:
1258 return Helper.lowerAbsToCNeg(MI);
1259 case TargetOpcode::G_ICMP:
1260 return legalizeICMP(MI, MRI, MIRBuilder);
1261 }
1262
1263 llvm_unreachable("expected switch to return");
1264}
1265
1266bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1268 MachineIRBuilder &MIRBuilder,
1269 GISelChangeObserver &Observer,
1270 LegalizerHelper &Helper) const {
1271 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1272 MI.getOpcode() == TargetOpcode::G_FSHR);
1273
1274 // Keep as G_FSHR if the shift amount is a G_CONSTANT; otherwise use the
1275 // generic lowering.
1276 Register ShiftNo = MI.getOperand(3).getReg();
1277 LLT ShiftTy = MRI.getType(ShiftNo);
1278 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1279
1280 // Adjust shift amount according to Opcode (FSHL/FSHR)
1281 // Convert FSHL to FSHR
1282 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1283 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1284
1285 // Lower non-constant shifts and leave zero shifts to the optimizer.
1286 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1287 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1289
1290 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1291
1292 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
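  // Worked example (illustrative): for a 32-bit G_FSHL with a constant
  // shift of 3, the equivalent G_FSHR amount is 32 - 3 = 29, since
  // fshl(x, y, n) == fshr(x, y, BitWidth - n) for n != 0.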
1293
1294 // If the instruction is G_FSHR with a 64-bit G_CONSTANT shift amount in
1295 // the range [0, BitWidth), it is already legal.
1296 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1297 VRegAndVal->Value.ult(BitWidth))
1298 return true;
1299
1300 // Cast the ShiftNumber to a 64-bit type
1301 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1302
1303 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1304 Observer.changingInstr(MI);
1305 MI.getOperand(3).setReg(Cast64.getReg(0));
1306 Observer.changedInstr(MI);
1307 }
1308 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1309 // instruction
1310 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1311 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1312 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1313 Cast64.getReg(0)});
1314 MI.eraseFromParent();
1315 }
1316 return true;
1317}
1318
1319bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1321 MachineIRBuilder &MIRBuilder) const {
1322 Register DstReg = MI.getOperand(0).getReg();
1323 Register SrcReg1 = MI.getOperand(2).getReg();
1324 Register SrcReg2 = MI.getOperand(3).getReg();
1325 LLT DstTy = MRI.getType(DstReg);
1326 LLT SrcTy = MRI.getType(SrcReg1);
1327
1328 // Check the vector types are legal
1329 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1330 DstTy.getNumElements() != SrcTy.getNumElements() ||
1331 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1332 return false;
1333
1334 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1335 // following passes
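  // Illustrative MIR shape (not from the source): a vector
  //   %c:_(<4 x s32>) = G_ICMP intpred(ne), %a, %b
  // is rewritten below as
  //   %e:_(<4 x s32>) = G_ICMP intpred(eq), %a, %b
  //   %c:_(<4 x s32>) = G_XOR %e, <all ones>   ; built via buildNot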
1336 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1337 if (Pred != CmpInst::ICMP_NE)
1338 return true;
1339 Register CmpReg =
1340 MIRBuilder
1341 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1342 .getReg(0);
1343 MIRBuilder.buildNot(DstReg, CmpReg);
1344
1345 MI.eraseFromParent();
1346 return true;
1347}
1348
1349bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1351 LegalizerHelper &Helper) const {
1352 // To allow for imported patterns to match, we ensure that the rotate amount
1353 // is 64b with an extension.
1354 Register AmtReg = MI.getOperand(2).getReg();
1355 LLT AmtTy = MRI.getType(AmtReg);
1356 (void)AmtTy;
1357 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1358 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1359 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1360 Helper.Observer.changingInstr(MI);
1361 MI.getOperand(2).setReg(NewAmt.getReg(0));
1362 Helper.Observer.changedInstr(MI);
1363 return true;
1364}
1365
1366bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1368 GISelChangeObserver &Observer) const {
1369 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1370 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1371 // G_ADD_LOW instructions.
1372 // By splitting this here, we can optimize accesses in the small code model by
1373 // folding in the G_ADD_LOW into the load/store offset.
1374 auto &GlobalOp = MI.getOperand(1);
1375 // Don't modify an intrinsic call.
1376 if (GlobalOp.isSymbol())
1377 return true;
1378 const auto* GV = GlobalOp.getGlobal();
1379 if (GV->isThreadLocal())
1380 return true; // Don't want to modify TLS vars.
1381
1382 auto &TM = ST->getTargetLowering()->getTargetMachine();
1383 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1384
1385 if (OpFlags & AArch64II::MO_GOT)
1386 return true;
1387
1388 auto Offset = GlobalOp.getOffset();
1389 Register DstReg = MI.getOperand(0).getReg();
1390 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1391 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1392 // Set the regclass on the dest reg too.
1393 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1394
1395 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1396 // by creating a MOVK that sets bits 48-63 of the register to (global address
1397 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1398 // prevent an incorrect tag being generated during relocation when the
1399 // global appears before the code section. Without the offset, a global at
1400 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1401 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1402 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1403 // instead of `0xf`.
1404 // This assumes that we're in the small code model so we can assume a binary
1405 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1406 // binary must also be loaded into address range [0, 2^48). Both of these
1407 // properties need to be ensured at runtime when using tagged addresses.
1408 if (OpFlags & AArch64II::MO_TAGGED) {
1409 assert(!Offset &&
1410 "Should not have folded in an offset for a tagged global!");
1411 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1412 .addGlobalAddress(GV, 0x100000000,
1414 .addImm(48);
1415 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1416 }
1417
1418 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1419 .addGlobalAddress(GV, Offset,
1421 MI.eraseFromParent();
1422 return true;
1423}
1424
1426 MachineInstr &MI) const {
1427 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1428 switch (IntrinsicID) {
1429 case Intrinsic::vacopy: {
1430 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1431 unsigned VaListSize =
1432 (ST->isTargetDarwin() || ST->isTargetWindows())
1433 ? PtrSize
1434 : ST->isTargetILP32() ? 20 : 32;
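    // Background note (my summary of AAPCS64, not in the original): on
    // Darwin/Windows va_list is just a pointer, while the standard AAPCS64
    // va_list is a five-field struct (__stack, __gr_top, __vr_top,
    // __gr_offs, __vr_offs), which is 32 bytes with 64-bit pointers and
    // 20 bytes under ILP32, matching the sizes chosen above.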
1435
1436 MachineFunction &MF = *MI.getMF();
1438 LLT::scalar(VaListSize * 8));
1439 MachineIRBuilder MIB(MI);
1440 MIB.buildLoad(Val, MI.getOperand(2),
1443 VaListSize, Align(PtrSize)));
1444 MIB.buildStore(Val, MI.getOperand(1),
1447 VaListSize, Align(PtrSize)));
1448 MI.eraseFromParent();
1449 return true;
1450 }
1451 case Intrinsic::get_dynamic_area_offset: {
1452 MachineIRBuilder &MIB = Helper.MIRBuilder;
1453 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1454 MI.eraseFromParent();
1455 return true;
1456 }
1457 case Intrinsic::aarch64_mops_memset_tag: {
1458 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1459 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1460 // the instruction).
1461 MachineIRBuilder MIB(MI);
1462 auto &Value = MI.getOperand(3);
1463 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1464 Value.setReg(ExtValueReg);
1465 return true;
1466 }
1467 case Intrinsic::aarch64_prefetch: {
1468 MachineIRBuilder MIB(MI);
1469 auto &AddrVal = MI.getOperand(1);
1470
1471 int64_t IsWrite = MI.getOperand(2).getImm();
1472 int64_t Target = MI.getOperand(3).getImm();
1473 int64_t IsStream = MI.getOperand(4).getImm();
1474 int64_t IsData = MI.getOperand(5).getImm();
1475
1476 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1477 (!IsData << 3) | // IsDataCache bit
1478 (Target << 1) | // Cache level bits
1479 (unsigned)IsStream; // Stream bit
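    // Worked example (illustrative): a data prefetch-for-write to L1 with
    // the "keep" policy (IsWrite = 1, IsData = 1, Target = 0, IsStream = 0)
    // packs to PrfOp = (1 << 4) | (0 << 3) | (0 << 1) | 0 = 16, i.e.
    // PSTL1KEEP in the PRFM prfop encoding.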
1480
1481 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1482 MI.eraseFromParent();
1483 return true;
1484 }
1485 case Intrinsic::aarch64_neon_uaddv:
1486 case Intrinsic::aarch64_neon_saddv:
1487 case Intrinsic::aarch64_neon_umaxv:
1488 case Intrinsic::aarch64_neon_smaxv:
1489 case Intrinsic::aarch64_neon_uminv:
1490 case Intrinsic::aarch64_neon_sminv: {
1491 MachineIRBuilder MIB(MI);
1492 MachineRegisterInfo &MRI = *MIB.getMRI();
1493 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1494 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1495 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1496
1497 auto OldDst = MI.getOperand(0).getReg();
1498 auto OldDstTy = MRI.getType(OldDst);
1499 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1500 if (OldDstTy == NewDstTy)
1501 return true;
1502
1503 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1504
1505 Helper.Observer.changingInstr(MI);
1506 MI.getOperand(0).setReg(NewDst);
1507 Helper.Observer.changedInstr(MI);
1508
1509 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1510 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1511 OldDst, NewDst);
1512
1513 return true;
1514 }
1515 case Intrinsic::aarch64_neon_uaddlp:
1516 case Intrinsic::aarch64_neon_saddlp: {
1517 MachineIRBuilder MIB(MI);
1518
1519 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1520 ? AArch64::G_UADDLP
1521 : AArch64::G_SADDLP;
1522 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1523 MI.eraseFromParent();
1524
1525 return true;
1526 }
1527 case Intrinsic::aarch64_neon_uaddlv:
1528 case Intrinsic::aarch64_neon_saddlv: {
1529 MachineIRBuilder MIB(MI);
1530 MachineRegisterInfo &MRI = *MIB.getMRI();
1531
1532 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1533 ? AArch64::G_UADDLV
1534 : AArch64::G_SADDLV;
1535 Register DstReg = MI.getOperand(0).getReg();
1536 Register SrcReg = MI.getOperand(2).getReg();
1537 LLT DstTy = MRI.getType(DstReg);
1538
1539 LLT MidTy, ExtTy;
1540 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1541 MidTy = LLT::fixed_vector(4, 32);
1542 ExtTy = LLT::scalar(32);
1543 } else {
1544 MidTy = LLT::fixed_vector(2, 64);
1545 ExtTy = LLT::scalar(64);
1546 }
1547
1548 Register MidReg =
1549 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1550 Register ZeroReg =
1551 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1552 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1553 {MidReg, ZeroReg})
1554 .getReg(0);
1555
1556 if (DstTy.getScalarSizeInBits() < 32)
1557 MIB.buildTrunc(DstReg, ExtReg);
1558 else
1559 MIB.buildCopy(DstReg, ExtReg);
1560
1561 MI.eraseFromParent();
1562
1563 return true;
1564 }
1565 case Intrinsic::aarch64_neon_smax:
1566 case Intrinsic::aarch64_neon_smin:
1567 case Intrinsic::aarch64_neon_umax:
1568 case Intrinsic::aarch64_neon_umin:
1569 case Intrinsic::aarch64_neon_fmax:
1570 case Intrinsic::aarch64_neon_fmin:
1571 case Intrinsic::aarch64_neon_fmaxnm:
1572 case Intrinsic::aarch64_neon_fminnm: {
1573 MachineIRBuilder MIB(MI);
1574 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1575 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1576 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1577 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1578 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1579 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1580 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1581 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1582 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1583 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1584 {MI.getOperand(2), MI.getOperand(3)});
1585 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1586 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1587 {MI.getOperand(2), MI.getOperand(3)});
1588 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1589 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1590 {MI.getOperand(2), MI.getOperand(3)});
1591 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1592 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1593 {MI.getOperand(2), MI.getOperand(3)});
1594 MI.eraseFromParent();
1595 return true;
1596 }
1597 case Intrinsic::vector_reverse:
1598 // TODO: Add support for vector_reverse
1599 return false;
1600 }
1601
1602 return true;
1603}
1604
1605bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1607 GISelChangeObserver &Observer) const {
1608 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1609 MI.getOpcode() == TargetOpcode::G_LSHR ||
1610 MI.getOpcode() == TargetOpcode::G_SHL);
1611 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1612 // imported patterns can select it later. Either way, it will be legal.
1613 Register AmtReg = MI.getOperand(2).getReg();
1614 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1615 if (!VRegAndVal)
1616 return true;
1617 // Check the shift amount is in range for an immediate form.
1618 int64_t Amount = VRegAndVal->Value.getSExtValue();
1619 if (Amount > 31)
1620 return true; // This will have to remain a register variant.
1621 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1622 Observer.changingInstr(MI);
1623 MI.getOperand(2).setReg(ExtCst.getReg(0));
1624 Observer.changedInstr(MI);
1625 return true;
1626}
1627
1630 Base = Root;
1631 Offset = 0;
1632
1633 Register NewBase;
1634 int64_t NewOffset;
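  // Note (illustrative): isShiftedInt<7, 3> below accepts immediates that
  // are multiples of 8 in the range [-512, 504], i.e. the signed 7-bit,
  // 8-byte-scaled offset field of 64-bit LDP/STP.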
1635 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1636 isShiftedInt<7, 3>(NewOffset)) {
1637 Base = NewBase;
1638 Offset = NewOffset;
1639 }
1640}
1641
1642// FIXME: This should be removed and replaced with the generic bitcast legalize
1643// action.
1644bool AArch64LegalizerInfo::legalizeLoadStore(
1646 GISelChangeObserver &Observer) const {
1647 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1648 MI.getOpcode() == TargetOpcode::G_LOAD);
1649 // Here we just try to handle vector loads/stores where our value type might
1650 // have pointer elements, which the SelectionDAG importer can't handle. To
1651 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1652 // the value to use s64 types.
1653
1654 // Custom legalization requires that the instruction, if not deleted, be
1655 // fully legalized. In order to allow further legalization of the inst, we
1656 // create a new instruction and erase the existing one.
1657
1658 Register ValReg = MI.getOperand(0).getReg();
1659 const LLT ValTy = MRI.getType(ValReg);
1660
1661 if (ValTy == LLT::scalar(128)) {
1662
1663 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1664 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1665 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1666 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1667 bool IsRcpC3 =
1668 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1669
1670 LLT s64 = LLT::scalar(64);
1671
1672 unsigned Opcode;
1673 if (IsRcpC3) {
1674 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1675 } else {
1676 // For LSE2, loads/stores should have been converted to monotonic and had
1677 // a fence inserted after them.
1678 assert(Ordering == AtomicOrdering::Monotonic ||
1679 Ordering == AtomicOrdering::Unordered);
1680 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1681
1682 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1683 }
1684
1686 if (IsLoad) {
1687 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1688 MIRBuilder.buildMergeLikeInstr(
1689 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1690 } else {
1691 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1692 NewI = MIRBuilder.buildInstr(
1693 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1694 }
1695
1696 if (IsRcpC3) {
1697 NewI.addUse(MI.getOperand(1).getReg());
1698 } else {
1699 Register Base;
1700 int Offset;
1701 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1702 NewI.addUse(Base);
1703 NewI.addImm(Offset / 8);
1704 }
1705
1706 NewI.cloneMemRefs(MI);
1708 *MRI.getTargetRegisterInfo(),
1709 *ST->getRegBankInfo());
1710 MI.eraseFromParent();
1711 return true;
1712 }
1713
1714 if (!ValTy.isPointerVector() ||
1715 ValTy.getElementType().getAddressSpace() != 0) {
1716 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1717 return false;
1718 }
1719
1720 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1721 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1722 auto &MMO = **MI.memoperands_begin();
1723 MMO.setType(NewTy);
1724
1725 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1726 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1727 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1728 } else {
1729 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1730 MIRBuilder.buildBitcast(ValReg, NewLoad);
1731 }
1732 MI.eraseFromParent();
1733 return true;
1734}
1735
1736bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1737                                         MachineRegisterInfo &MRI,
1738 MachineIRBuilder &MIRBuilder) const {
1739 MachineFunction &MF = MIRBuilder.getMF();
1740 Align Alignment(MI.getOperand(2).getImm());
1741 Register Dst = MI.getOperand(0).getReg();
1742 Register ListPtr = MI.getOperand(1).getReg();
1743
1744 LLT PtrTy = MRI.getType(ListPtr);
1745 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1746
1747 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1748 const Align PtrAlign = Align(PtrSize);
1749 auto List = MIRBuilder.buildLoad(
1750 PtrTy, ListPtr,
1751      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1752 PtrTy, PtrAlign));
1753
1754 MachineInstrBuilder DstPtr;
1755 if (Alignment > PtrAlign) {
1756 // Realign the list to the actual required alignment.
1757 auto AlignMinus1 =
1758 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1759 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1760 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1761 } else
1762 DstPtr = List;
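  // Illustration of the realignment above: for Alignment == 16 the
  // G_PTR_ADD / G_PTRMASK pair computes DstPtr = (List + 15) & ~15, the usual
  // align-up idiom.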
1763
1764 LLT ValTy = MRI.getType(Dst);
1765 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1766 MIRBuilder.buildLoad(
1767 Dst, DstPtr,
1768      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1769 ValTy, std::max(Alignment, PtrAlign)));
1770
1771 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1772
1773 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1774
1775 MIRBuilder.buildStore(NewList, ListPtr,
1776                        *MF.getMachineMemOperand(MachinePointerInfo(),
1777                                                 MachineMemOperand::MOStore,
1778 PtrTy, PtrAlign));
1779
1780 MI.eraseFromParent();
1781 return true;
1782}
1783
1784bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1785    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1786 // Only legal if we can select immediate forms.
1787 // TODO: Lower this otherwise.
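  // Note: with constant lsb and width operands, G_SBFX / G_UBFX map directly
  // to the SBFX / UBFX immediate forms; a variable lsb or width has no
  // single-instruction AArch64 equivalent, hence the TODO above to lower that
  // case instead.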
1788 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1789 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1790}
1791
1792bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1793                                         MachineRegisterInfo &MRI,
1794 LegalizerHelper &Helper) const {
1795 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1796 // it can be more efficiently lowered to the following sequence that uses
1797 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1798 // registers are cheap.
1799 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1800 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1801 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1802 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1803 //
1804 // For 128 bit vector popcounts, we lower to the following sequence:
1805 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1806 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1807 // uaddlp.4s v0, v0 // v4s32, v2s64
1808 // uaddlp.2d v0, v0 // v2s64
1809 //
1810 // For 64 bit vector popcounts, we lower to the following sequence:
1811 // cnt.8b v0, v0 // v4s16, v2s32
1812 // uaddlp.4h v0, v0 // v4s16, v2s32
1813 // uaddlp.2s v0, v0 // v2s32
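  // In the scalar cases below, the lane sum is produced with the
  // aarch64_neon_uaddlv intrinsic (a single horizontal add into a 32-bit
  // result), matching the ADDV in the sketch above; the vector cases chain
  // aarch64_neon_uaddlp steps instead.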
1814
1815 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1816 Register Dst = MI.getOperand(0).getReg();
1817 Register Val = MI.getOperand(1).getReg();
1818 LLT Ty = MRI.getType(Val);
1819 unsigned Size = Ty.getSizeInBits();
1820
1821 assert(Ty == MRI.getType(Dst) &&
1822 "Expected src and dst to have the same type!");
1823
1824 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1825 LLT s64 = LLT::scalar(64);
1826
1827 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1828 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1829 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1830 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1831
1832 MIRBuilder.buildZExt(Dst, Add);
1833 MI.eraseFromParent();
1834 return true;
1835 }
1836
1837 if (!ST->hasNEON() ||
1838 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1839 // Use generic lowering when custom lowering is not possible.
1840 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1841 Helper.lowerBitCount(MI) ==
1842               LegalizerHelper::LegalizeResult::Legalized;
1843 }
1844
1845 // Pre-conditioning: widen Val up to the nearest vector type.
1846 // s32,s64,v4s16,v2s32 -> v8i8
1847 // v8s16,v4s32,v2s64 -> v16i8
1848 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1849 if (Ty.isScalar()) {
1850 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1851 if (Size == 32) {
1852 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1853 }
1854 }
1855 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1856
1857 // Count bits in each byte-sized lane.
1858 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1859
1860 // Sum across lanes.
1861 Register HSum = CTPOP.getReg(0);
1862 unsigned Opc;
1863 SmallVector<LLT> HAddTys;
1864 if (Ty.isScalar()) {
1865 Opc = Intrinsic::aarch64_neon_uaddlv;
1866 HAddTys.push_back(LLT::scalar(32));
1867 } else if (Ty == LLT::fixed_vector(8, 16)) {
1868 Opc = Intrinsic::aarch64_neon_uaddlp;
1869 HAddTys.push_back(LLT::fixed_vector(8, 16));
1870 } else if (Ty == LLT::fixed_vector(4, 32)) {
1871 Opc = Intrinsic::aarch64_neon_uaddlp;
1872 HAddTys.push_back(LLT::fixed_vector(8, 16));
1873 HAddTys.push_back(LLT::fixed_vector(4, 32));
1874 } else if (Ty == LLT::fixed_vector(2, 64)) {
1875 Opc = Intrinsic::aarch64_neon_uaddlp;
1876 HAddTys.push_back(LLT::fixed_vector(8, 16));
1877 HAddTys.push_back(LLT::fixed_vector(4, 32));
1878 HAddTys.push_back(LLT::fixed_vector(2, 64));
1879 } else if (Ty == LLT::fixed_vector(4, 16)) {
1880 Opc = Intrinsic::aarch64_neon_uaddlp;
1881 HAddTys.push_back(LLT::fixed_vector(4, 16));
1882 } else if (Ty == LLT::fixed_vector(2, 32)) {
1883 Opc = Intrinsic::aarch64_neon_uaddlp;
1884 HAddTys.push_back(LLT::fixed_vector(4, 16));
1885 HAddTys.push_back(LLT::fixed_vector(2, 32));
1886 } else
1887 llvm_unreachable("unexpected vector shape");
1888  MachineInstrBuilder UADD;
1889 for (LLT HTy : HAddTys) {
1890 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1891 HSum = UADD.getReg(0);
1892 }
1893
1894 // Post-conditioning.
1895 if (Ty.isScalar() && (Size == 64 || Size == 128))
1896 MIRBuilder.buildZExt(Dst, UADD);
1897 else
1898 UADD->getOperand(0).setReg(Dst);
1899 MI.eraseFromParent();
1900 return true;
1901}
1902
1903bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1904    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1905 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1906 LLT s64 = LLT::scalar(64);
1907 auto Addr = MI.getOperand(1).getReg();
1908 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1909 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1910 auto DstLo = MRI.createGenericVirtualRegister(s64);
1911 auto DstHi = MRI.createGenericVirtualRegister(s64);
1912
1913  MachineInstrBuilder CAS;
1914 if (ST->hasLSE()) {
1915 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1916 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1917 // the rest of the MIR so we must reassemble the extracted registers into a
1918 // 128-bit known-regclass one with code like this:
1919 //
1920 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1921 // %out = CASP %in1, ...
1922 // %OldLo = G_EXTRACT %out, 0
1923 // %OldHi = G_EXTRACT %out, 64
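    // The REG_SEQUENCE built below is the GISel form of that sketch: sube64
    // carries the low 64 bits and subo64 the high 64 bits of the XSeqPair
    // operand.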
1924 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1925 unsigned Opcode;
1926 switch (Ordering) {
1927    case AtomicOrdering::Acquire:
1928 Opcode = AArch64::CASPAX;
1929 break;
1930    case AtomicOrdering::Release:
1931 Opcode = AArch64::CASPLX;
1932 break;
1933    case AtomicOrdering::AcquireRelease:
1934    case AtomicOrdering::SequentiallyConsistent:
1935 Opcode = AArch64::CASPALX;
1936 break;
1937 default:
1938 Opcode = AArch64::CASPX;
1939 break;
1940 }
1941
1942 LLT s128 = LLT::scalar(128);
1943 auto CASDst = MRI.createGenericVirtualRegister(s128);
1944 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1945 auto CASNew = MRI.createGenericVirtualRegister(s128);
1946 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1947 .addUse(DesiredI->getOperand(0).getReg())
1948 .addImm(AArch64::sube64)
1949 .addUse(DesiredI->getOperand(1).getReg())
1950 .addImm(AArch64::subo64);
1951 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1952 .addUse(NewI->getOperand(0).getReg())
1953 .addImm(AArch64::sube64)
1954 .addUse(NewI->getOperand(1).getReg())
1955 .addImm(AArch64::subo64);
1956
1957 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1958
1959 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1960 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1961 } else {
1962 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1963 // can take arbitrary registers, so it just has the normal GPR64 operands that
1964 // the rest of AArch64 expects.
1965 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1966 unsigned Opcode;
1967 switch (Ordering) {
1968    case AtomicOrdering::Acquire:
1969 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1970 break;
1971    case AtomicOrdering::Release:
1972 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1973 break;
1974    case AtomicOrdering::AcquireRelease:
1975    case AtomicOrdering::SequentiallyConsistent:
1976 Opcode = AArch64::CMP_SWAP_128;
1977 break;
1978 default:
1979 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1980 break;
1981 }
1982
1983 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1984 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1985 {Addr, DesiredI->getOperand(0),
1986 DesiredI->getOperand(1), NewI->getOperand(0),
1987 NewI->getOperand(1)});
1988 }
1989
1990 CAS.cloneMemRefs(MI);
1991  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1992 *MRI.getTargetRegisterInfo(),
1993 *ST->getRegBankInfo());
1994
1995 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1996 MI.eraseFromParent();
1997 return true;
1998}
1999
2000bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2001 LegalizerHelper &Helper) const {
2002 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2003 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2004 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2005 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2006 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
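  // The two instructions above use the identity cttz(x) == ctlz(bitreverse(x)):
  // e.g. a value ending in ...10100 (two trailing zeros) bit-reverses to a
  // value with two leading zeros, and G_BITREVERSE + G_CTLZ select to
  // RBIT + CLZ on AArch64.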
2007 MI.eraseFromParent();
2008 return true;
2009}
2010
2011bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2012 LegalizerHelper &Helper) const {
2013 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2014
2015 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2016 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2017 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2018 // the instruction).
2019 auto &Value = MI.getOperand(1);
2020 Register ExtValueReg =
2021 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2022 Value.setReg(ExtValueReg);
2023 return true;
2024 }
2025
2026 return false;
2027}
2028
2029bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2030    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2031 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2032 auto VRegAndVal =
2033 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2034 if (VRegAndVal)
2035 return true;
2036 return Helper.lowerExtractInsertVectorElt(MI) !=
2037         LegalizerHelper::LegalizeResult::UnableToLegalize;
2038}
2039
2040bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2041 MachineInstr &MI, LegalizerHelper &Helper) const {
2042 MachineFunction &MF = *MI.getParent()->getParent();
2043 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2044 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2045
2046 // If stack probing is not enabled for this function, use the default
2047 // lowering.
2048 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2049 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2050 "inline-asm") {
2051 Helper.lowerDynStackAlloc(MI);
2052 return true;
2053 }
2054
2055 Register Dst = MI.getOperand(0).getReg();
2056 Register AllocSize = MI.getOperand(1).getReg();
2057 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2058
2059 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2060 "Unexpected type for dynamic alloca");
2061 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2062 "Unexpected type for dynamic alloca");
2063
2064 LLT PtrTy = MRI.getType(Dst);
2065 Register SPReg =
2066      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2067 Register SPTmp =
2068 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2069 auto NewMI =
2070 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
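  // PROBED_STACKALLOC_DYN is a target pseudo whose later expansion probes the
  // newly allocated stack region page by page (stack-clash protection),
  // rather than claiming it with a single unchecked SP adjustment.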
2071 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2072 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2073 MIRBuilder.buildCopy(Dst, SPTmp);
2074
2075 MI.eraseFromParent();
2076 return true;
2077}
2078
2079bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2080 LegalizerHelper &Helper) const {
2081 MachineIRBuilder &MIB = Helper.MIRBuilder;
2082 auto &AddrVal = MI.getOperand(0);
2083
2084 int64_t IsWrite = MI.getOperand(1).getImm();
2085 int64_t Locality = MI.getOperand(2).getImm();
2086 int64_t IsData = MI.getOperand(3).getImm();
2087
2088 bool IsStream = Locality == 0;
2089 if (Locality != 0) {
2090 assert(Locality <= 3 && "Prefetch locality out-of-range");
2091    // The locality degree is the inverse of the PRFM target cache level:
2092    // the encoding starts at 0 for L1, so flip the number around
2093    // (locality 3 maps to level 0).
2094 Locality = 3 - Locality;
2095 }
2096
2097 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
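  // PrfOp follows the PRFM immediate layout: bit 4 = write, bit 3 = instruction
  // (vs. data) prefetch, bits 2-1 = target cache level, bit 0 = streaming
  // policy. For example IsWrite=1, Locality=3, IsData=1 gives PrfOp = 0b10000,
  // i.e. PSTL1KEEP.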
2098
2099 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2100 MI.eraseFromParent();
2101 return true;
2102}