1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 getLegacyLegalizerInfo().computeTables();
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
95 getActionDefinitionsBuilder(
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
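// Illustrative note (not part of the upstream source): under these rules an
// s1 G_IMPLICIT_DEF is widened to s8 by clampScalar(0, s8, s64), and a v4s64
// value is narrowed to two v2s64 pieces by clampNumElements(0, v2s64, v2s64),
// assuming the usual LegalizeRuleSet semantics.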
107
108 getActionDefinitionsBuilder(G_PHI)
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
120 getActionDefinitionsBuilder(G_BSWAP)
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
183 getActionDefinitionsBuilder(G_PTR_ADD)
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220 // Make clamping conditional on the CSSC extension: without legal types we
221 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
222 // if we detect a type smaller than 32-bit.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
231 // FIXME: This shouldn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
235
236 getActionDefinitionsBuilder(
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
260 getActionDefinitionsBuilder(G_FREM)
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
270 getActionDefinitionsBuilder(
271 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
272 G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
273 // We need a call for these, so we always need to scalarize.
274 .scalarize(0)
275 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
276 .minScalar(0, s32)
277 .libcallFor({s32, s64});
278 getActionDefinitionsBuilder(G_FPOWI)
279 .scalarize(0)
280 .minScalar(0, s32)
281 .libcallFor({{s32, s32}, {s64, s32}});
282
283 getActionDefinitionsBuilder(G_INSERT)
284 .legalIf(all(typeInSet(0, {s32, s64, p0}),
285 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
287 .clampScalar(0, s32, s64)
289 .minScalar(1, s8)
290 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
291 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
292
293 getActionDefinitionsBuilder(G_EXTRACT)
294 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
295 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
297 .clampScalar(1, s32, s128)
299 .minScalar(0, s16)
300 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
301 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
302 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
303
304
305 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
306 auto &Actions = getActionDefinitionsBuilder(Op);
307
308 if (Op == G_SEXTLOAD)
309 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
310
311 // Atomics have zero extending behavior.
312 Actions
313 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
314 {s32, p0, s16, 8},
315 {s32, p0, s32, 8},
316 {s64, p0, s8, 2},
317 {s64, p0, s16, 2},
318 {s64, p0, s32, 4},
319 {s64, p0, s64, 8},
320 {p0, p0, s64, 8},
321 {v2s32, p0, s64, 8}})
322 .widenScalarToNextPow2(0)
323 .clampScalar(0, s32, s64)
324 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
325 // how to do that yet.
326 .unsupportedIfMemSizeNotPow2()
327 // Lower anything left over into G_*EXT and G_LOAD
328 .lower();
329 }
330
331 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
332 const LLT &ValTy = Query.Types[0];
333 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
334 };
335
336 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
337 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
338
339 if (ST.hasSVE()) {
340 LoadActions.legalForTypesWithMemDesc({
341 // 128 bit base sizes
342 {nxv16s8, p0, nxv16s8, 8},
343 {nxv8s16, p0, nxv8s16, 8},
344 {nxv4s32, p0, nxv4s32, 8},
345 {nxv2s64, p0, nxv2s64, 8},
346 });
347
348 // TODO: Add nxv2p0. Consider bitcastIf.
349 // See #92130
350 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
351 StoreActions.legalForTypesWithMemDesc({
352 // 128 bit base sizes
353 {nxv16s8, p0, nxv16s8, 8},
354 {nxv8s16, p0, nxv8s16, 8},
355 {nxv4s32, p0, nxv4s32, 8},
356 {nxv2s64, p0, nxv2s64, 8},
357 });
358 }
359
360 LoadActions
361 .customIf([=](const LegalityQuery &Query) {
362 return HasRCPC3 && Query.Types[0] == s128 &&
363 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
364 })
365 .customIf([=](const LegalityQuery &Query) {
366 return Query.Types[0] == s128 &&
367 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
368 })
369 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
370 {s16, p0, s16, 8},
371 {s32, p0, s32, 8},
372 {s64, p0, s64, 8},
373 {p0, p0, s64, 8},
374 {s128, p0, s128, 8},
375 {v8s8, p0, s64, 8},
376 {v16s8, p0, s128, 8},
377 {v4s16, p0, s64, 8},
378 {v8s16, p0, s128, 8},
379 {v2s32, p0, s64, 8},
380 {v4s32, p0, s128, 8},
381 {v2s64, p0, s128, 8}})
382 // These extends are also legal
383 .legalForTypesWithMemDesc(
384 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
385 .widenScalarToNextPow2(0, /* MinSize = */ 8)
386 .clampMaxNumElements(0, s8, 16)
387 .clampMaxNumElements(0, s16, 8)
388 .clampMaxNumElements(0, s32, 4)
389 .clampMaxNumElements(0, s64, 2)
390 .clampMaxNumElements(0, p0, 2)
391 .lowerIfMemSizeNotByteSizePow2()
392 .clampScalar(0, s8, s64)
393 .narrowScalarIf(
394 [=](const LegalityQuery &Query) {
395 // Clamp extending load results to 32-bits.
396 return Query.Types[0].isScalar() &&
397 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
398 Query.Types[0].getSizeInBits() > 32;
399 },
400 changeTo(0, s32))
401 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
402 .bitcastIf(typeInSet(0, {v4s8}),
403 [=](const LegalityQuery &Query) {
404 const LLT VecTy = Query.Types[0];
405 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
406 })
407 .customIf(IsPtrVecPred)
408 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
409
410 StoreActions
411 .customIf([=](const LegalityQuery &Query) {
412 return HasRCPC3 && Query.Types[0] == s128 &&
413 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
414 })
415 .customIf([=](const LegalityQuery &Query) {
416 return Query.Types[0] == s128 &&
417 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
418 })
419 .legalForTypesWithMemDesc(
420 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
421 {s32, p0, s8, 8}, // truncstorei8 from s32
422 {s64, p0, s8, 8}, // truncstorei8 from s64
423 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
424 {s64, p0, s16, 8}, // truncstorei16 from s64
425 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
426 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
427 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
428 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
429 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
430 .clampScalar(0, s8, s64)
431 .lowerIf([=](const LegalityQuery &Query) {
432 return Query.Types[0].isScalar() &&
433 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
434 })
435 // Maximum: sN * k = 128
436 .clampMaxNumElements(0, s8, 16)
437 .clampMaxNumElements(0, s16, 8)
438 .clampMaxNumElements(0, s32, 4)
439 .clampMaxNumElements(0, s64, 2)
440 .clampMaxNumElements(0, p0, 2)
441 .lowerIfMemSizeNotPow2()
442 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
443 .bitcastIf(typeInSet(0, {v4s8}),
444 [=](const LegalityQuery &Query) {
445 const LLT VecTy = Query.Types[0];
446 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
447 })
448 .customIf(IsPtrVecPred)
449 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
450
451 getActionDefinitionsBuilder(G_INDEXED_STORE)
452 // Idx 0 == Ptr, Idx 1 == Val
453 // TODO: we can implement legalizations but as of now these are
454 // generated in a very specific way.
455 .legalForTypesWithMemDesc({
456 {p0, s8, s8, 8},
457 {p0, s16, s16, 8},
458 {p0, s32, s8, 8},
459 {p0, s32, s16, 8},
460 {p0, s32, s32, 8},
461 {p0, s64, s64, 8},
462 {p0, p0, p0, 8},
463 {p0, v8s8, v8s8, 8},
464 {p0, v16s8, v16s8, 8},
465 {p0, v4s16, v4s16, 8},
466 {p0, v8s16, v8s16, 8},
467 {p0, v2s32, v2s32, 8},
468 {p0, v4s32, v4s32, 8},
469 {p0, v2s64, v2s64, 8},
470 {p0, v2p0, v2p0, 8},
471 {p0, s128, s128, 8},
472 })
473 .unsupported();
474
475 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
476 LLT LdTy = Query.Types[0];
477 LLT PtrTy = Query.Types[1];
478 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
479 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
480 return false;
481 if (PtrTy != p0)
482 return false;
483 return true;
484 };
485 getActionDefinitionsBuilder(G_INDEXED_LOAD)
486 .unsupportedIf(
487 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
488 .legalIf(IndexedLoadBasicPred)
489 .unsupported();
490 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
491 .unsupportedIf(
492 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
493 .legalIf(all(typeInSet(0, {s16, s32, s64}),
494 LegalityPredicate([=](const LegalityQuery &Q) {
495 LLT LdTy = Q.Types[0];
496 LLT PtrTy = Q.Types[1];
497 LLT MemTy = Q.MMODescrs[0].MemoryTy;
498 if (PtrTy != p0)
499 return false;
500 if (LdTy == s16)
501 return MemTy == s8;
502 if (LdTy == s32)
503 return MemTy == s8 || MemTy == s16;
504 if (LdTy == s64)
505 return MemTy == s8 || MemTy == s16 || MemTy == s32;
506 return false;
507 })))
508 .unsupported();
509
510 // Constants
511 getActionDefinitionsBuilder(G_CONSTANT)
512 .legalFor({p0, s8, s16, s32, s64})
513 .widenScalarToNextPow2(0)
514 .clampScalar(0, s8, s64);
515 getActionDefinitionsBuilder(G_FCONSTANT)
516 .legalIf([=](const LegalityQuery &Query) {
517 const auto &Ty = Query.Types[0];
518 if (HasFP16 && Ty == s16)
519 return true;
520 return Ty == s32 || Ty == s64 || Ty == s128;
521 })
522 .clampScalar(0, MinFPScalar, s128);
523
524 // FIXME: fix moreElementsToNextPow2
525 getActionDefinitionsBuilder(G_ICMP)
526 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
528 .clampScalar(1, s32, s64)
529 .clampScalar(0, s32, s32)
530 .minScalarEltSameAsIf(
531 [=](const LegalityQuery &Query) {
532 const LLT &Ty = Query.Types[0];
533 const LLT &SrcTy = Query.Types[1];
534 return Ty.isVector() && !SrcTy.isPointerVector() &&
535 Ty.getElementType() != SrcTy.getElementType();
536 },
537 0, 1)
538 .minScalarOrEltIf(
539 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
540 1, s32)
541 .minScalarOrEltIf(
542 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
543 s64)
545 .clampNumElements(1, v8s8, v16s8)
546 .clampNumElements(1, v4s16, v8s16)
547 .clampNumElements(1, v2s32, v4s32)
548 .clampNumElements(1, v2s64, v2s64)
549 .customIf(isVector(0));
550
551 getActionDefinitionsBuilder(G_FCMP)
552 .legalFor({{s32, MinFPScalar},
553 {s32, s32},
554 {s32, s64},
555 {v4s32, v4s32},
556 {v2s32, v2s32},
557 {v2s64, v2s64}})
558 .legalIf([=](const LegalityQuery &Query) {
559 const auto &Ty = Query.Types[1];
560 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
561 })
563 .clampScalar(0, s32, s32)
564 .minScalarOrElt(1, MinFPScalar)
565 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
566 .minScalarEltSameAsIf(
567 [=](const LegalityQuery &Query) {
568 const LLT &Ty = Query.Types[0];
569 const LLT &SrcTy = Query.Types[1];
570 return Ty.isVector() && !SrcTy.isPointerVector() &&
571 Ty.getElementType() != SrcTy.getElementType();
572 },
573 0, 1)
574 .clampNumElements(1, v4s16, v8s16)
575 .clampNumElements(1, v2s32, v4s32)
576 .clampMaxNumElements(1, s64, 2)
577 .moreElementsToNextPow2(1)
578 .libcallFor({{s32, s128}});
579
580 // Extensions
581 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
582 unsigned DstSize = Query.Types[0].getSizeInBits();
583
584 // Handle legal vectors using legalFor
585 if (Query.Types[0].isVector())
586 return false;
587
588 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
589 return false; // Extending to a scalar s128 needs narrowing.
590
591 const LLT &SrcTy = Query.Types[1];
592
593 // Make sure we fit in a register otherwise. Don't bother checking that
594 // the source type is below 128 bits. We shouldn't be allowing anything
595 // through which is wider than the destination in the first place.
596 unsigned SrcSize = SrcTy.getSizeInBits();
597 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
598 return false;
599
600 return true;
601 };
602 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
603 .legalIf(ExtLegalFunc)
604 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
605 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
607 .clampMaxNumElements(1, s8, 8)
608 .clampMaxNumElements(1, s16, 4)
609 .clampMaxNumElements(1, s32, 2)
610 // Tries to convert a large EXTEND into two smaller EXTENDs
611 .lowerIf([=](const LegalityQuery &Query) {
612 return (Query.Types[0].getScalarSizeInBits() >
613 Query.Types[1].getScalarSizeInBits() * 2) &&
614 Query.Types[0].isVector() &&
615 (Query.Types[1].getScalarSizeInBits() == 8 ||
616 Query.Types[1].getScalarSizeInBits() == 16);
617 })
618 .clampMinNumElements(1, s8, 8)
619 .clampMinNumElements(1, s16, 4);
620
621 getActionDefinitionsBuilder(G_TRUNC)
622 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
624 .clampMaxNumElements(0, s8, 8)
625 .clampMaxNumElements(0, s16, 4)
626 .clampMaxNumElements(0, s32, 2)
627 .minScalarOrEltIf(
628 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
629 0, s8)
630 .lowerIf([=](const LegalityQuery &Query) {
631 LLT DstTy = Query.Types[0];
632 LLT SrcTy = Query.Types[1];
633 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
634 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
635 })
636 .clampMinNumElements(0, s8, 8)
637 .clampMinNumElements(0, s16, 4)
638 .alwaysLegal();
639
640 getActionDefinitionsBuilder(G_SEXT_INREG)
641 .legalFor({s32, s64})
642 .legalFor(PackedVectorAllTypeList)
643 .maxScalar(0, s64)
644 .clampNumElements(0, v8s8, v16s8)
645 .clampNumElements(0, v4s16, v8s16)
646 .clampNumElements(0, v2s32, v4s32)
647 .clampMaxNumElements(0, s64, 2)
648 .lower();
649
650 // FP conversions
651 getActionDefinitionsBuilder(G_FPTRUNC)
652 .legalFor(
653 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
654 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
655 .clampNumElements(0, v4s16, v4s16)
656 .clampNumElements(0, v2s32, v2s32)
657 .scalarize(0);
658
659 getActionDefinitionsBuilder(G_FPEXT)
660 .legalFor(
661 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
662 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
663 .clampNumElements(0, v4s32, v4s32)
664 .clampNumElements(0, v2s64, v2s64)
665 .scalarize(0);
666
667 // Conversions
668 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
669 .legalFor({{s32, s32},
670 {s64, s32},
671 {s32, s64},
672 {s64, s64},
673 {v2s64, v2s64},
674 {v4s32, v4s32},
675 {v2s32, v2s32}})
676 .legalIf([=](const LegalityQuery &Query) {
677 return HasFP16 &&
678 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
679 Query.Types[1] == v8s16) &&
680 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
681 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
682 })
683 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
684 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
685 // The range of an fp16 value fits into an i17, so we can lower the width
686 // to i64.
687 .narrowScalarIf(
688 [=](const LegalityQuery &Query) {
689 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
690 },
691 changeTo(0, s64))
693 .widenScalarOrEltToNextPow2OrMinSize(0)
694 .minScalar(0, s32)
695 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
696 .widenScalarIf(
697 [=](const LegalityQuery &Query) {
698 return Query.Types[0].getScalarSizeInBits() <= 64 &&
699 Query.Types[0].getScalarSizeInBits() >
700 Query.Types[1].getScalarSizeInBits();
701 },
703 .widenScalarIf(
704 [=](const LegalityQuery &Query) {
705 return Query.Types[1].getScalarSizeInBits() <= 64 &&
706 Query.Types[0].getScalarSizeInBits() <
707 Query.Types[1].getScalarSizeInBits();
708 },
710 .clampNumElements(0, v4s16, v8s16)
711 .clampNumElements(0, v2s32, v4s32)
712 .clampMaxNumElements(0, s64, 2)
713 .libcallFor(
714 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
715
716 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
717 .legalFor({{s32, s32},
718 {s64, s32},
719 {s32, s64},
720 {s64, s64},
721 {v2s64, v2s64},
722 {v4s32, v4s32},
723 {v2s32, v2s32}})
724 .legalIf([=](const LegalityQuery &Query) {
725 return HasFP16 &&
726 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
727 Query.Types[0] == v8s16) &&
728 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
729 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
730 })
731 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
732 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
734 .widenScalarOrEltToNextPow2OrMinSize(1)
735 .minScalar(1, s32)
736 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
737 .widenScalarIf(
738 [=](const LegalityQuery &Query) {
739 return Query.Types[1].getScalarSizeInBits() <= 64 &&
740 Query.Types[0].getScalarSizeInBits() <
741 Query.Types[1].getScalarSizeInBits();
742 },
744 .widenScalarIf(
745 [=](const LegalityQuery &Query) {
746 return Query.Types[0].getScalarSizeInBits() <= 64 &&
747 Query.Types[0].getScalarSizeInBits() >
748 Query.Types[1].getScalarSizeInBits();
749 },
751 .clampNumElements(0, v4s16, v8s16)
752 .clampNumElements(0, v2s32, v4s32)
753 .clampMaxNumElements(0, s64, 2)
754 .libcallFor({{s16, s128},
755 {s32, s128},
756 {s64, s128},
757 {s128, s128},
758 {s128, s32},
759 {s128, s64}});
760
761 // Control-flow
762 getActionDefinitionsBuilder(G_BRCOND)
763 .legalFor({s32})
764 .clampScalar(0, s32, s32);
765 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
766
767 getActionDefinitionsBuilder(G_SELECT)
768 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
769 .widenScalarToNextPow2(0)
770 .clampScalar(0, s32, s64)
771 .clampScalar(1, s32, s32)
773 .lowerIf(isVector(0));
774
775 // Pointer-handling
776 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
777
778 if (TM.getCodeModel() == CodeModel::Small)
779 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
780 else
781 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
782
783 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
784 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
785
786 getActionDefinitionsBuilder(G_PTRTOINT)
787 .legalFor({{s64, p0}, {v2s64, v2p0}})
788 .widenScalarToNextPow2(0, 64)
789 .clampScalar(0, s64, s64);
790
791 getActionDefinitionsBuilder(G_INTTOPTR)
792 .unsupportedIf([&](const LegalityQuery &Query) {
793 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
794 })
795 .legalFor({{p0, s64}, {v2p0, v2s64}});
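// Illustrative note (not part of the upstream source): because pointers are
// 64 bits here, an s32 -> p0 G_INTTOPTR hits the unsupportedIf rule above;
// the integer has to be widened to s64 first so that it matches the legal
// {p0, s64} pairing.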
796
797 // Casts for 32 and 64-bit width type are just copies.
798 // Same for 128-bit width type, except they are on the FPR bank.
799 getActionDefinitionsBuilder(G_BITCAST)
800 // Keeping 32-bit instructions legal to prevent regression in some tests
801 .legalForCartesianProduct({s32, v2s16, v4s8})
802 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
803 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
804 .lowerIf([=](const LegalityQuery &Query) {
805 return Query.Types[0].isVector() != Query.Types[1].isVector();
806 })
808 .clampNumElements(0, v8s8, v16s8)
809 .clampNumElements(0, v4s16, v8s16)
810 .clampNumElements(0, v2s32, v4s32)
811 .lower();
812
813 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
814
815 // va_list must be a pointer, but most sized types are pretty easy to handle
816 // as the destination.
817 getActionDefinitionsBuilder(G_VAARG)
818 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
819 .clampScalar(0, s8, s64)
820 .widenScalarToNextPow2(0, /*Min*/ 8);
821
822 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
823 .lowerIf(
824 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
825
826 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
827 return ST.outlineAtomics() && !ST.hasLSE();
828 };
829
830 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
831 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
832 predNot(UseOutlineAtomics)))
833 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
834 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
835 return Query.Types[0].getSizeInBits() == 128 &&
836 !UseOutlineAtomics(Query);
837 })
838 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
839 UseOutlineAtomics))
840 .clampScalar(0, s32, s64);
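// Illustrative note (not part of the upstream source): when the subtarget
// asks for outlined atomics and has no LSE, a 32- or 64-bit G_ATOMIC_CMPXCHG
// becomes a libcall to the outline-atomics helpers (the __aarch64_cas*
// family); with LSE available the operation stays legal and can select a CAS
// instruction directly.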
841
842 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
843 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
844 G_ATOMICRMW_XOR})
845 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
846 predNot(UseOutlineAtomics)))
847 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
848 UseOutlineAtomics))
849 .clampScalar(0, s32, s64);
850
851 // Do not outline these atomic operations, as per comment in
852 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
853 getActionDefinitionsBuilder(
854 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
855 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
856 .clampScalar(0, s32, s64);
857
858 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
859
860 // Merge/Unmerge
861 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
862 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
863 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
864 getActionDefinitionsBuilder(Op)
865 .widenScalarToNextPow2(LitTyIdx, 8)
866 .widenScalarToNextPow2(BigTyIdx, 32)
867 .clampScalar(LitTyIdx, s8, s64)
868 .clampScalar(BigTyIdx, s32, s128)
869 .legalIf([=](const LegalityQuery &Q) {
870 switch (Q.Types[BigTyIdx].getSizeInBits()) {
871 case 32:
872 case 64:
873 case 128:
874 break;
875 default:
876 return false;
877 }
878 switch (Q.Types[LitTyIdx].getSizeInBits()) {
879 case 8:
880 case 16:
881 case 32:
882 case 64:
883 return true;
884 default:
885 return false;
886 }
887 });
888 }
889
890 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
891 .unsupportedIf([=](const LegalityQuery &Query) {
892 const LLT &EltTy = Query.Types[1].getElementType();
893 return Query.Types[0] != EltTy;
894 })
895 .minScalar(2, s64)
896 .customIf([=](const LegalityQuery &Query) {
897 const LLT &VecTy = Query.Types[1];
898 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
899 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
900 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
901 })
902 .minScalarOrEltIf(
903 [=](const LegalityQuery &Query) {
904 // We want to promote <M x s1> to <M x s64> if that wouldn't
905 // cause the total vec size to be > 128b.
906 return Query.Types[1].getNumElements() <= 2;
907 },
908 0, s64)
909 .minScalarOrEltIf(
910 [=](const LegalityQuery &Query) {
911 return Query.Types[1].getNumElements() <= 4;
912 },
913 0, s32)
914 .minScalarOrEltIf(
915 [=](const LegalityQuery &Query) {
916 return Query.Types[1].getNumElements() <= 8;
917 },
918 0, s16)
919 .minScalarOrEltIf(
920 [=](const LegalityQuery &Query) {
921 return Query.Types[1].getNumElements() <= 16;
922 },
923 0, s8)
924 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
926 .clampMaxNumElements(1, s64, 2)
927 .clampMaxNumElements(1, s32, 4)
928 .clampMaxNumElements(1, s16, 8)
929 .clampMaxNumElements(1, s8, 16)
930 .clampMaxNumElements(1, p0, 2);
931
932 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
933 .legalIf(
934 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
937 .clampNumElements(0, v8s8, v16s8)
938 .clampNumElements(0, v4s16, v8s16)
939 .clampNumElements(0, v2s32, v4s32)
940 .clampMaxNumElements(0, s64, 2)
941 .clampMaxNumElements(0, p0, 2);
942
943 getActionDefinitionsBuilder(G_BUILD_VECTOR)
944 .legalFor({{v8s8, s8},
945 {v16s8, s8},
946 {v4s16, s16},
947 {v8s16, s16},
948 {v2s32, s32},
949 {v4s32, s32},
950 {v2p0, p0},
951 {v2s64, s64}})
952 .clampNumElements(0, v4s32, v4s32)
953 .clampNumElements(0, v2s64, v2s64)
954 .minScalarOrElt(0, s8)
957 .minScalarSameAs(1, 0);
958
959 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
960
961 getActionDefinitionsBuilder(G_CTLZ)
962 .legalForCartesianProduct(
963 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
964 .scalarize(1)
965 .widenScalarToNextPow2(1, /*Min=*/32)
966 .clampScalar(1, s32, s64)
967 .scalarSameSizeAs(0, 1);
968 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
969
970 // TODO: Custom lowering for v2s32, v4s32, v2s64.
971 getActionDefinitionsBuilder(G_BITREVERSE)
972 .legalFor({s32, s64, v8s8, v16s8})
973 .widenScalarToNextPow2(0, /*Min = */ 32)
974 .clampScalar(0, s32, s64);
975
976 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
977
978 getActionDefinitionsBuilder(G_CTTZ)
979 .lowerIf(isVector(0))
980 .widenScalarToNextPow2(1, /*Min=*/32)
981 .clampScalar(1, s32, s64)
982 .scalarSameSizeAs(0, 1)
983 .legalIf([=](const LegalityQuery &Query) {
984 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
985 })
986 .customIf([=](const LegalityQuery &Query) {
987 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
988 });
989
990 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
991 .legalIf([=](const LegalityQuery &Query) {
992 const LLT &DstTy = Query.Types[0];
993 const LLT &SrcTy = Query.Types[1];
994 // For now just support the TBL2 variant which needs the source vectors
995 // to be the same size as the dest.
996 if (DstTy != SrcTy)
997 return false;
998 return llvm::is_contained(
999 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1000 })
1001 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1002 // just want those lowered into G_BUILD_VECTOR
1003 .lowerIf([=](const LegalityQuery &Query) {
1004 return !Query.Types[1].isVector();
1005 })
1006 .moreElementsIf(
1007 [](const LegalityQuery &Query) {
1008 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1009 Query.Types[0].getNumElements() >
1010 Query.Types[1].getNumElements();
1011 },
1012 changeTo(1, 0))
1014 .moreElementsIf(
1015 [](const LegalityQuery &Query) {
1016 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1017 Query.Types[0].getNumElements() <
1018 Query.Types[1].getNumElements();
1019 },
1020 changeTo(0, 1))
1021 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1022 .clampNumElements(0, v8s8, v16s8)
1023 .clampNumElements(0, v4s16, v8s16)
1024 .clampNumElements(0, v4s32, v4s32)
1025 .clampNumElements(0, v2s64, v2s64);
1026
1027 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1028 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1029 .bitcastIf(
1030 [=](const LegalityQuery &Query) {
1031 return Query.Types[0].getSizeInBits() <= 128 &&
1032 Query.Types[1].getSizeInBits() <= 64;
1033 },
1034 [=](const LegalityQuery &Query) {
1035 const LLT DstTy = Query.Types[0];
1036 const LLT SrcTy = Query.Types[1];
1037 return std::pair(
1038 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1041 SrcTy.getNumElements())));
1042 });
1043
1044 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1045
1046 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1047
1048 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1049
1050 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1051
1052 if (ST.hasMOPS()) {
1053 // G_BZERO is not supported. Currently it is only emitted by
1054 // PreLegalizerCombiner for G_MEMSET with zero constant.
1055 getActionDefinitionsBuilder(G_BZERO).unsupported();
1056
1057 getActionDefinitionsBuilder(G_MEMSET)
1058 .legalForCartesianProduct({p0}, {s64}, {s64})
1059 .customForCartesianProduct({p0}, {s8}, {s64})
1060 .immIdx(0); // Inform verifier imm idx 0 is handled.
1061
1062 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1063 .legalForCartesianProduct({p0}, {p0}, {s64})
1064 .immIdx(0); // Inform verifier imm idx 0 is handled.
1065
1066 // G_MEMCPY_INLINE does not have a tailcall immediate
1067 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1068 .legalForCartesianProduct({p0}, {p0}, {s64});
1069
1070 } else {
1071 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1072 .libcall();
1073 }
1074
1075 // FIXME: Legal vector types are only legal with NEON.
1076 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1077 if (HasCSSC)
1078 ABSActions
1079 .legalFor({s32, s64});
1080 ABSActions.legalFor(PackedVectorAllTypeList)
1081 .customIf([=](const LegalityQuery &Q) {
1082 // TODO: Fix suboptimal codegen for 128+ bit types.
1083 LLT SrcTy = Q.Types[0];
1084 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1085 })
1086 .widenScalarIf(
1087 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1088 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1089 .widenScalarIf(
1090 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1091 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1092 .clampNumElements(0, v8s8, v16s8)
1093 .clampNumElements(0, v4s16, v8s16)
1094 .clampNumElements(0, v2s32, v4s32)
1095 .clampNumElements(0, v2s64, v2s64)
1096 .moreElementsToNextPow2(0)
1097 .lower();
1098
1099 // For fadd reductions we have pairwise operations available. We treat the
1100 // usual legal types as legal and handle the lowering to pairwise instructions
1101 // later.
1102 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1103 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1104 .legalIf([=](const LegalityQuery &Query) {
1105 const auto &Ty = Query.Types[1];
1106 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1107 })
1108 .minScalarOrElt(0, MinFPScalar)
1109 .clampMaxNumElements(1, s64, 2)
1110 .clampMaxNumElements(1, s32, 4)
1111 .clampMaxNumElements(1, s16, 8)
1112 .lower();
1113
1114 // For fmul reductions we need to split up into individual operations. We
1115 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1116 // smaller types, followed by scalarizing what remains.
1117 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1118 .minScalarOrElt(0, MinFPScalar)
1119 .clampMaxNumElements(1, s64, 2)
1120 .clampMaxNumElements(1, s32, 4)
1121 .clampMaxNumElements(1, s16, 8)
1122 .clampMaxNumElements(1, s32, 2)
1123 .clampMaxNumElements(1, s16, 4)
1124 .scalarize(1)
1125 .lower();
1126
1127 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1128 .scalarize(2)
1129 .lower();
1130
1131 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1132 .legalFor({{s8, v16s8},
1133 {s8, v8s8},
1134 {s16, v8s16},
1135 {s16, v4s16},
1136 {s32, v4s32},
1137 {s32, v2s32},
1138 {s64, v2s64}})
1139 .clampMaxNumElements(1, s64, 2)
1140 .clampMaxNumElements(1, s32, 4)
1141 .clampMaxNumElements(1, s16, 8)
1142 .clampMaxNumElements(1, s8, 16)
1143 .lower();
1144
1145 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1146 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1147 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1148 .legalIf([=](const LegalityQuery &Query) {
1149 const auto &Ty = Query.Types[1];
1150 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1151 })
1152 .minScalarOrElt(0, MinFPScalar)
1153 .clampMaxNumElements(1, s64, 2)
1154 .clampMaxNumElements(1, s32, 4)
1155 .clampMaxNumElements(1, s16, 8)
1156 .lower();
1157
1158 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1159 .clampMaxNumElements(1, s32, 2)
1160 .clampMaxNumElements(1, s16, 4)
1161 .clampMaxNumElements(1, s8, 8)
1162 .scalarize(1)
1163 .lower();
1164
1165 getActionDefinitionsBuilder(
1166 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1167 .legalFor({{s8, v8s8},
1168 {s8, v16s8},
1169 {s16, v4s16},
1170 {s16, v8s16},
1171 {s32, v2s32},
1172 {s32, v4s32}})
1173 .moreElementsIf(
1174 [=](const LegalityQuery &Query) {
1175 return Query.Types[1].isVector() &&
1176 Query.Types[1].getElementType() != s8 &&
1177 Query.Types[1].getNumElements() & 1;
1178 },
1180 .clampMaxNumElements(1, s64, 2)
1181 .clampMaxNumElements(1, s32, 4)
1182 .clampMaxNumElements(1, s16, 8)
1183 .clampMaxNumElements(1, s8, 16)
1184 .scalarize(1)
1185 .lower();
1186
1187 getActionDefinitionsBuilder(
1188 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1189 // Try to break down into smaller vectors as long as they're at least 64
1190 // bits. This lets us use vector operations for some parts of the
1191 // reduction.
1192 .fewerElementsIf(
1193 [=](const LegalityQuery &Q) {
1194 LLT SrcTy = Q.Types[1];
1195 if (SrcTy.isScalar())
1196 return false;
1197 if (!isPowerOf2_32(SrcTy.getNumElements()))
1198 return false;
1199 // We can usually perform 64b vector operations.
1200 return SrcTy.getSizeInBits() > 64;
1201 },
1202 [=](const LegalityQuery &Q) {
1203 LLT SrcTy = Q.Types[1];
1204 return std::make_pair(1, SrcTy.divide(2));
1205 })
1206 .scalarize(1)
1207 .lower();
1208
1209 // TODO: Update this to correct handling when adding AArch64/SVE support.
1210 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1211
1212 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1213 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1214 .lower();
1215
1216 getActionDefinitionsBuilder(G_ROTR)
1217 .legalFor({{s32, s64}, {s64, s64}})
1218 .customIf([=](const LegalityQuery &Q) {
1219 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1220 })
1221 .lower();
1223
1224 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1225 .customFor({{s32, s32}, {s64, s64}});
1226
1227 auto always = [=](const LegalityQuery &Q) { return true; };
1228 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1229 if (HasCSSC)
1230 CTPOPActions
1231 .legalFor({{s32, s32},
1232 {s64, s64},
1233 {v8s8, v8s8},
1234 {v16s8, v16s8}})
1235 .customFor({{s128, s128},
1236 {v2s64, v2s64},
1237 {v2s32, v2s32},
1238 {v4s32, v4s32},
1239 {v4s16, v4s16},
1240 {v8s16, v8s16}});
1241 else
1242 CTPOPActions
1243 .legalFor({{v8s8, v8s8},
1244 {v16s8, v16s8}})
1245 .customFor({{s32, s32},
1246 {s64, s64},
1247 {s128, s128},
1248 {v2s64, v2s64},
1249 {v2s32, v2s32},
1250 {v4s32, v4s32},
1251 {v4s16, v4s16},
1252 {v8s16, v8s16}});
1253 CTPOPActions
1254 .clampScalar(0, s32, s128)
1255 .widenScalarToNextPow2(0)
1256 .minScalarEltSameAsIf(always, 1, 0)
1257 .maxScalarEltSameAsIf(always, 1, 0);
1258
1259 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1260 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1261 .clampNumElements(0, v8s8, v16s8)
1262 .clampNumElements(0, v4s16, v8s16)
1263 .clampNumElements(0, v2s32, v4s32)
1264 .clampMaxNumElements(0, s64, 2)
1266 .lower();
1267
1268 // TODO: Libcall support for s128.
1269 // TODO: s16 should be legal with full FP16 support.
1270 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1271 .legalFor({{s64, s32}, {s64, s64}});
1272
1273 // TODO: Custom legalization for mismatched types.
1274 getActionDefinitionsBuilder(G_FCOPYSIGN)
1275 .moreElementsIf(
1276 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1277 [=](const LegalityQuery &Query) {
1278 const LLT Ty = Query.Types[0];
1279 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1280 })
1281 .lower();
1282
1284
1285 // Access to floating-point environment.
1286 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1287 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1288 .libcall();
1289
1290 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1291
1292 getActionDefinitionsBuilder(G_PREFETCH).custom();
1293
1294 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1295
1296 getLegacyLegalizerInfo().computeTables();
1297 verify(*ST.getInstrInfo());
1298}
1299
1300bool AArch64LegalizerInfo::legalizeCustom(
1301 LegalizerHelper &Helper, MachineInstr &MI,
1302 LostDebugLocObserver &LocObserver) const {
1303 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1304 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1305 GISelChangeObserver &Observer = Helper.Observer;
1306 switch (MI.getOpcode()) {
1307 default:
1308 // No idea what to do.
1309 return false;
1310 case TargetOpcode::G_VAARG:
1311 return legalizeVaArg(MI, MRI, MIRBuilder);
1312 case TargetOpcode::G_LOAD:
1313 case TargetOpcode::G_STORE:
1314 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1315 case TargetOpcode::G_SHL:
1316 case TargetOpcode::G_ASHR:
1317 case TargetOpcode::G_LSHR:
1318 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1319 case TargetOpcode::G_GLOBAL_VALUE:
1320 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1321 case TargetOpcode::G_SBFX:
1322 case TargetOpcode::G_UBFX:
1323 return legalizeBitfieldExtract(MI, MRI, Helper);
1324 case TargetOpcode::G_FSHL:
1325 case TargetOpcode::G_FSHR:
1326 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1327 case TargetOpcode::G_ROTR:
1328 return legalizeRotate(MI, MRI, Helper);
1329 case TargetOpcode::G_CTPOP:
1330 return legalizeCTPOP(MI, MRI, Helper);
1331 case TargetOpcode::G_ATOMIC_CMPXCHG:
1332 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1333 case TargetOpcode::G_CTTZ:
1334 return legalizeCTTZ(MI, Helper);
1335 case TargetOpcode::G_BZERO:
1336 case TargetOpcode::G_MEMCPY:
1337 case TargetOpcode::G_MEMMOVE:
1338 case TargetOpcode::G_MEMSET:
1339 return legalizeMemOps(MI, Helper);
1340 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1341 return legalizeExtractVectorElt(MI, MRI, Helper);
1342 case TargetOpcode::G_DYN_STACKALLOC:
1343 return legalizeDynStackAlloc(MI, Helper);
1344 case TargetOpcode::G_PREFETCH:
1345 return legalizePrefetch(MI, Helper);
1346 case TargetOpcode::G_ABS:
1347 return Helper.lowerAbsToCNeg(MI);
1348 case TargetOpcode::G_ICMP:
1349 return legalizeICMP(MI, MRI, MIRBuilder);
1350 }
1351
1352 llvm_unreachable("expected switch to return");
1353}
1354
1355bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1356 MachineRegisterInfo &MRI,
1357 MachineIRBuilder &MIRBuilder,
1358 GISelChangeObserver &Observer,
1359 LegalizerHelper &Helper) const {
1360 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1361 MI.getOpcode() == TargetOpcode::G_FSHR);
1362
1363 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1364 // lowering
1365 Register ShiftNo = MI.getOperand(3).getReg();
1366 LLT ShiftTy = MRI.getType(ShiftNo);
1367 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1368
1369 // Adjust shift amount according to Opcode (FSHL/FSHR)
1370 // Convert FSHL to FSHR
1371 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1372 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1373
1374 // Lower non-constant shifts and leave zero shifts to the optimizer.
1375 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1376 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1377 LegalizerHelper::LegalizeResult::Legalized);
1378
1379 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1380
1381 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1382
1383 // If the instruction is G_FSHR and has a 64-bit G_CONSTANT shift amount
1384 // in the range of 0 <-> BitWidth, it is legal
1385 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1386 VRegAndVal->Value.ult(BitWidth))
1387 return true;
1388
1389 // Cast the ShiftNumber to a 64-bit type
1390 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1391
1392 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1393 Observer.changingInstr(MI);
1394 MI.getOperand(3).setReg(Cast64.getReg(0));
1395 Observer.changedInstr(MI);
1396 }
1397 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1398 // instruction
1399 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1400 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1401 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1402 Cast64.getReg(0)});
1403 MI.eraseFromParent();
1404 }
1405 return true;
1406}
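// Worked example (illustrative, not from the upstream source): for a 32-bit
// G_FSHL with a constant shift amount of 8, BitWidth is 32 above, so the
// instruction is rebuilt as a G_FSHR with amount 32 - 8 = 24 (fshl(a, b, n)
// == fshr(a, b, bw - n) for non-zero n), and the amount is materialized as a
// 64-bit constant so the imported selection patterns can match it.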
1407
1408bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1409 MachineRegisterInfo &MRI,
1410 MachineIRBuilder &MIRBuilder) const {
1411 Register DstReg = MI.getOperand(0).getReg();
1412 Register SrcReg1 = MI.getOperand(2).getReg();
1413 Register SrcReg2 = MI.getOperand(3).getReg();
1414 LLT DstTy = MRI.getType(DstReg);
1415 LLT SrcTy = MRI.getType(SrcReg1);
1416
1417 // Check the vector types are legal
1418 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1419 DstTy.getNumElements() != SrcTy.getNumElements() ||
1420 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1421 return false;
1422
1423 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1424 // following passes
1425 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1426 if (Pred != CmpInst::ICMP_NE)
1427 return true;
1428 Register CmpReg =
1429 MIRBuilder
1430 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1431 .getReg(0);
1432 MIRBuilder.buildNot(DstReg, CmpReg);
1433
1434 MI.eraseFromParent();
1435 return true;
1436}
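// Illustrative note (not from the upstream source): a v4s32 G_ICMP with
// predicate ne is rebuilt here as a v4s32 G_ICMP eq followed by a bitwise
// not (buildNot emits a G_XOR with all-ones), which typically selects to a
// CMEQ plus a NOT rather than a compare-not-equal that NEON does not provide.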
1437
1438bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1439 MachineRegisterInfo &MRI,
1440 LegalizerHelper &Helper) const {
1441 // To allow for imported patterns to match, we ensure that the rotate amount
1442 // is 64b with an extension.
1443 Register AmtReg = MI.getOperand(2).getReg();
1444 LLT AmtTy = MRI.getType(AmtReg);
1445 (void)AmtTy;
1446 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1447 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1448 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1449 Helper.Observer.changingInstr(MI);
1450 MI.getOperand(2).setReg(NewAmt.getReg(0));
1451 Helper.Observer.changedInstr(MI);
1452 return true;
1453}
1454
1455bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1456 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1457 GISelChangeObserver &Observer) const {
1458 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1459 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1460 // G_ADD_LOW instructions.
1461 // By splitting this here, we can optimize accesses in the small code model by
1462 // folding in the G_ADD_LOW into the load/store offset.
1463 auto &GlobalOp = MI.getOperand(1);
1464 // Don't modify an intrinsic call.
1465 if (GlobalOp.isSymbol())
1466 return true;
1467 const auto* GV = GlobalOp.getGlobal();
1468 if (GV->isThreadLocal())
1469 return true; // Don't want to modify TLS vars.
1470
1471 auto &TM = ST->getTargetLowering()->getTargetMachine();
1472 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1473
1474 if (OpFlags & AArch64II::MO_GOT)
1475 return true;
1476
1477 auto Offset = GlobalOp.getOffset();
1478 Register DstReg = MI.getOperand(0).getReg();
1479 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1480 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1481 // Set the regclass on the dest reg too.
1482 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1483
1484 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1485 // by creating a MOVK that sets bits 48-63 of the register to (global address
1486 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1487 // prevent an incorrect tag being generated during relocation when the
1488 // global appears before the code section. Without the offset, a global at
1489 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1490 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1491 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1492 // instead of `0xf`.
1493 // This assumes that we're in the small code model so we can assume a binary
1494 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1495 // binary must also be loaded into address range [0, 2^48). Both of these
1496 // properties need to be ensured at runtime when using tagged addresses.
1497 if (OpFlags & AArch64II::MO_TAGGED) {
1498 assert(!Offset &&
1499 "Should not have folded in an offset for a tagged global!");
1500 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1501 .addGlobalAddress(GV, 0x100000000,
1502 AArch64II::MO_PREL | AArch64II::MO_G3)
1503 .addImm(48);
1504 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1505 }
1506
1507 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1508 .addGlobalAddress(GV, Offset,
1509 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1510 MI.eraseFromParent();
1511 return true;
1512}
1513
1514bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1515 MachineInstr &MI) const {
1516 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1517 switch (IntrinsicID) {
1518 case Intrinsic::vacopy: {
1519 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1520 unsigned VaListSize =
1521 (ST->isTargetDarwin() || ST->isTargetWindows())
1522 ? PtrSize
1523 : ST->isTargetILP32() ? 20 : 32;
1524
1525 MachineFunction &MF = *MI.getMF();
1526 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1527 LLT::scalar(VaListSize * 8));
1528 MachineIRBuilder MIB(MI);
1529 MIB.buildLoad(Val, MI.getOperand(2),
1530 *MF.getMachineMemOperand(MachinePointerInfo(),
1531 MachineMemOperand::MOLoad,
1532 VaListSize, Align(PtrSize)));
1533 MIB.buildStore(Val, MI.getOperand(1),
1534 *MF.getMachineMemOperand(MachinePointerInfo(),
1535 MachineMemOperand::MOStore,
1536 VaListSize, Align(PtrSize)));
1537 MI.eraseFromParent();
1538 return true;
1539 }
1540 case Intrinsic::get_dynamic_area_offset: {
1541 MachineIRBuilder &MIB = Helper.MIRBuilder;
1542 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1543 MI.eraseFromParent();
1544 return true;
1545 }
1546 case Intrinsic::aarch64_mops_memset_tag: {
1547 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1548 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1549 // the instruction).
1550 MachineIRBuilder MIB(MI);
1551 auto &Value = MI.getOperand(3);
1552 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1553 Value.setReg(ExtValueReg);
1554 return true;
1555 }
1556 case Intrinsic::aarch64_prefetch: {
1557 MachineIRBuilder MIB(MI);
1558 auto &AddrVal = MI.getOperand(1);
1559
1560 int64_t IsWrite = MI.getOperand(2).getImm();
1561 int64_t Target = MI.getOperand(3).getImm();
1562 int64_t IsStream = MI.getOperand(4).getImm();
1563 int64_t IsData = MI.getOperand(5).getImm();
1564
1565 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1566 (!IsData << 3) | // IsDataCache bit
1567 (Target << 1) | // Cache level bits
1568 (unsigned)IsStream; // Stream bit
1569
1570 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1571 MI.eraseFromParent();
1572 return true;
1573 }
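// Worked example (illustrative, not from the upstream source): IsWrite=0,
// Target=0 (L1), IsStream=0, IsData=1 packs to PrfOp = 0b00000, i.e. the
// PLDL1KEEP hint, while IsWrite=1 with the same operands gives 0b10000,
// i.e. PSTL1KEEP.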
1574 case Intrinsic::aarch64_neon_uaddv:
1575 case Intrinsic::aarch64_neon_saddv:
1576 case Intrinsic::aarch64_neon_umaxv:
1577 case Intrinsic::aarch64_neon_smaxv:
1578 case Intrinsic::aarch64_neon_uminv:
1579 case Intrinsic::aarch64_neon_sminv: {
1580 MachineIRBuilder MIB(MI);
1581 MachineRegisterInfo &MRI = *MIB.getMRI();
1582 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1583 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1584 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1585
1586 auto OldDst = MI.getOperand(0).getReg();
1587 auto OldDstTy = MRI.getType(OldDst);
1588 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1589 if (OldDstTy == NewDstTy)
1590 return true;
1591
1592 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1593
1594 Helper.Observer.changingInstr(MI);
1595 MI.getOperand(0).setReg(NewDst);
1596 Helper.Observer.changedInstr(MI);
1597
1598 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1599 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1600 OldDst, NewDst);
1601
1602 return true;
1603 }
1604 case Intrinsic::aarch64_neon_uaddlp:
1605 case Intrinsic::aarch64_neon_saddlp: {
1606 MachineIRBuilder MIB(MI);
1607
1608 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1609 ? AArch64::G_UADDLP
1610 : AArch64::G_SADDLP;
1611 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1612 MI.eraseFromParent();
1613
1614 return true;
1615 }
1616 case Intrinsic::aarch64_neon_uaddlv:
1617 case Intrinsic::aarch64_neon_saddlv: {
1618 MachineIRBuilder MIB(MI);
1619 MachineRegisterInfo &MRI = *MIB.getMRI();
1620
1621 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1622 ? AArch64::G_UADDLV
1623 : AArch64::G_SADDLV;
1624 Register DstReg = MI.getOperand(0).getReg();
1625 Register SrcReg = MI.getOperand(2).getReg();
1626 LLT DstTy = MRI.getType(DstReg);
1627
1628 LLT MidTy, ExtTy;
1629 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1630 MidTy = LLT::fixed_vector(4, 32);
1631 ExtTy = LLT::scalar(32);
1632 } else {
1633 MidTy = LLT::fixed_vector(2, 64);
1634 ExtTy = LLT::scalar(64);
1635 }
1636
1637 Register MidReg =
1638 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1639 Register ZeroReg =
1640 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1641 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1642 {MidReg, ZeroReg})
1643 .getReg(0);
1644
1645 if (DstTy.getScalarSizeInBits() < 32)
1646 MIB.buildTrunc(DstReg, ExtReg);
1647 else
1648 MIB.buildCopy(DstReg, ExtReg);
1649
1650 MI.eraseFromParent();
1651
1652 return true;
1653 }
1654 case Intrinsic::aarch64_neon_smax:
1655 case Intrinsic::aarch64_neon_smin:
1656 case Intrinsic::aarch64_neon_umax:
1657 case Intrinsic::aarch64_neon_umin:
1658 case Intrinsic::aarch64_neon_fmax:
1659 case Intrinsic::aarch64_neon_fmin:
1660 case Intrinsic::aarch64_neon_fmaxnm:
1661 case Intrinsic::aarch64_neon_fminnm: {
1662 MachineIRBuilder MIB(MI);
1663 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1664 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1665 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1666 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1667 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1668 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1669 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1670 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1671 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1672 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1673 {MI.getOperand(2), MI.getOperand(3)});
1674 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1675 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1676 {MI.getOperand(2), MI.getOperand(3)});
1677 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1678 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1679 {MI.getOperand(2), MI.getOperand(3)});
1680 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1681 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1682 {MI.getOperand(2), MI.getOperand(3)});
1683 MI.eraseFromParent();
1684 return true;
1685 }
1686 case Intrinsic::vector_reverse:
1687 // TODO: Add support for vector_reverse
1688 return false;
1689 }
1690
1691 return true;
1692}
1693
1694bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1695 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1696 GISelChangeObserver &Observer) const {
1697 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1698 MI.getOpcode() == TargetOpcode::G_LSHR ||
1699 MI.getOpcode() == TargetOpcode::G_SHL);
1700 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1701 // imported patterns can select it later. Either way, it will be legal.
1702 Register AmtReg = MI.getOperand(2).getReg();
1703 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1704 if (!VRegAndVal)
1705 return true;
1706 // Check the shift amount is in range for an immediate form.
1707 int64_t Amount = VRegAndVal->Value.getSExtValue();
1708 if (Amount > 31)
1709 return true; // This will have to remain a register variant.
1710 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1711 Observer.changingInstr(MI);
1712 MI.getOperand(2).setReg(ExtCst.getReg(0));
1713 Observer.changedInstr(MI);
1714 return true;
1715}
1716
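// Fold a G_PTR_ADD with a constant offset into the LDP/STP addressing mode
// when possible. isShiftedInt<7, 3> accepts offsets that are multiples of 8
// in the signed, scaled imm7 range [-512, 504]; anything else keeps the
// original pointer with offset 0.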
1717 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1718                                 MachineRegisterInfo &MRI) {
1719 Base = Root;
1720 Offset = 0;
1721
1722 Register NewBase;
1723 int64_t NewOffset;
1724 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1725 isShiftedInt<7, 3>(NewOffset)) {
1726 Base = NewBase;
1727 Offset = NewOffset;
1728 }
1729}
1730
1731// FIXME: This should be removed and replaced with the generic bitcast legalize
1732// action.
1733bool AArch64LegalizerInfo::legalizeLoadStore(
1734 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1735 GISelChangeObserver &Observer) const {
1736 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1737 MI.getOpcode() == TargetOpcode::G_LOAD);
1738 // Here we just try to handle vector loads/stores where our value type might
1739 // have pointer elements, which the SelectionDAG importer can't handle. To
1740 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1741 // the value to use s64 types.
1742
1743 // Custom legalization requires that the instruction, if not deleted, be
1744 // fully legalized. To allow further legalization of the instruction, we
1745 // create a new instruction and erase the existing one.
1746
1747 Register ValReg = MI.getOperand(0).getReg();
1748 const LLT ValTy = MRI.getType(ValReg);
1749
1750 if (ValTy == LLT::scalar(128)) {
1751
1752 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1753 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1754 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1755 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1756 bool IsRcpC3 =
1757 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1758
1759 LLT s64 = LLT::scalar(64);
1760
1761 unsigned Opcode;
1762 if (IsRcpC3) {
1763 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1764 } else {
1765 // For LSE2, loads/stores should have been converted to monotonic and had
1766 // a fence inserted after them.
1767 assert(Ordering == AtomicOrdering::Monotonic ||
1768 Ordering == AtomicOrdering::Unordered);
1769 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1770
1771 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1772 }
1773
1774 MachineInstrBuilder NewI;
1775 if (IsLoad) {
1776 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1777 MIRBuilder.buildMergeLikeInstr(
1778 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1779 } else {
1780 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1781 NewI = MIRBuilder.buildInstr(
1782 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1783 }
1784
1785 if (IsRcpC3) {
1786 NewI.addUse(MI.getOperand(1).getReg());
1787 } else {
1788 Register Base;
1789 int Offset;
1790 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1791 NewI.addUse(Base);
1792 NewI.addImm(Offset / 8);
1793 }
1794
1795 NewI.cloneMemRefs(MI);
1796 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1797                                  *MRI.getTargetRegisterInfo(),
1798 *ST->getRegBankInfo());
1799 MI.eraseFromParent();
1800 return true;
1801 }
1802
1803 if (!ValTy.isPointerVector() ||
1804 ValTy.getElementType().getAddressSpace() != 0) {
1805 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1806 return false;
1807 }
1808
1809 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1810 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1811 auto &MMO = **MI.memoperands_begin();
1812 MMO.setType(NewTy);
1813
1814 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1815 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1816 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1817 } else {
1818 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1819 MIRBuilder.buildBitcast(ValReg, NewLoad);
1820 }
1821 MI.eraseFromParent();
1822 return true;
1823}
1824
1825bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1826 MachineRegisterInfo &MRI,
1827 MachineIRBuilder &MIRBuilder) const {
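// Lower G_VAARG as a pointer bump: load the current argument pointer from the
// va_list slot, realign it if the requested alignment is stricter than the
// pointer alignment, load the value, then advance the pointer by the value
// size (rounded up to pointer alignment) and store it back.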
1828 MachineFunction &MF = MIRBuilder.getMF();
1829 Align Alignment(MI.getOperand(2).getImm());
1830 Register Dst = MI.getOperand(0).getReg();
1831 Register ListPtr = MI.getOperand(1).getReg();
1832
1833 LLT PtrTy = MRI.getType(ListPtr);
1834 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1835
1836 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1837 const Align PtrAlign = Align(PtrSize);
1838 auto List = MIRBuilder.buildLoad(
1839 PtrTy, ListPtr,
1840 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1841                          PtrTy, PtrAlign));
1842
1843 MachineInstrBuilder DstPtr;
1844 if (Alignment > PtrAlign) {
1845 // Realign the list to the actual required alignment.
1846 auto AlignMinus1 =
1847 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1848 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1849 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1850 } else
1851 DstPtr = List;
1852
1853 LLT ValTy = MRI.getType(Dst);
1854 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1855 MIRBuilder.buildLoad(
1856 Dst, DstPtr,
1857 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1858                          ValTy, std::max(Alignment, PtrAlign)));
1859
1860 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1861
1862 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1863
1864 MIRBuilder.buildStore(NewList, ListPtr,
1865                       *MF.getMachineMemOperand(MachinePointerInfo(),
1866                                                MachineMemOperand::MOStore,
1867                                                PtrTy, PtrAlign));
1868
1869 MI.eraseFromParent();
1870 return true;
1871}
1872
1873bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1874 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1875 // Only legal if we can select immediate forms.
1876 // TODO: Lower this otherwise.
1877 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1878 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1879}
1880
1881bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1882                                          MachineRegisterInfo &MRI,
1883                                          LegalizerHelper &Helper) const {
1884 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1885 // it can be more efficiently lowered to the following sequence that uses
1886 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1887 // registers are cheap.
1888 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1889 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1890 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1891 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1892 //
1893 // For 128 bit vector popcounts, we lower to the following sequence:
1894 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1895 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1896 // uaddlp.4s v0, v0 // v4s32, v2s64
1897 // uaddlp.2d v0, v0 // v2s64
1898 //
1899 // For 64 bit vector popcounts, we lower to the following sequence:
1900 // cnt.8b v0, v0 // v4s16, v2s32
1901 // uaddlp.4h v0, v0 // v4s16, v2s32
1902 // uaddlp.2s v0, v0 // v2s32
1903
1904 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1905 Register Dst = MI.getOperand(0).getReg();
1906 Register Val = MI.getOperand(1).getReg();
1907 LLT Ty = MRI.getType(Val);
1908 unsigned Size = Ty.getSizeInBits();
1909
1910 assert(Ty == MRI.getType(Dst) &&
1911 "Expected src and dst to have the same type!");
1912
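// With CSSC, 32- and 64-bit scalar popcounts are handled by the scalar CNT
// instruction; only the 128-bit scalar needs help here, by splitting into two
// 64-bit halves, counting each, and adding the results.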
1913 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1914 LLT s64 = LLT::scalar(64);
1915
1916 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1917 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1918 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1919 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1920
1921 MIRBuilder.buildZExt(Dst, Add);
1922 MI.eraseFromParent();
1923 return true;
1924 }
1925
1926 if (!ST->hasNEON() ||
1927 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1928 // Use generic lowering when custom lowering is not possible.
1929 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1930 Helper.lowerBitCount(MI) ==
1931            LegalizerHelper::LegalizeResult::Legalized;
1932 }
1933
1934 // Pre-conditioning: widen Val up to the nearest vector type.
1935 // s32,s64,v4s16,v2s32 -> v8i8
1936 // v8s16,v4s32,v2s64 -> v16i8
1937 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1938 if (Ty.isScalar()) {
1939 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1940 if (Size == 32) {
1941 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1942 }
1943 }
1944 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1945
1946 // Count bits in each byte-sized lane.
1947 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1948
1949 // Sum across lanes.
1950
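// With FEAT_DotProd, dotting the per-byte counts against a vector of ones
// (UDOT) sums each group of four adjacent byte counts into a 32-bit lane in
// one step, replacing a chain of pairwise uaddlp widenings.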
1951 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1952 Ty.getScalarSizeInBits() != 16) {
1953 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1954 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
1955 auto Ones = MIRBuilder.buildConstant(VTy, 1);
1956 MachineInstrBuilder Sum;
1957
1958 if (Ty == LLT::fixed_vector(2, 64)) {
1959 auto UDOT =
1960 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1961 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1962 } else if (Ty == LLT::fixed_vector(4, 32)) {
1963 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1964 } else if (Ty == LLT::fixed_vector(2, 32)) {
1965 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1966 } else {
1967 llvm_unreachable("unexpected vector shape");
1968 }
1969
1970 Sum->getOperand(0).setReg(Dst);
1971 MI.eraseFromParent();
1972 return true;
1973 }
1974
1975 Register HSum = CTPOP.getReg(0);
1976 unsigned Opc;
1977 SmallVector<LLT> HAddTys;
1978 if (Ty.isScalar()) {
1979 Opc = Intrinsic::aarch64_neon_uaddlv;
1980 HAddTys.push_back(LLT::scalar(32));
1981 } else if (Ty == LLT::fixed_vector(8, 16)) {
1982 Opc = Intrinsic::aarch64_neon_uaddlp;
1983 HAddTys.push_back(LLT::fixed_vector(8, 16));
1984 } else if (Ty == LLT::fixed_vector(4, 32)) {
1985 Opc = Intrinsic::aarch64_neon_uaddlp;
1986 HAddTys.push_back(LLT::fixed_vector(8, 16));
1987 HAddTys.push_back(LLT::fixed_vector(4, 32));
1988 } else if (Ty == LLT::fixed_vector(2, 64)) {
1989 Opc = Intrinsic::aarch64_neon_uaddlp;
1990 HAddTys.push_back(LLT::fixed_vector(8, 16));
1991 HAddTys.push_back(LLT::fixed_vector(4, 32));
1992 HAddTys.push_back(LLT::fixed_vector(2, 64));
1993 } else if (Ty == LLT::fixed_vector(4, 16)) {
1994 Opc = Intrinsic::aarch64_neon_uaddlp;
1995 HAddTys.push_back(LLT::fixed_vector(4, 16));
1996 } else if (Ty == LLT::fixed_vector(2, 32)) {
1997 Opc = Intrinsic::aarch64_neon_uaddlp;
1998 HAddTys.push_back(LLT::fixed_vector(4, 16));
1999 HAddTys.push_back(LLT::fixed_vector(2, 32));
2000 } else
2001 llvm_unreachable("unexpected vector shape");
2002 MachineInstrBuilder UADD;
2003 for (LLT HTy : HAddTys) {
2004 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2005 HSum = UADD.getReg(0);
2006 }
2007
2008 // Post-conditioning.
2009 if (Ty.isScalar() && (Size == 64 || Size == 128))
2010 MIRBuilder.buildZExt(Dst, UADD);
2011 else
2012 UADD->getOperand(0).setReg(Dst);
2013 MI.eraseFromParent();
2014 return true;
2015}
2016
2017bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2018 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2019 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2020 LLT s64 = LLT::scalar(64);
2021 auto Addr = MI.getOperand(1).getReg();
2022 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2023 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2024 auto DstLo = MRI.createGenericVirtualRegister(s64);
2025 auto DstHi = MRI.createGenericVirtualRegister(s64);
2026
2027 MachineInstrBuilder CAS;
2028 if (ST->hasLSE()) {
2029 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2030 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2031 // the rest of the MIR so we must reassemble the extracted registers into a
2032 // 128-bit known-regclass one with code like this:
2033 //
2034 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2035 // %out = CASP %in1, ...
2036 // %OldLo = G_EXTRACT %out, 0
2037 // %OldHi = G_EXTRACT %out, 64
2038 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2039 unsigned Opcode;
2040 switch (Ordering) {
2041 case AtomicOrdering::Acquire:
2042 Opcode = AArch64::CASPAX;
2043 break;
2044 case AtomicOrdering::Release:
2045 Opcode = AArch64::CASPLX;
2046 break;
2047 case AtomicOrdering::AcquireRelease:
2048 case AtomicOrdering::SequentiallyConsistent:
2049 Opcode = AArch64::CASPALX;
2050 break;
2051 default:
2052 Opcode = AArch64::CASPX;
2053 break;
2054 }
2055
2056 LLT s128 = LLT::scalar(128);
2057 auto CASDst = MRI.createGenericVirtualRegister(s128);
2058 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2059 auto CASNew = MRI.createGenericVirtualRegister(s128);
2060 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2061 .addUse(DesiredI->getOperand(0).getReg())
2062 .addImm(AArch64::sube64)
2063 .addUse(DesiredI->getOperand(1).getReg())
2064 .addImm(AArch64::subo64);
2065 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2066 .addUse(NewI->getOperand(0).getReg())
2067 .addImm(AArch64::sube64)
2068 .addUse(NewI->getOperand(1).getReg())
2069 .addImm(AArch64::subo64);
2070
2071 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2072
2073 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2074 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2075 } else {
2076 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2077 // can take arbitrary registers so it just has the normal GPR64 operands the
2078 // rest of AArch64 is expecting.
2079 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2080 unsigned Opcode;
2081 switch (Ordering) {
2082 case AtomicOrdering::Acquire:
2083 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2084 break;
2085 case AtomicOrdering::Release:
2086 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2087 break;
2088 case AtomicOrdering::AcquireRelease:
2089 case AtomicOrdering::SequentiallyConsistent:
2090 Opcode = AArch64::CMP_SWAP_128;
2091 break;
2092 default:
2093 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2094 break;
2095 }
2096
2097 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2098 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2099 {Addr, DesiredI->getOperand(0),
2100 DesiredI->getOperand(1), NewI->getOperand(0),
2101 NewI->getOperand(1)});
2102 }
2103
2104 CAS.cloneMemRefs(MI);
2105 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2106                                  *MRI.getTargetRegisterInfo(),
2107 *ST->getRegBankInfo());
2108
2109 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2110 MI.eraseFromParent();
2111 return true;
2112}
2113
2114bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2115 LegalizerHelper &Helper) const {
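// AArch64 has no count-trailing-zeros instruction, but RBIT + CLZ computes
// the same value, so lower G_CTTZ as CTLZ(BITREVERSE(x)).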
2116 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2117 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2118 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2119 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2120 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2121 MI.eraseFromParent();
2122 return true;
2123}
2124
2125bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2126 LegalizerHelper &Helper) const {
2127 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2128
2129 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2130 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2131 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2132 // the instruction).
2133 auto &Value = MI.getOperand(1);
2134 Register ExtValueReg =
2135 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2136 Value.setReg(ExtValueReg);
2137 return true;
2138 }
2139
2140 return false;
2141}
2142
2143bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2144 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2145 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
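// A constant lane index can be selected directly; a variable index falls
// back to the generic lowering, which writes the vector to a stack temporary
// and reloads the requested element.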
2146 auto VRegAndVal =
2147 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2148 if (VRegAndVal)
2149 return true;
2150 return Helper.lowerExtractInsertVectorElt(MI) !=
2151        LegalizerHelper::LegalizeResult::UnableToLegalize;
2152}
2153
2154bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2155 MachineInstr &MI, LegalizerHelper &Helper) const {
2156 MachineFunction &MF = *MI.getParent()->getParent();
2157 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2158 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2159
2160 // If stack probing is not enabled for this function, use the default
2161 // lowering.
2162 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2163 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2164 "inline-asm") {
2165 Helper.lowerDynStackAlloc(MI);
2166 return true;
2167 }
2168
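// Stack probing was requested ("probe-stack"="inline-asm"): compute the new
// SP value with the generic helper, emit the PROBED_STACKALLOC_DYN pseudo to
// perform the probed stack-pointer update, then copy the updated SP into the
// destination.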
2169 Register Dst = MI.getOperand(0).getReg();
2170 Register AllocSize = MI.getOperand(1).getReg();
2171 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2172
2173 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2174 "Unexpected type for dynamic alloca");
2175 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2176 "Unexpected type for dynamic alloca");
2177
2178 LLT PtrTy = MRI.getType(Dst);
2179 Register SPReg =
2180     Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2181 Register SPTmp =
2182 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2183 auto NewMI =
2184 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2185 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2186 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2187 MIRBuilder.buildCopy(Dst, SPTmp);
2188
2189 MI.eraseFromParent();
2190 return true;
2191}
2192
2193bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2194 LegalizerHelper &Helper) const {
2195 MachineIRBuilder &MIB = Helper.MIRBuilder;
2196 auto &AddrVal = MI.getOperand(0);
2197
2198 int64_t IsWrite = MI.getOperand(1).getImm();
2199 int64_t Locality = MI.getOperand(2).getImm();
2200 int64_t IsData = MI.getOperand(3).getImm();
2201
2202 bool IsStream = Locality == 0;
2203 if (Locality != 0) {
2204 assert(Locality <= 3 && "Prefetch locality out-of-range");
2205 // The locality degree is the inverse of the target cache level: locality 3
2206 // (keep in the closest cache) maps to level 1, and the encoding starts at 0
2207 // for level 1, so flip the number.
2208 Locality = 3 - Locality;
2209 }
2210
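// PrfOp packs the PRFM hint: bit 4 set means store (PST) rather than load
// (PLD) prefetch, bit 3 set means instruction (PLI) rather than data, bits
// 2-1 give the target cache level (0 = L1), and bit 0 selects streaming.
// For example, write=1, locality=3, data=1 yields (1 << 4) = 16, i.e.
// PSTL1KEEP, while a read with locality=0 and data=1 yields 1, i.e. PLDL1STRM.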
2211 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2212
2213 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2214 MI.eraseFromParent();
2215 return true;
2216}