LLVM 20.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
107
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220 // Making clamping conditional on CSSC extension as without legal types we
221 // lower to CMP which can fold one of the two sxtb's we'd otherwise need
222 // if we detect a type smaller than 32-bit.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
231 // FIXME: This sholdn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
235
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
271 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
272 G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
273 // We need a call for these, so we always need to scalarize.
274 .scalarize(0)
275 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
276 .minScalar(0, s32)
277 .libcallFor({s32, s64});
279 .scalarize(0)
280 .minScalar(0, s32)
281 .libcallFor({{s32, s32}, {s64, s32}});
282
284 .legalIf(all(typeInSet(0, {s32, s64, p0}),
285 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
287 .clampScalar(0, s32, s64)
289 .minScalar(1, s8)
290 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
291 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
292
294 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
295 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
297 .clampScalar(1, s32, s128)
299 .minScalar(0, s16)
300 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
301 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
302 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
303
304
305 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
306 auto &Actions = getActionDefinitionsBuilder(Op);
307
308 if (Op == G_SEXTLOAD)
310
311 // Atomics have zero extending behavior.
312 Actions
313 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
314 {s32, p0, s16, 8},
315 {s32, p0, s32, 8},
316 {s64, p0, s8, 2},
317 {s64, p0, s16, 2},
318 {s64, p0, s32, 4},
319 {s64, p0, s64, 8},
320 {p0, p0, s64, 8},
321 {v2s32, p0, s64, 8}})
322 .widenScalarToNextPow2(0)
323 .clampScalar(0, s32, s64)
324 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
325 // how to do that yet.
326 .unsupportedIfMemSizeNotPow2()
327 // Lower anything left over into G_*EXT and G_LOAD
328 .lower();
329 }
330
331 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
332 const LLT &ValTy = Query.Types[0];
333 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
334 };
335
336 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
337 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
338
339 if (ST.hasSVE()) {
340 LoadActions.legalForTypesWithMemDesc({
341 // 128 bit base sizes
342 {nxv16s8, p0, nxv16s8, 8},
343 {nxv8s16, p0, nxv8s16, 8},
344 {nxv4s32, p0, nxv4s32, 8},
345 {nxv2s64, p0, nxv2s64, 8},
346 });
347
348 // TODO: Add nxv2p0. Consider bitcastIf.
349 // See #92130
350 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
351 StoreActions.legalForTypesWithMemDesc({
352 // 128 bit base sizes
353 {nxv16s8, p0, nxv16s8, 8},
354 {nxv8s16, p0, nxv8s16, 8},
355 {nxv4s32, p0, nxv4s32, 8},
356 {nxv2s64, p0, nxv2s64, 8},
357 });
358 }
359
360 LoadActions
361 .customIf([=](const LegalityQuery &Query) {
362 return HasRCPC3 && Query.Types[0] == s128 &&
363 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
364 })
365 .customIf([=](const LegalityQuery &Query) {
366 return Query.Types[0] == s128 &&
367 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
368 })
369 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
370 {s16, p0, s16, 8},
371 {s32, p0, s32, 8},
372 {s64, p0, s64, 8},
373 {p0, p0, s64, 8},
374 {s128, p0, s128, 8},
375 {v8s8, p0, s64, 8},
376 {v16s8, p0, s128, 8},
377 {v4s16, p0, s64, 8},
378 {v8s16, p0, s128, 8},
379 {v2s32, p0, s64, 8},
380 {v4s32, p0, s128, 8},
381 {v2s64, p0, s128, 8}})
382 // These extends are also legal
383 .legalForTypesWithMemDesc(
384 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
385 .widenScalarToNextPow2(0, /* MinSize = */ 8)
386 .clampMaxNumElements(0, s8, 16)
387 .clampMaxNumElements(0, s16, 8)
388 .clampMaxNumElements(0, s32, 4)
389 .clampMaxNumElements(0, s64, 2)
390 .clampMaxNumElements(0, p0, 2)
391 .lowerIfMemSizeNotByteSizePow2()
392 .clampScalar(0, s8, s64)
393 .narrowScalarIf(
394 [=](const LegalityQuery &Query) {
395 // Clamp extending load results to 32-bits.
396 return Query.Types[0].isScalar() &&
397 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
398 Query.Types[0].getSizeInBits() > 32;
399 },
400 changeTo(0, s32))
401 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
402 .bitcastIf(typeInSet(0, {v4s8}),
403 [=](const LegalityQuery &Query) {
404 const LLT VecTy = Query.Types[0];
405 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
406 })
407 .customIf(IsPtrVecPred)
408 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
409
410 StoreActions
411 .customIf([=](const LegalityQuery &Query) {
412 return HasRCPC3 && Query.Types[0] == s128 &&
413 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
414 })
415 .customIf([=](const LegalityQuery &Query) {
416 return Query.Types[0] == s128 &&
417 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
418 })
419 .legalForTypesWithMemDesc(
420 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
421 {s32, p0, s8, 8}, // truncstorei8 from s32
422 {s64, p0, s8, 8}, // truncstorei8 from s64
423 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
424 {s64, p0, s16, 8}, // truncstorei16 from s64
425 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
426 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
427 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
428 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
429 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
430 .clampScalar(0, s8, s64)
431 .lowerIf([=](const LegalityQuery &Query) {
432 return Query.Types[0].isScalar() &&
433 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
434 })
435 // Maximum: sN * k = 128
436 .clampMaxNumElements(0, s8, 16)
437 .clampMaxNumElements(0, s16, 8)
438 .clampMaxNumElements(0, s32, 4)
439 .clampMaxNumElements(0, s64, 2)
440 .clampMaxNumElements(0, p0, 2)
441 .lowerIfMemSizeNotPow2()
442 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
443 .bitcastIf(typeInSet(0, {v4s8}),
444 [=](const LegalityQuery &Query) {
445 const LLT VecTy = Query.Types[0];
446 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
447 })
448 .customIf(IsPtrVecPred)
449 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
450
451 getActionDefinitionsBuilder(G_INDEXED_STORE)
452 // Idx 0 == Ptr, Idx 1 == Val
453 // TODO: we can implement legalizations but as of now these are
454 // generated in a very specific way.
456 {p0, s8, s8, 8},
457 {p0, s16, s16, 8},
458 {p0, s32, s8, 8},
459 {p0, s32, s16, 8},
460 {p0, s32, s32, 8},
461 {p0, s64, s64, 8},
462 {p0, p0, p0, 8},
463 {p0, v8s8, v8s8, 8},
464 {p0, v16s8, v16s8, 8},
465 {p0, v4s16, v4s16, 8},
466 {p0, v8s16, v8s16, 8},
467 {p0, v2s32, v2s32, 8},
468 {p0, v4s32, v4s32, 8},
469 {p0, v2s64, v2s64, 8},
470 {p0, v2p0, v2p0, 8},
471 {p0, s128, s128, 8},
472 })
473 .unsupported();
474
475 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
476 LLT LdTy = Query.Types[0];
477 LLT PtrTy = Query.Types[1];
478 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
479 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
480 return false;
481 if (PtrTy != p0)
482 return false;
483 return true;
484 };
485 getActionDefinitionsBuilder(G_INDEXED_LOAD)
488 .legalIf(IndexedLoadBasicPred)
489 .unsupported();
490 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
491 .unsupportedIf(
493 .legalIf(all(typeInSet(0, {s16, s32, s64}),
494 LegalityPredicate([=](const LegalityQuery &Q) {
495 LLT LdTy = Q.Types[0];
496 LLT PtrTy = Q.Types[1];
497 LLT MemTy = Q.MMODescrs[0].MemoryTy;
498 if (PtrTy != p0)
499 return false;
500 if (LdTy == s16)
501 return MemTy == s8;
502 if (LdTy == s32)
503 return MemTy == s8 || MemTy == s16;
504 if (LdTy == s64)
505 return MemTy == s8 || MemTy == s16 || MemTy == s32;
506 return false;
507 })))
508 .unsupported();
509
510 // Constants
512 .legalFor({p0, s8, s16, s32, s64})
513 .widenScalarToNextPow2(0)
514 .clampScalar(0, s8, s64);
515 getActionDefinitionsBuilder(G_FCONSTANT)
516 .legalIf([=](const LegalityQuery &Query) {
517 const auto &Ty = Query.Types[0];
518 if (HasFP16 && Ty == s16)
519 return true;
520 return Ty == s32 || Ty == s64 || Ty == s128;
521 })
522 .clampScalar(0, MinFPScalar, s128);
523
524 // FIXME: fix moreElementsToNextPow2
526 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
528 .clampScalar(1, s32, s64)
529 .clampScalar(0, s32, s32)
530 .minScalarEltSameAsIf(
531 [=](const LegalityQuery &Query) {
532 const LLT &Ty = Query.Types[0];
533 const LLT &SrcTy = Query.Types[1];
534 return Ty.isVector() && !SrcTy.isPointerVector() &&
535 Ty.getElementType() != SrcTy.getElementType();
536 },
537 0, 1)
538 .minScalarOrEltIf(
539 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
540 1, s32)
541 .minScalarOrEltIf(
542 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
543 s64)
545 .clampNumElements(1, v8s8, v16s8)
546 .clampNumElements(1, v4s16, v8s16)
547 .clampNumElements(1, v2s32, v4s32)
548 .clampNumElements(1, v2s64, v2s64)
549 .customIf(isVector(0));
550
552 .legalFor({{s32, MinFPScalar},
553 {s32, s32},
554 {s32, s64},
555 {v4s32, v4s32},
556 {v2s32, v2s32},
557 {v2s64, v2s64}})
558 .legalIf([=](const LegalityQuery &Query) {
559 const auto &Ty = Query.Types[1];
560 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
561 })
563 .clampScalar(0, s32, s32)
564 .clampScalarOrElt(1, MinFPScalar, s64)
565 .minScalarEltSameAsIf(
566 [=](const LegalityQuery &Query) {
567 const LLT &Ty = Query.Types[0];
568 const LLT &SrcTy = Query.Types[1];
569 return Ty.isVector() && !SrcTy.isPointerVector() &&
570 Ty.getElementType() != SrcTy.getElementType();
571 },
572 0, 1)
573 .clampNumElements(1, v4s16, v8s16)
574 .clampNumElements(1, v2s32, v4s32)
575 .clampMaxNumElements(1, s64, 2)
576 .moreElementsToNextPow2(1);
577
578 // Extensions
579 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
580 unsigned DstSize = Query.Types[0].getSizeInBits();
581
582 // Handle legal vectors using legalFor
583 if (Query.Types[0].isVector())
584 return false;
585
586 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
587 return false; // Extending to a scalar s128 needs narrowing.
588
589 const LLT &SrcTy = Query.Types[1];
590
591 // Make sure we fit in a register otherwise. Don't bother checking that
592 // the source type is below 128 bits. We shouldn't be allowing anything
593 // through which is wider than the destination in the first place.
594 unsigned SrcSize = SrcTy.getSizeInBits();
595 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
596 return false;
597
598 return true;
599 };
600 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
601 .legalIf(ExtLegalFunc)
602 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
603 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
605 .clampMaxNumElements(1, s8, 8)
606 .clampMaxNumElements(1, s16, 4)
607 .clampMaxNumElements(1, s32, 2)
608 // Tries to convert a large EXTEND into two smaller EXTENDs
609 .lowerIf([=](const LegalityQuery &Query) {
610 return (Query.Types[0].getScalarSizeInBits() >
611 Query.Types[1].getScalarSizeInBits() * 2) &&
612 Query.Types[0].isVector() &&
613 (Query.Types[1].getScalarSizeInBits() == 8 ||
614 Query.Types[1].getScalarSizeInBits() == 16);
615 })
616 .clampMinNumElements(1, s8, 8)
617 .clampMinNumElements(1, s16, 4);
618
620 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
622 .clampMaxNumElements(0, s8, 8)
623 .clampMaxNumElements(0, s16, 4)
624 .clampMaxNumElements(0, s32, 2)
625 .minScalarOrEltIf(
626 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
627 0, s8)
628 .lowerIf([=](const LegalityQuery &Query) {
629 LLT DstTy = Query.Types[0];
630 LLT SrcTy = Query.Types[1];
631 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
632 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
633 })
634 .clampMinNumElements(0, s8, 8)
635 .clampMinNumElements(0, s16, 4)
636 .alwaysLegal();
637
638 getActionDefinitionsBuilder(G_SEXT_INREG)
639 .legalFor({s32, s64})
640 .legalFor(PackedVectorAllTypeList)
641 .maxScalar(0, s64)
642 .clampNumElements(0, v8s8, v16s8)
643 .clampNumElements(0, v4s16, v8s16)
644 .clampNumElements(0, v2s32, v4s32)
645 .clampMaxNumElements(0, s64, 2)
646 .lower();
647
648 // FP conversions
650 .legalFor(
651 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
652 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
653 .clampNumElements(0, v4s16, v4s16)
654 .clampNumElements(0, v2s32, v2s32)
655 .scalarize(0);
656
658 .legalFor(
659 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
660 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
661 .clampNumElements(0, v4s32, v4s32)
662 .clampNumElements(0, v2s64, v2s64)
663 .scalarize(0);
664
665 // Conversions
666 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
667 .legalFor({{s32, s32},
668 {s64, s32},
669 {s32, s64},
670 {s64, s64},
671 {v2s64, v2s64},
672 {v4s32, v4s32},
673 {v2s32, v2s32}})
674 .legalIf([=](const LegalityQuery &Query) {
675 return HasFP16 &&
676 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
677 Query.Types[1] == v8s16) &&
678 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
679 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
680 })
681 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
682 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
683 // The range of a fp16 value fits into an i17, so we can lower the width
684 // to i64.
685 .narrowScalarIf(
686 [=](const LegalityQuery &Query) {
687 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
688 },
689 changeTo(0, s64))
691 .widenScalarOrEltToNextPow2OrMinSize(0)
692 .minScalar(0, s32)
693 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
694 .widenScalarIf(
695 [=](const LegalityQuery &Query) {
696 return Query.Types[0].getScalarSizeInBits() <= 64 &&
697 Query.Types[0].getScalarSizeInBits() >
698 Query.Types[1].getScalarSizeInBits();
699 },
701 .widenScalarIf(
702 [=](const LegalityQuery &Query) {
703 return Query.Types[1].getScalarSizeInBits() <= 64 &&
704 Query.Types[0].getScalarSizeInBits() <
705 Query.Types[1].getScalarSizeInBits();
706 },
708 .clampNumElements(0, v4s16, v8s16)
709 .clampNumElements(0, v2s32, v4s32)
710 .clampMaxNumElements(0, s64, 2)
711 .libcallFor(
712 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
713
714 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
715 .legalFor({{s32, s32},
716 {s64, s32},
717 {s32, s64},
718 {s64, s64},
719 {v2s64, v2s64},
720 {v4s32, v4s32},
721 {v2s32, v2s32}})
722 .legalIf([=](const LegalityQuery &Query) {
723 return HasFP16 &&
724 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
725 Query.Types[0] == v8s16) &&
726 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
727 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
728 })
729 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
730 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
732 .widenScalarOrEltToNextPow2OrMinSize(1)
733 .minScalar(1, s32)
734 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
735 .widenScalarIf(
736 [=](const LegalityQuery &Query) {
737 return Query.Types[1].getScalarSizeInBits() <= 64 &&
738 Query.Types[0].getScalarSizeInBits() <
739 Query.Types[1].getScalarSizeInBits();
740 },
742 .widenScalarIf(
743 [=](const LegalityQuery &Query) {
744 return Query.Types[0].getScalarSizeInBits() <= 64 &&
745 Query.Types[0].getScalarSizeInBits() >
746 Query.Types[1].getScalarSizeInBits();
747 },
749 .clampNumElements(0, v4s16, v8s16)
750 .clampNumElements(0, v2s32, v4s32)
751 .clampMaxNumElements(0, s64, 2)
752 .libcallFor({{s16, s128},
753 {s32, s128},
754 {s64, s128},
755 {s128, s128},
756 {s128, s32},
757 {s128, s64}});
758
759 // Control-flow
761 .legalFor({s32})
762 .clampScalar(0, s32, s32);
763 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
764
766 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
767 .widenScalarToNextPow2(0)
768 .clampScalar(0, s32, s64)
769 .clampScalar(1, s32, s32)
771 .lowerIf(isVector(0));
772
773 // Pointer-handling
774 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
775
776 if (TM.getCodeModel() == CodeModel::Small)
777 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
778 else
779 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
780
781 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
782 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
783
785 .legalFor({{s64, p0}, {v2s64, v2p0}})
786 .widenScalarToNextPow2(0, 64)
787 .clampScalar(0, s64, s64);
788
790 .unsupportedIf([&](const LegalityQuery &Query) {
791 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
792 })
793 .legalFor({{p0, s64}, {v2p0, v2s64}});
794
795 // Casts for 32 and 64-bit width type are just copies.
796 // Same for 128-bit width type, except they are on the FPR bank.
798 // Keeping 32-bit instructions legal to prevent regression in some tests
799 .legalForCartesianProduct({s32, v2s16, v4s8})
800 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
801 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
802 .lowerIf([=](const LegalityQuery &Query) {
803 return Query.Types[0].isVector() != Query.Types[1].isVector();
804 })
806 .clampNumElements(0, v8s8, v16s8)
807 .clampNumElements(0, v4s16, v8s16)
808 .clampNumElements(0, v2s32, v4s32)
809 .lower();
810
811 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
812
813 // va_list must be a pointer, but most sized types are pretty easy to handle
814 // as the destination.
816 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
817 .clampScalar(0, s8, s64)
818 .widenScalarToNextPow2(0, /*Min*/ 8);
819
820 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
821 .lowerIf(
822 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
823
824 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
825 return ST.outlineAtomics() && !ST.hasLSE();
826 };
827
828 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
829 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
830 predNot(UseOutlineAtomics)))
831 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
832 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
833 return Query.Types[0].getSizeInBits() == 128 &&
834 !UseOutlineAtomics(Query);
835 })
836 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
837 UseOutlineAtomics))
838 .clampScalar(0, s32, s64);
839
840 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
841 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
842 G_ATOMICRMW_XOR})
843 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
844 predNot(UseOutlineAtomics)))
845 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
846 UseOutlineAtomics))
847 .clampScalar(0, s32, s64);
848
849 // Do not outline these atomics operations, as per comment in
850 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
852 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
853 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
854 .clampScalar(0, s32, s64);
855
856 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
857
858 // Merge/Unmerge
859 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
860 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
861 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
863 .widenScalarToNextPow2(LitTyIdx, 8)
864 .widenScalarToNextPow2(BigTyIdx, 32)
865 .clampScalar(LitTyIdx, s8, s64)
866 .clampScalar(BigTyIdx, s32, s128)
867 .legalIf([=](const LegalityQuery &Q) {
868 switch (Q.Types[BigTyIdx].getSizeInBits()) {
869 case 32:
870 case 64:
871 case 128:
872 break;
873 default:
874 return false;
875 }
876 switch (Q.Types[LitTyIdx].getSizeInBits()) {
877 case 8:
878 case 16:
879 case 32:
880 case 64:
881 return true;
882 default:
883 return false;
884 }
885 });
886 }
887
888 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
889 .unsupportedIf([=](const LegalityQuery &Query) {
890 const LLT &EltTy = Query.Types[1].getElementType();
891 return Query.Types[0] != EltTy;
892 })
893 .minScalar(2, s64)
894 .customIf([=](const LegalityQuery &Query) {
895 const LLT &VecTy = Query.Types[1];
896 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
897 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
898 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
899 })
900 .minScalarOrEltIf(
901 [=](const LegalityQuery &Query) {
902 // We want to promote to <M x s1> to <M x s64> if that wouldn't
903 // cause the total vec size to be > 128b.
904 return Query.Types[1].getNumElements() <= 2;
905 },
906 0, s64)
907 .minScalarOrEltIf(
908 [=](const LegalityQuery &Query) {
909 return Query.Types[1].getNumElements() <= 4;
910 },
911 0, s32)
912 .minScalarOrEltIf(
913 [=](const LegalityQuery &Query) {
914 return Query.Types[1].getNumElements() <= 8;
915 },
916 0, s16)
917 .minScalarOrEltIf(
918 [=](const LegalityQuery &Query) {
919 return Query.Types[1].getNumElements() <= 16;
920 },
921 0, s8)
922 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
924 .clampMaxNumElements(1, s64, 2)
925 .clampMaxNumElements(1, s32, 4)
926 .clampMaxNumElements(1, s16, 8)
927 .clampMaxNumElements(1, s8, 16)
928 .clampMaxNumElements(1, p0, 2);
929
930 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
931 .legalIf(
932 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
935 .clampNumElements(0, v8s8, v16s8)
936 .clampNumElements(0, v4s16, v8s16)
937 .clampNumElements(0, v2s32, v4s32)
938 .clampMaxNumElements(0, s64, 2)
939 .clampMaxNumElements(0, p0, 2);
940
941 getActionDefinitionsBuilder(G_BUILD_VECTOR)
942 .legalFor({{v8s8, s8},
943 {v16s8, s8},
944 {v4s16, s16},
945 {v8s16, s16},
946 {v2s32, s32},
947 {v4s32, s32},
948 {v2p0, p0},
949 {v2s64, s64}})
950 .clampNumElements(0, v4s32, v4s32)
951 .clampNumElements(0, v2s64, v2s64)
952 .minScalarOrElt(0, s8)
954 .minScalarSameAs(1, 0);
955
956 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
957
960 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
961 .scalarize(1)
962 .widenScalarToNextPow2(1, /*Min=*/32)
963 .clampScalar(1, s32, s64)
964 .scalarSameSizeAs(0, 1);
965 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
966
967 // TODO: Custom lowering for v2s32, v4s32, v2s64.
968 getActionDefinitionsBuilder(G_BITREVERSE)
969 .legalFor({s32, s64, v8s8, v16s8})
970 .widenScalarToNextPow2(0, /*Min = */ 32)
971 .clampScalar(0, s32, s64);
972
973 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
974
976 .lowerIf(isVector(0))
977 .widenScalarToNextPow2(1, /*Min=*/32)
978 .clampScalar(1, s32, s64)
979 .scalarSameSizeAs(0, 1)
980 .legalIf([=](const LegalityQuery &Query) {
981 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
982 })
983 .customIf([=](const LegalityQuery &Query) {
984 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
985 });
986
987 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
988 .legalIf([=](const LegalityQuery &Query) {
989 const LLT &DstTy = Query.Types[0];
990 const LLT &SrcTy = Query.Types[1];
991 // For now just support the TBL2 variant which needs the source vectors
992 // to be the same size as the dest.
993 if (DstTy != SrcTy)
994 return false;
995 return llvm::is_contained(
996 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
997 })
998 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
999 // just want those lowered into G_BUILD_VECTOR
1000 .lowerIf([=](const LegalityQuery &Query) {
1001 return !Query.Types[1].isVector();
1002 })
1003 .moreElementsIf(
1004 [](const LegalityQuery &Query) {
1005 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1006 Query.Types[0].getNumElements() >
1007 Query.Types[1].getNumElements();
1008 },
1009 changeTo(1, 0))
1011 .moreElementsIf(
1012 [](const LegalityQuery &Query) {
1013 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1014 Query.Types[0].getNumElements() <
1015 Query.Types[1].getNumElements();
1016 },
1017 changeTo(0, 1))
1018 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1019 .clampNumElements(0, v8s8, v16s8)
1020 .clampNumElements(0, v4s16, v8s16)
1021 .clampNumElements(0, v4s32, v4s32)
1022 .clampNumElements(0, v2s64, v2s64);
1023
1024 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1025 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1026 .bitcastIf(
1027 [=](const LegalityQuery &Query) {
1028 return Query.Types[0].getSizeInBits() <= 128 &&
1029 Query.Types[1].getSizeInBits() <= 64;
1030 },
1031 [=](const LegalityQuery &Query) {
1032 const LLT DstTy = Query.Types[0];
1033 const LLT SrcTy = Query.Types[1];
1034 return std::pair(
1035 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1038 SrcTy.getNumElements())));
1039 });
1040
1041 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1042
1043 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1044
1045 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1046
1047 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1048
1049 if (ST.hasMOPS()) {
1050 // G_BZERO is not supported. Currently it is only emitted by
1051 // PreLegalizerCombiner for G_MEMSET with zero constant.
1053
1055 .legalForCartesianProduct({p0}, {s64}, {s64})
1056 .customForCartesianProduct({p0}, {s8}, {s64})
1057 .immIdx(0); // Inform verifier imm idx 0 is handled.
1058
1059 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1060 .legalForCartesianProduct({p0}, {p0}, {s64})
1061 .immIdx(0); // Inform verifier imm idx 0 is handled.
1062
1063 // G_MEMCPY_INLINE does not have a tailcall immediate
1064 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1065 .legalForCartesianProduct({p0}, {p0}, {s64});
1066
1067 } else {
1068 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1069 .libcall();
1070 }
1071
1072 // FIXME: Legal vector types are only legal with NEON.
1073 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1074 if (HasCSSC)
1075 ABSActions
1076 .legalFor({s32, s64});
1077 ABSActions.legalFor(PackedVectorAllTypeList)
1078 .customIf([=](const LegalityQuery &Q) {
1079 // TODO: Fix suboptimal codegen for 128+ bit types.
1080 LLT SrcTy = Q.Types[0];
1081 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1082 })
1083 .widenScalarIf(
1084 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1085 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1086 .widenScalarIf(
1087 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1088 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1089 .clampNumElements(0, v8s8, v16s8)
1090 .clampNumElements(0, v4s16, v8s16)
1091 .clampNumElements(0, v2s32, v4s32)
1092 .clampNumElements(0, v2s64, v2s64)
1093 .moreElementsToNextPow2(0)
1094 .lower();
1095
1096 // For fadd reductions we have pairwise operations available. We treat the
1097 // usual legal types as legal and handle the lowering to pairwise instructions
1098 // later.
1099 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1100 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1101 .legalIf([=](const LegalityQuery &Query) {
1102 const auto &Ty = Query.Types[1];
1103 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1104 })
1105 .minScalarOrElt(0, MinFPScalar)
1106 .clampMaxNumElements(1, s64, 2)
1107 .clampMaxNumElements(1, s32, 4)
1108 .clampMaxNumElements(1, s16, 8)
1109 .lower();
1110
1111 // For fmul reductions we need to split up into individual operations. We
1112 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1113 // smaller types, followed by scalarizing what remains.
1114 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1115 .minScalarOrElt(0, MinFPScalar)
1116 .clampMaxNumElements(1, s64, 2)
1117 .clampMaxNumElements(1, s32, 4)
1118 .clampMaxNumElements(1, s16, 8)
1119 .clampMaxNumElements(1, s32, 2)
1120 .clampMaxNumElements(1, s16, 4)
1121 .scalarize(1)
1122 .lower();
1123
1124 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1125 .scalarize(2)
1126 .lower();
1127
1128 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1129 .legalFor({{s8, v16s8},
1130 {s8, v8s8},
1131 {s16, v8s16},
1132 {s16, v4s16},
1133 {s32, v4s32},
1134 {s32, v2s32},
1135 {s64, v2s64}})
1136 .clampMaxNumElements(1, s64, 2)
1137 .clampMaxNumElements(1, s32, 4)
1138 .clampMaxNumElements(1, s16, 8)
1139 .clampMaxNumElements(1, s8, 16)
1140 .lower();
1141
1142 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1143 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1144 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1145 .legalIf([=](const LegalityQuery &Query) {
1146 const auto &Ty = Query.Types[1];
1147 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1148 })
1149 .minScalarOrElt(0, MinFPScalar)
1150 .clampMaxNumElements(1, s64, 2)
1151 .clampMaxNumElements(1, s32, 4)
1152 .clampMaxNumElements(1, s16, 8)
1153 .lower();
1154
1155 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1156 .clampMaxNumElements(1, s32, 2)
1157 .clampMaxNumElements(1, s16, 4)
1158 .clampMaxNumElements(1, s8, 8)
1159 .scalarize(1)
1160 .lower();
1161
1163 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1164 .legalFor({{s8, v8s8},
1165 {s8, v16s8},
1166 {s16, v4s16},
1167 {s16, v8s16},
1168 {s32, v2s32},
1169 {s32, v4s32}})
1170 .moreElementsIf(
1171 [=](const LegalityQuery &Query) {
1172 return Query.Types[1].isVector() &&
1173 Query.Types[1].getElementType() != s8 &&
1174 Query.Types[1].getNumElements() & 1;
1175 },
1177 .clampMaxNumElements(1, s64, 2)
1178 .clampMaxNumElements(1, s32, 4)
1179 .clampMaxNumElements(1, s16, 8)
1180 .clampMaxNumElements(1, s8, 16)
1181 .scalarize(1)
1182 .lower();
1183
1185 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1186 // Try to break down into smaller vectors as long as they're at least 64
1187 // bits. This lets us use vector operations for some parts of the
1188 // reduction.
1189 .fewerElementsIf(
1190 [=](const LegalityQuery &Q) {
1191 LLT SrcTy = Q.Types[1];
1192 if (SrcTy.isScalar())
1193 return false;
1194 if (!isPowerOf2_32(SrcTy.getNumElements()))
1195 return false;
1196 // We can usually perform 64b vector operations.
1197 return SrcTy.getSizeInBits() > 64;
1198 },
1199 [=](const LegalityQuery &Q) {
1200 LLT SrcTy = Q.Types[1];
1201 return std::make_pair(1, SrcTy.divide(2));
1202 })
1203 .scalarize(1)
1204 .lower();
1205
1206 // TODO: Update this to correct handling when adding AArch64/SVE support.
1207 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1208
1209 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1210 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1211 .lower();
1212
1214 .legalFor({{s32, s64}, {s64, s64}})
1215 .customIf([=](const LegalityQuery &Q) {
1216 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1217 })
1218 .lower();
1220
1221 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1222 .customFor({{s32, s32}, {s64, s64}});
1223
1224 auto always = [=](const LegalityQuery &Q) { return true; };
1225 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1226 if (HasCSSC)
1227 CTPOPActions
1228 .legalFor({{s32, s32},
1229 {s64, s64},
1230 {v8s8, v8s8},
1231 {v16s8, v16s8}})
1232 .customFor({{s128, s128},
1233 {v2s64, v2s64},
1234 {v2s32, v2s32},
1235 {v4s32, v4s32},
1236 {v4s16, v4s16},
1237 {v8s16, v8s16}});
1238 else
1239 CTPOPActions
1240 .legalFor({{v8s8, v8s8},
1241 {v16s8, v16s8}})
1242 .customFor({{s32, s32},
1243 {s64, s64},
1244 {s128, s128},
1245 {v2s64, v2s64},
1246 {v2s32, v2s32},
1247 {v4s32, v4s32},
1248 {v4s16, v4s16},
1249 {v8s16, v8s16}});
1250 CTPOPActions
1251 .clampScalar(0, s32, s128)
1252 .widenScalarToNextPow2(0)
1253 .minScalarEltSameAsIf(always, 1, 0)
1254 .maxScalarEltSameAsIf(always, 1, 0);
1255
1256 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1257 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1258 .clampNumElements(0, v8s8, v16s8)
1259 .clampNumElements(0, v4s16, v8s16)
1260 .clampNumElements(0, v2s32, v4s32)
1261 .clampMaxNumElements(0, s64, 2)
1263 .lower();
1264
1265 // TODO: Libcall support for s128.
1266 // TODO: s16 should be legal with full FP16 support.
1267 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1268 .legalFor({{s64, s32}, {s64, s64}});
1269
1270 // TODO: Custom legalization for mismatched types.
1271 getActionDefinitionsBuilder(G_FCOPYSIGN)
1273 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1274 [=](const LegalityQuery &Query) {
1275 const LLT Ty = Query.Types[0];
1276 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1277 })
1278 .lower();
1279
1281
1282 // Access to floating-point environment.
1283 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1284 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1285 .libcall();
1286
1287 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1288
1289 getActionDefinitionsBuilder(G_PREFETCH).custom();
1290
1291 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1292
1294 verify(*ST.getInstrInfo());
1295}
1296
1299 LostDebugLocObserver &LocObserver) const {
1300 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1301 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1302 GISelChangeObserver &Observer = Helper.Observer;
1303 switch (MI.getOpcode()) {
1304 default:
1305 // No idea what to do.
1306 return false;
1307 case TargetOpcode::G_VAARG:
1308 return legalizeVaArg(MI, MRI, MIRBuilder);
1309 case TargetOpcode::G_LOAD:
1310 case TargetOpcode::G_STORE:
1311 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1312 case TargetOpcode::G_SHL:
1313 case TargetOpcode::G_ASHR:
1314 case TargetOpcode::G_LSHR:
1315 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1316 case TargetOpcode::G_GLOBAL_VALUE:
1317 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1318 case TargetOpcode::G_SBFX:
1319 case TargetOpcode::G_UBFX:
1320 return legalizeBitfieldExtract(MI, MRI, Helper);
1321 case TargetOpcode::G_FSHL:
1322 case TargetOpcode::G_FSHR:
1323 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1324 case TargetOpcode::G_ROTR:
1325 return legalizeRotate(MI, MRI, Helper);
1326 case TargetOpcode::G_CTPOP:
1327 return legalizeCTPOP(MI, MRI, Helper);
1328 case TargetOpcode::G_ATOMIC_CMPXCHG:
1329 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1330 case TargetOpcode::G_CTTZ:
1331 return legalizeCTTZ(MI, Helper);
1332 case TargetOpcode::G_BZERO:
1333 case TargetOpcode::G_MEMCPY:
1334 case TargetOpcode::G_MEMMOVE:
1335 case TargetOpcode::G_MEMSET:
1336 return legalizeMemOps(MI, Helper);
1337 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1338 return legalizeExtractVectorElt(MI, MRI, Helper);
1339 case TargetOpcode::G_DYN_STACKALLOC:
1340 return legalizeDynStackAlloc(MI, Helper);
1341 case TargetOpcode::G_PREFETCH:
1342 return legalizePrefetch(MI, Helper);
1343 case TargetOpcode::G_ABS:
1344 return Helper.lowerAbsToCNeg(MI);
1345 case TargetOpcode::G_ICMP:
1346 return legalizeICMP(MI, MRI, MIRBuilder);
1347 }
1348
1349 llvm_unreachable("expected switch to return");
1350}
1351
1352bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1354 MachineIRBuilder &MIRBuilder,
1355 GISelChangeObserver &Observer,
1356 LegalizerHelper &Helper) const {
1357 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1358 MI.getOpcode() == TargetOpcode::G_FSHR);
1359
1360 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1361 // lowering
1362 Register ShiftNo = MI.getOperand(3).getReg();
1363 LLT ShiftTy = MRI.getType(ShiftNo);
1364 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1365
1366 // Adjust shift amount according to Opcode (FSHL/FSHR)
1367 // Convert FSHL to FSHR
1368 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1369 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1370
1371 // Lower non-constant shifts and leave zero shifts to the optimizer.
1372 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1373 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1375
1376 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1377
1378 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1379
1380 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1381 // in the range of 0 <-> BitWidth, it is legal
1382 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1383 VRegAndVal->Value.ult(BitWidth))
1384 return true;
1385
1386 // Cast the ShiftNumber to a 64-bit type
1387 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1388
1389 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1390 Observer.changingInstr(MI);
1391 MI.getOperand(3).setReg(Cast64.getReg(0));
1392 Observer.changedInstr(MI);
1393 }
1394 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1395 // instruction
1396 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1397 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1398 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1399 Cast64.getReg(0)});
1400 MI.eraseFromParent();
1401 }
1402 return true;
1403}
1404
1405bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1407 MachineIRBuilder &MIRBuilder) const {
1408 Register DstReg = MI.getOperand(0).getReg();
1409 Register SrcReg1 = MI.getOperand(2).getReg();
1410 Register SrcReg2 = MI.getOperand(3).getReg();
1411 LLT DstTy = MRI.getType(DstReg);
1412 LLT SrcTy = MRI.getType(SrcReg1);
1413
1414 // Check the vector types are legal
1415 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1416 DstTy.getNumElements() != SrcTy.getNumElements() ||
1417 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1418 return false;
1419
1420 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1421 // following passes
1422 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1423 if (Pred != CmpInst::ICMP_NE)
1424 return true;
1425 Register CmpReg =
1426 MIRBuilder
1427 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1428 .getReg(0);
1429 MIRBuilder.buildNot(DstReg, CmpReg);
1430
1431 MI.eraseFromParent();
1432 return true;
1433}
1434
1435bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1437 LegalizerHelper &Helper) const {
1438 // To allow for imported patterns to match, we ensure that the rotate amount
1439 // is 64b with an extension.
1440 Register AmtReg = MI.getOperand(2).getReg();
1441 LLT AmtTy = MRI.getType(AmtReg);
1442 (void)AmtTy;
1443 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1444 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1445 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1446 Helper.Observer.changingInstr(MI);
1447 MI.getOperand(2).setReg(NewAmt.getReg(0));
1448 Helper.Observer.changedInstr(MI);
1449 return true;
1450}
1451
1452bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1454 GISelChangeObserver &Observer) const {
1455 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1456 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1457 // G_ADD_LOW instructions.
1458 // By splitting this here, we can optimize accesses in the small code model by
1459 // folding in the G_ADD_LOW into the load/store offset.
1460 auto &GlobalOp = MI.getOperand(1);
1461 // Don't modify an intrinsic call.
1462 if (GlobalOp.isSymbol())
1463 return true;
1464 const auto* GV = GlobalOp.getGlobal();
1465 if (GV->isThreadLocal())
1466 return true; // Don't want to modify TLS vars.
1467
1468 auto &TM = ST->getTargetLowering()->getTargetMachine();
1469 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1470
1471 if (OpFlags & AArch64II::MO_GOT)
1472 return true;
1473
1474 auto Offset = GlobalOp.getOffset();
1475 Register DstReg = MI.getOperand(0).getReg();
1476 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1477 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1478 // Set the regclass on the dest reg too.
1479 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1480
1481 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1482 // by creating a MOVK that sets bits 48-63 of the register to (global address
1483 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1484 // prevent an incorrect tag being generated during relocation when the
1485 // global appears before the code section. Without the offset, a global at
1486 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1487 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1488 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1489 // instead of `0xf`.
1490 // This assumes that we're in the small code model so we can assume a binary
1491 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1492 // binary must also be loaded into address range [0, 2^48). Both of these
1493 // properties need to be ensured at runtime when using tagged addresses.
1494 if (OpFlags & AArch64II::MO_TAGGED) {
1495 assert(!Offset &&
1496 "Should not have folded in an offset for a tagged global!");
1497 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1498 .addGlobalAddress(GV, 0x100000000,
1500 .addImm(48);
1501 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1502 }
1503
1504 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1505 .addGlobalAddress(GV, Offset,
1507 MI.eraseFromParent();
1508 return true;
1509}
1510
1512 MachineInstr &MI) const {
1513 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1514 switch (IntrinsicID) {
1515 case Intrinsic::vacopy: {
1516 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1517 unsigned VaListSize =
1518 (ST->isTargetDarwin() || ST->isTargetWindows())
1519 ? PtrSize
1520 : ST->isTargetILP32() ? 20 : 32;
1521
1522 MachineFunction &MF = *MI.getMF();
1524 LLT::scalar(VaListSize * 8));
1525 MachineIRBuilder MIB(MI);
1526 MIB.buildLoad(Val, MI.getOperand(2),
1529 VaListSize, Align(PtrSize)));
1530 MIB.buildStore(Val, MI.getOperand(1),
1533 VaListSize, Align(PtrSize)));
1534 MI.eraseFromParent();
1535 return true;
1536 }
1537 case Intrinsic::get_dynamic_area_offset: {
1538 MachineIRBuilder &MIB = Helper.MIRBuilder;
1539 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1540 MI.eraseFromParent();
1541 return true;
1542 }
1543 case Intrinsic::aarch64_mops_memset_tag: {
1544 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1545 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1546 // the instruction).
1547 MachineIRBuilder MIB(MI);
1548 auto &Value = MI.getOperand(3);
1549 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1550 Value.setReg(ExtValueReg);
1551 return true;
1552 }
1553 case Intrinsic::aarch64_prefetch: {
1554 MachineIRBuilder MIB(MI);
1555 auto &AddrVal = MI.getOperand(1);
1556
1557 int64_t IsWrite = MI.getOperand(2).getImm();
1558 int64_t Target = MI.getOperand(3).getImm();
1559 int64_t IsStream = MI.getOperand(4).getImm();
1560 int64_t IsData = MI.getOperand(5).getImm();
1561
1562 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1563 (!IsData << 3) | // IsDataCache bit
1564 (Target << 1) | // Cache level bits
1565 (unsigned)IsStream; // Stream bit
1566
1567 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1568 MI.eraseFromParent();
1569 return true;
1570 }
1571 case Intrinsic::aarch64_neon_uaddv:
1572 case Intrinsic::aarch64_neon_saddv:
1573 case Intrinsic::aarch64_neon_umaxv:
1574 case Intrinsic::aarch64_neon_smaxv:
1575 case Intrinsic::aarch64_neon_uminv:
1576 case Intrinsic::aarch64_neon_sminv: {
1577 MachineIRBuilder MIB(MI);
1578 MachineRegisterInfo &MRI = *MIB.getMRI();
1579 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1580 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1581 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1582
1583 auto OldDst = MI.getOperand(0).getReg();
1584 auto OldDstTy = MRI.getType(OldDst);
1585 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1586 if (OldDstTy == NewDstTy)
1587 return true;
1588
1589 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1590
1591 Helper.Observer.changingInstr(MI);
1592 MI.getOperand(0).setReg(NewDst);
1593 Helper.Observer.changedInstr(MI);
1594
1595 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1596 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1597 OldDst, NewDst);
1598
1599 return true;
1600 }
1601 case Intrinsic::aarch64_neon_uaddlp:
1602 case Intrinsic::aarch64_neon_saddlp: {
1603 MachineIRBuilder MIB(MI);
1604
1605 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1606 ? AArch64::G_UADDLP
1607 : AArch64::G_SADDLP;
1608 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1609 MI.eraseFromParent();
1610
1611 return true;
1612 }
1613 case Intrinsic::aarch64_neon_uaddlv:
1614 case Intrinsic::aarch64_neon_saddlv: {
1615 MachineIRBuilder MIB(MI);
1616 MachineRegisterInfo &MRI = *MIB.getMRI();
1617
1618 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1619 ? AArch64::G_UADDLV
1620 : AArch64::G_SADDLV;
1621 Register DstReg = MI.getOperand(0).getReg();
1622 Register SrcReg = MI.getOperand(2).getReg();
1623 LLT DstTy = MRI.getType(DstReg);
1624
1625 LLT MidTy, ExtTy;
1626 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1627 MidTy = LLT::fixed_vector(4, 32);
1628 ExtTy = LLT::scalar(32);
1629 } else {
1630 MidTy = LLT::fixed_vector(2, 64);
1631 ExtTy = LLT::scalar(64);
1632 }
1633
1634 Register MidReg =
1635 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1636 Register ZeroReg =
1637 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1638 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1639 {MidReg, ZeroReg})
1640 .getReg(0);
1641
1642 if (DstTy.getScalarSizeInBits() < 32)
1643 MIB.buildTrunc(DstReg, ExtReg);
1644 else
1645 MIB.buildCopy(DstReg, ExtReg);
1646
1647 MI.eraseFromParent();
1648
1649 return true;
1650 }
1651 case Intrinsic::aarch64_neon_smax:
1652 case Intrinsic::aarch64_neon_smin:
1653 case Intrinsic::aarch64_neon_umax:
1654 case Intrinsic::aarch64_neon_umin:
1655 case Intrinsic::aarch64_neon_fmax:
1656 case Intrinsic::aarch64_neon_fmin:
1657 case Intrinsic::aarch64_neon_fmaxnm:
1658 case Intrinsic::aarch64_neon_fminnm: {
1659 MachineIRBuilder MIB(MI);
1660 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1661 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1662 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1663 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1664 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1665 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1666 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1667 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1668 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1669 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1670 {MI.getOperand(2), MI.getOperand(3)});
1671 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1672 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1673 {MI.getOperand(2), MI.getOperand(3)});
1674 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1675 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1676 {MI.getOperand(2), MI.getOperand(3)});
1677 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1678 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1679 {MI.getOperand(2), MI.getOperand(3)});
1680 MI.eraseFromParent();
1681 return true;
1682 }
1683 case Intrinsic::vector_reverse:
1684 // TODO: Add support for vector_reverse
1685 return false;
1686 }
1687
1688 return true;
1689}
1690
1691bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1693 GISelChangeObserver &Observer) const {
1694 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1695 MI.getOpcode() == TargetOpcode::G_LSHR ||
1696 MI.getOpcode() == TargetOpcode::G_SHL);
1697 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1698 // imported patterns can select it later. Either way, it will be legal.
1699 Register AmtReg = MI.getOperand(2).getReg();
1700 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1701 if (!VRegAndVal)
1702 return true;
1703 // Check the shift amount is in range for an immediate form.
1704 int64_t Amount = VRegAndVal->Value.getSExtValue();
1705 if (Amount > 31)
1706 return true; // This will have to remain a register variant.
1707 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1708 Observer.changingInstr(MI);
1709 MI.getOperand(2).setReg(ExtCst.getReg(0));
1710 Observer.changedInstr(MI);
1711 return true;
1712}
1713
1714static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1715 MachineRegisterInfo &MRI) {
1716 Base = Root;
1717 Offset = 0;
1718
1719 Register NewBase;
1720 int64_t NewOffset;
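  // isShiftedInt<7, 3> below accepts byte offsets that are multiples of 8 in
  // [-512, 504], i.e. the scaled signed 7-bit immediate range of LDP/STP; a
  // folded offset is later emitted as an immediate of Offset / 8.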
1721 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1722 isShiftedInt<7, 3>(NewOffset)) {
1723 Base = NewBase;
1724 Offset = NewOffset;
1725 }
1726}
1727
1728// FIXME: This should be removed and replaced with the generic bitcast legalize
1729// action.
1730bool AArch64LegalizerInfo::legalizeLoadStore(
1731 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1732 GISelChangeObserver &Observer) const {
1733 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1734 MI.getOpcode() == TargetOpcode::G_LOAD);
1735 // Here we just try to handle vector loads/stores where our value type might
1736 // have pointer elements, which the SelectionDAG importer can't handle. To
1737 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1738 // the value to use s64 types.
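  // Roughly, for a load:
  //   %v:_(<2 x p0>) = G_LOAD %p(p0)
  // becomes
  //   %tmp:_(<2 x s64>) = G_LOAD %p(p0)
  //   %v:_(<2 x p0>) = G_BITCAST %tmp(<2 x s64>)
  // and a store is handled symmetrically, with the bitcast on the value input.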
1739
1740 // Custom legalization requires that the instruction, if not deleted, be
1741 // fully legalized. To allow further legalization of the instruction, we
1742 // create a new instruction and erase the existing one.
1743
1744 Register ValReg = MI.getOperand(0).getReg();
1745 const LLT ValTy = MRI.getType(ValReg);
1746
1747 if (ValTy == LLT::scalar(128)) {
1748
1749 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1750 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1751 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1752 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1753 bool IsRcpC3 =
1754 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1755
1756 LLT s64 = LLT::scalar(64);
1757
1758 unsigned Opcode;
1759 if (IsRcpC3) {
1760 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1761 } else {
1762 // For LSE2, loads/stores should have been converted to monotonic and had
1763 // a fence inserted after them.
1764 assert(Ordering == AtomicOrdering::Monotonic ||
1765 Ordering == AtomicOrdering::Unordered);
1766 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1767
1768 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1769 }
1770
1771 MachineInstrBuilder NewI;
1772 if (IsLoad) {
1773 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1774 MIRBuilder.buildMergeLikeInstr(
1775 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1776 } else {
1777 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1778 NewI = MIRBuilder.buildInstr(
1779 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1780 }
1781
1782 if (IsRcpC3) {
1783 NewI.addUse(MI.getOperand(1).getReg());
1784 } else {
1785 Register Base;
1786 int Offset;
1787 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1788 NewI.addUse(Base);
1789 NewI.addImm(Offset / 8);
1790 }
1791
1792 NewI.cloneMemRefs(MI);
1793 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1794 *MRI.getTargetRegisterInfo(),
1795 *ST->getRegBankInfo());
1796 MI.eraseFromParent();
1797 return true;
1798 }
1799
1800 if (!ValTy.isPointerVector() ||
1801 ValTy.getElementType().getAddressSpace() != 0) {
1802 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1803 return false;
1804 }
1805
1806 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1807 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1808 auto &MMO = **MI.memoperands_begin();
1809 MMO.setType(NewTy);
1810
1811 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1812 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1813 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1814 } else {
1815 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1816 MIRBuilder.buildBitcast(ValReg, NewLoad);
1817 }
1818 MI.eraseFromParent();
1819 return true;
1820}
1821
1822bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1823 MachineRegisterInfo &MRI,
1824 MachineIRBuilder &MIRBuilder) const {
1825 MachineFunction &MF = MIRBuilder.getMF();
1826 Align Alignment(MI.getOperand(2).getImm());
1827 Register Dst = MI.getOperand(0).getReg();
1828 Register ListPtr = MI.getOperand(1).getReg();
1829
1830 LLT PtrTy = MRI.getType(ListPtr);
1831 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1832
1833 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1834 const Align PtrAlign = Align(PtrSize);
1835 auto List = MIRBuilder.buildLoad(
1836 PtrTy, ListPtr,
1837 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1838 PtrTy, PtrAlign));
1839
1840 MachineInstrBuilder DstPtr;
1841 if (Alignment > PtrAlign) {
1842 // Realign the list to the actual required alignment.
1843 auto AlignMinus1 =
1844 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1845 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1846 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1847 } else
1848 DstPtr = List;
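  // For example, with a 16-byte aligned element and an 8-byte aligned list
  // pointer, the sequence above adds Alignment - 1 = 15 and then clears the
  // low Log2(16) = 4 bits, rounding List up to the next 16-byte boundary.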
1849
1850 LLT ValTy = MRI.getType(Dst);
1851 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1852 MIRBuilder.buildLoad(
1853 Dst, DstPtr,
1854 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1855 ValTy, std::max(Alignment, PtrAlign)));
1856
1857 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
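  // The list pointer is advanced by the value size rounded up to the slot
  // size, e.g. alignTo(4, 8) = 8 bytes for an s32 value with 64-bit pointers.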
1858
1859 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1860
1861 MIRBuilder.buildStore(NewList, ListPtr,
1862 *MF.getMachineMemOperand(MachinePointerInfo(),
1863 MachineMemOperand::MOStore,
1864 PtrTy, PtrAlign));
1865
1866 MI.eraseFromParent();
1867 return true;
1868}
1869
1870bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1871 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1872 // Only legal if we can select immediate forms.
1873 // TODO: Lower this otherwise.
1874 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1875 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1876}
1877
1878bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1879 MachineRegisterInfo &MRI,
1880 LegalizerHelper &Helper) const {
1881 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1882 // it can be more efficiently lowered to the following sequence that uses
1883 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1884 // registers are cheap.
1885 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1886 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1887 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1888 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1889 //
1890 // For 128 bit vector popcounts, we lower to the following sequence:
1891 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1892 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1893 // uaddlp.4s v0, v0 // v4s32, v2s64
1894 // uaddlp.2d v0, v0 // v2s64
1895 //
1896 // For 64 bit vector popcounts, we lower to the following sequence:
1897 // cnt.8b v0, v0 // v4s16, v2s32
1898 // uaddlp.4h v0, v0 // v4s16, v2s32
1899 // uaddlp.2s v0, v0 // v2s32
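 //
 // When FEAT_CSSC is available, a 128-bit scalar G_CTPOP is instead split into
 // two s64 G_CTPOPs whose results are added and then zero-extended back to
 // 128 bits (first block below).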
1900
1901 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1902 Register Dst = MI.getOperand(0).getReg();
1903 Register Val = MI.getOperand(1).getReg();
1904 LLT Ty = MRI.getType(Val);
1905 unsigned Size = Ty.getSizeInBits();
1906
1907 assert(Ty == MRI.getType(Dst) &&
1908 "Expected src and dst to have the same type!");
1909
1910 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1911 LLT s64 = LLT::scalar(64);
1912
1913 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1914 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1915 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1916 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1917
1918 MIRBuilder.buildZExt(Dst, Add);
1919 MI.eraseFromParent();
1920 return true;
1921 }
1922
1923 if (!ST->hasNEON() ||
1924 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1925 // Use generic lowering when custom lowering is not possible.
1926 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1927 Helper.lowerBitCount(MI) ==
1928 LegalizerHelper::LegalizeResult::Legalized;
1929 }
1930
1931 // Pre-conditioning: widen Val up to the nearest vector type.
1932 // s32,s64,v4s16,v2s32 -> v8i8
1933 // v8s16,v4s32,v2s64 -> v16i8
1934 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1935 if (Ty.isScalar()) {
1936 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1937 if (Size == 32) {
1938 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1939 }
1940 }
1941 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
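 // At this point Val is either v8s8 (64-bit inputs) or v16s8 (128-bit inputs);
 // an s32 input was first zero-extended to s64 so the bitcast above stays
 // size-preserving.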
1942
1943 // Count bits in each byte-sized lane.
1944 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1945
1946 // Sum across lanes.
1947
1948 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1949 Ty.getScalarSizeInBits() != 16) {
1950 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1951 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
1952 auto Ones = MIRBuilder.buildConstant(VTy, 1);
1953 MachineInstrBuilder Sum;
1954
1955 if (Ty == LLT::fixed_vector(2, 64)) {
1956 auto UDOT =
1957 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1958 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1959 } else if (Ty == LLT::fixed_vector(4, 32)) {
1960 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1961 } else if (Ty == LLT::fixed_vector(2, 32)) {
1962 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1963 } else {
1964 llvm_unreachable("unexpected vector shape");
1965 }
1966
1967 Sum->getOperand(0).setReg(Dst);
1968 MI.eraseFromParent();
1969 return true;
1970 }
1971
1972 Register HSum = CTPOP.getReg(0);
1973 unsigned Opc;
1974 SmallVector<LLT> HAddTys;
1975 if (Ty.isScalar()) {
1976 Opc = Intrinsic::aarch64_neon_uaddlv;
1977 HAddTys.push_back(LLT::scalar(32));
1978 } else if (Ty == LLT::fixed_vector(8, 16)) {
1979 Opc = Intrinsic::aarch64_neon_uaddlp;
1980 HAddTys.push_back(LLT::fixed_vector(8, 16));
1981 } else if (Ty == LLT::fixed_vector(4, 32)) {
1982 Opc = Intrinsic::aarch64_neon_uaddlp;
1983 HAddTys.push_back(LLT::fixed_vector(8, 16));
1984 HAddTys.push_back(LLT::fixed_vector(4, 32));
1985 } else if (Ty == LLT::fixed_vector(2, 64)) {
1986 Opc = Intrinsic::aarch64_neon_uaddlp;
1987 HAddTys.push_back(LLT::fixed_vector(8, 16));
1988 HAddTys.push_back(LLT::fixed_vector(4, 32));
1989 HAddTys.push_back(LLT::fixed_vector(2, 64));
1990 } else if (Ty == LLT::fixed_vector(4, 16)) {
1991 Opc = Intrinsic::aarch64_neon_uaddlp;
1992 HAddTys.push_back(LLT::fixed_vector(4, 16));
1993 } else if (Ty == LLT::fixed_vector(2, 32)) {
1994 Opc = Intrinsic::aarch64_neon_uaddlp;
1995 HAddTys.push_back(LLT::fixed_vector(4, 16));
1996 HAddTys.push_back(LLT::fixed_vector(2, 32));
1997 } else
1998 llvm_unreachable("unexpected vector shape");
1999 MachineInstrBuilder UADD;
2000 for (LLT HTy : HAddTys) {
2001 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2002 HSum = UADD.getReg(0);
2003 }
2004
2005 // Post-conditioning.
2006 if (Ty.isScalar() && (Size == 64 || Size == 128))
2007 MIRBuilder.buildZExt(Dst, UADD);
2008 else
2009 UADD->getOperand(0).setReg(Dst);
2010 MI.eraseFromParent();
2011 return true;
2012}
2013
2014bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2015 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2016 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2017 LLT s64 = LLT::scalar(64);
2018 auto Addr = MI.getOperand(1).getReg();
2019 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2020 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2021 auto DstLo = MRI.createGenericVirtualRegister(s64);
2022 auto DstHi = MRI.createGenericVirtualRegister(s64);
2023
2024 MachineInstrBuilder CAS;
2025 if (ST->hasLSE()) {
2026 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2027 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2028 // the rest of the MIR so we must reassemble the extracted registers into a
2029 // 128-bit known-regclass one with code like this:
2030 //
2031 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2032 // %out = CASP %in1, ...
2033 // %OldLo = G_EXTRACT %out, 0
2034 // %OldHi = G_EXTRACT %out, 64
2035 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2036 unsigned Opcode;
2037 switch (Ordering) {
2038 case AtomicOrdering::Acquire:
2039 Opcode = AArch64::CASPAX;
2040 break;
2041 case AtomicOrdering::Release:
2042 Opcode = AArch64::CASPLX;
2043 break;
2044 case AtomicOrdering::AcquireRelease:
2045 case AtomicOrdering::SequentiallyConsistent:
2046 Opcode = AArch64::CASPALX;
2047 break;
2048 default:
2049 Opcode = AArch64::CASPX;
2050 break;
2051 }
2052
2053 LLT s128 = LLT::scalar(128);
2054 auto CASDst = MRI.createGenericVirtualRegister(s128);
2055 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2056 auto CASNew = MRI.createGenericVirtualRegister(s128);
2057 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2058 .addUse(DesiredI->getOperand(0).getReg())
2059 .addImm(AArch64::sube64)
2060 .addUse(DesiredI->getOperand(1).getReg())
2061 .addImm(AArch64::subo64);
2062 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2063 .addUse(NewI->getOperand(0).getReg())
2064 .addImm(AArch64::sube64)
2065 .addUse(NewI->getOperand(1).getReg())
2066 .addImm(AArch64::subo64);
2067
2068 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2069
2070 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2071 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2072 } else {
2073 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2074 // can take arbitrary registers so it just has the normal GPR64 operands the
2075 // rest of AArch64 is expecting.
2076 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2077 unsigned Opcode;
2078 switch (Ordering) {
2079 case AtomicOrdering::Acquire:
2080 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2081 break;
2082 case AtomicOrdering::Release:
2083 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2084 break;
2085 case AtomicOrdering::AcquireRelease:
2086 case AtomicOrdering::SequentiallyConsistent:
2087 Opcode = AArch64::CMP_SWAP_128;
2088 break;
2089 default:
2090 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2091 break;
2092 }
2093
2094 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2095 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2096 {Addr, DesiredI->getOperand(0),
2097 DesiredI->getOperand(1), NewI->getOperand(0),
2098 NewI->getOperand(1)});
2099 }
2100
2101 CAS.cloneMemRefs(MI);
2102 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2103 *MRI.getTargetRegisterInfo(),
2104 *ST->getRegBankInfo());
2105
2106 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2107 MI.eraseFromParent();
2108 return true;
2109}
2110
2111bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2112 LegalizerHelper &Helper) const {
2113 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2114 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2115 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2116 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2117 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
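 // This relies on the identity cttz(x) == ctlz(bitreverse(x)); on AArch64 the
 // pair is expected to select to RBIT + CLZ.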
2118 MI.eraseFromParent();
2119 return true;
2120}
2121
2122bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2123 LegalizerHelper &Helper) const {
2124 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2125
2126 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2127 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2128 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2129 // the instruction).
2130 auto &Value = MI.getOperand(1);
2131 Register ExtValueReg =
2132 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2133 Value.setReg(ExtValueReg);
2134 return true;
2135 }
2136
2137 return false;
2138}
2139
2140bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2141 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2142 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2143 auto VRegAndVal =
2144 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2145 if (VRegAndVal)
2146 return true;
2147 return Helper.lowerExtractInsertVectorElt(MI) !=
2148 LegalizerHelper::LegalizeResult::UnableToLegalize;
2149}
2150
2151bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2152 MachineInstr &MI, LegalizerHelper &Helper) const {
2153 MachineFunction &MF = *MI.getParent()->getParent();
2154 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2155 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2156
2157 // If stack probing is not enabled for this function, use the default
2158 // lowering.
2159 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2160 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2161 "inline-asm") {
2162 Helper.lowerDynStackAlloc(MI);
2163 return true;
2164 }
2165
2166 Register Dst = MI.getOperand(0).getReg();
2167 Register AllocSize = MI.getOperand(1).getReg();
2168 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2169
2170 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2171 "Unexpected type for dynamic alloca");
2172 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2173 "Unexpected type for dynamic alloca");
2174
2175 LLT PtrTy = MRI.getType(Dst);
2176 Register SPReg =
2177 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2178 Register SPTmp =
2179 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2180 auto NewMI =
2181 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2182 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2183 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2184 MIRBuilder.buildCopy(Dst, SPTmp);
2185
2186 MI.eraseFromParent();
2187 return true;
2188}
2189
2190bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2191 LegalizerHelper &Helper) const {
2192 MachineIRBuilder &MIB = Helper.MIRBuilder;
2193 auto &AddrVal = MI.getOperand(0);
2194
2195 int64_t IsWrite = MI.getOperand(1).getImm();
2196 int64_t Locality = MI.getOperand(2).getImm();
2197 int64_t IsData = MI.getOperand(3).getImm();
2198
2199 bool IsStream = Locality == 0;
2200 if (Locality != 0) {
2201 assert(Locality <= 3 && "Prefetch locality out-of-range");
2202 // The locality degree is inversely related to the target cache level:
2203 // higher locality means a faster (lower-numbered) cache, and the PRFM
2204 // encoding starts at 0 for L1, so flip the number.
2205 Locality = 3 - Locality;
2206 }
2207
2208 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
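 // For example, a read data prefetch with locality 3 (temporal, L1) yields
 // PrfOp = 0b00000 (PLDL1KEEP), while locality 0 sets the streaming bit
 // instead, giving 0b00001 (PLDL1STRM).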
2209
2210 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2211 MI.eraseFromParent();
2212 return true;
2213}