1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
95 getActionDefinitionsBuilder(
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
107
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
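 // (Illustrative: under this rule a <16 x s16> value, 256 bits, is clamped
 // down to <8 x s16>, and a <4 x s64> value to <2 x s64>.)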
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220 // Making clamping conditional on CSSC extension as without legal types we
221 // lower to CMP which can fold one of the two sxtb's we'd otherwise need
222 // if we detect a type smaller than 32-bit.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
231 // FIXME: This shouldn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
235
236 getActionDefinitionsBuilder(
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
270 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
271 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10})
272 // We need a call for these, so we always need to scalarize.
273 .scalarize(0)
274 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
275 .minScalar(0, s32)
276 .libcallFor({s32, s64});
278 .scalarize(0)
279 .minScalar(0, s32)
280 .libcallFor({{s32, s32}, {s64, s32}});
281
283 .legalIf(all(typeInSet(0, {s32, s64, p0}),
284 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
286 .clampScalar(0, s32, s64)
288 .minScalar(1, s8)
289 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
290 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
291
293 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
294 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
296 .clampScalar(1, s32, s128)
298 .minScalar(0, s16)
299 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
300 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
301 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
302
303
304 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
305 auto &Actions = getActionDefinitionsBuilder(Op);
306
307 if (Op == G_SEXTLOAD)
309
310 // Atomics have zero extending behavior.
311 Actions
312 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
313 {s32, p0, s16, 8},
314 {s32, p0, s32, 8},
315 {s64, p0, s8, 2},
316 {s64, p0, s16, 2},
317 {s64, p0, s32, 4},
318 {s64, p0, s64, 8},
319 {p0, p0, s64, 8},
320 {v2s32, p0, s64, 8}})
321 .widenScalarToNextPow2(0)
322 .clampScalar(0, s32, s64)
323 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
324 // how to do that yet.
325 .unsupportedIfMemSizeNotPow2()
326 // Lower anything left over into G_*EXT and G_LOAD
327 .lower();
328 }
329
330 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
331 const LLT &ValTy = Query.Types[0];
332 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
333 };
334
335 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
336 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
337
338 if (ST.hasSVE()) {
339 LoadActions.legalForTypesWithMemDesc({
340 // 128 bit base sizes
341 {nxv16s8, p0, nxv16s8, 8},
342 {nxv8s16, p0, nxv8s16, 8},
343 {nxv4s32, p0, nxv4s32, 8},
344 {nxv2s64, p0, nxv2s64, 8},
345 });
346
347 // TODO: Add nxv2p0. Consider bitcastIf.
348 // See #92130
349 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
350 StoreActions.legalForTypesWithMemDesc({
351 // 128 bit base sizes
352 {nxv16s8, p0, nxv16s8, 8},
353 {nxv8s16, p0, nxv8s16, 8},
354 {nxv4s32, p0, nxv4s32, 8},
355 {nxv2s64, p0, nxv2s64, 8},
356 });
357 }
358
359 LoadActions
360 .customIf([=](const LegalityQuery &Query) {
361 return HasRCPC3 && Query.Types[0] == s128 &&
362 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
363 })
364 .customIf([=](const LegalityQuery &Query) {
365 return Query.Types[0] == s128 &&
366 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
367 })
368 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
369 {s16, p0, s16, 8},
370 {s32, p0, s32, 8},
371 {s64, p0, s64, 8},
372 {p0, p0, s64, 8},
373 {s128, p0, s128, 8},
374 {v8s8, p0, s64, 8},
375 {v16s8, p0, s128, 8},
376 {v4s16, p0, s64, 8},
377 {v8s16, p0, s128, 8},
378 {v2s32, p0, s64, 8},
379 {v4s32, p0, s128, 8},
380 {v2s64, p0, s128, 8}})
381 // These extends are also legal
382 .legalForTypesWithMemDesc(
383 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
384 .widenScalarToNextPow2(0, /* MinSize = */ 8)
385 .clampMaxNumElements(0, s8, 16)
386 .clampMaxNumElements(0, s16, 8)
387 .clampMaxNumElements(0, s32, 4)
388 .clampMaxNumElements(0, s64, 2)
389 .clampMaxNumElements(0, p0, 2)
390 .lowerIfMemSizeNotByteSizePow2()
391 .clampScalar(0, s8, s64)
392 .narrowScalarIf(
393 [=](const LegalityQuery &Query) {
394 // Clamp extending load results to 32-bits.
395 return Query.Types[0].isScalar() &&
396 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
397 Query.Types[0].getSizeInBits() > 32;
398 },
399 changeTo(0, s32))
400 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
401 .bitcastIf(typeInSet(0, {v4s8}),
402 [=](const LegalityQuery &Query) {
403 const LLT VecTy = Query.Types[0];
404 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
405 })
406 .customIf(IsPtrVecPred)
407 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
408
409 StoreActions
410 .customIf([=](const LegalityQuery &Query) {
411 return HasRCPC3 && Query.Types[0] == s128 &&
412 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
413 })
414 .customIf([=](const LegalityQuery &Query) {
415 return Query.Types[0] == s128 &&
416 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
417 })
418 .legalForTypesWithMemDesc(
419 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
420 {s32, p0, s8, 8}, // truncstorei8 from s32
421 {s64, p0, s8, 8}, // truncstorei8 from s64
422 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
423 {s64, p0, s16, 8}, // truncstorei16 from s64
424 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
425 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
426 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
427 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
428 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
429 .clampScalar(0, s8, s64)
430 .lowerIf([=](const LegalityQuery &Query) {
431 return Query.Types[0].isScalar() &&
432 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
433 })
434 // Maximum: sN * k = 128
435 .clampMaxNumElements(0, s8, 16)
436 .clampMaxNumElements(0, s16, 8)
437 .clampMaxNumElements(0, s32, 4)
438 .clampMaxNumElements(0, s64, 2)
439 .clampMaxNumElements(0, p0, 2)
440 .lowerIfMemSizeNotPow2()
441 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
442 .bitcastIf(typeInSet(0, {v4s8}),
443 [=](const LegalityQuery &Query) {
444 const LLT VecTy = Query.Types[0];
445 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
446 })
447 .customIf(IsPtrVecPred)
448 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
449
450 getActionDefinitionsBuilder(G_INDEXED_STORE)
451 // Idx 0 == Ptr, Idx 1 == Val
452 // TODO: we can implement legalizations but as of now these are
453 // generated in a very specific way.
455 {p0, s8, s8, 8},
456 {p0, s16, s16, 8},
457 {p0, s32, s8, 8},
458 {p0, s32, s16, 8},
459 {p0, s32, s32, 8},
460 {p0, s64, s64, 8},
461 {p0, p0, p0, 8},
462 {p0, v8s8, v8s8, 8},
463 {p0, v16s8, v16s8, 8},
464 {p0, v4s16, v4s16, 8},
465 {p0, v8s16, v8s16, 8},
466 {p0, v2s32, v2s32, 8},
467 {p0, v4s32, v4s32, 8},
468 {p0, v2s64, v2s64, 8},
469 {p0, v2p0, v2p0, 8},
470 {p0, s128, s128, 8},
471 })
472 .unsupported();
473
474 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
475 LLT LdTy = Query.Types[0];
476 LLT PtrTy = Query.Types[1];
477 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
478 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
479 return false;
480 if (PtrTy != p0)
481 return false;
482 return true;
483 };
484 getActionDefinitionsBuilder(G_INDEXED_LOAD)
487 .legalIf(IndexedLoadBasicPred)
488 .unsupported();
489 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
490 .unsupportedIf(
492 .legalIf(all(typeInSet(0, {s16, s32, s64}),
493 LegalityPredicate([=](const LegalityQuery &Q) {
494 LLT LdTy = Q.Types[0];
495 LLT PtrTy = Q.Types[1];
496 LLT MemTy = Q.MMODescrs[0].MemoryTy;
497 if (PtrTy != p0)
498 return false;
499 if (LdTy == s16)
500 return MemTy == s8;
501 if (LdTy == s32)
502 return MemTy == s8 || MemTy == s16;
503 if (LdTy == s64)
504 return MemTy == s8 || MemTy == s16 || MemTy == s32;
505 return false;
506 })))
507 .unsupported();
508
509 // Constants
511 .legalFor({p0, s8, s16, s32, s64})
512 .widenScalarToNextPow2(0)
513 .clampScalar(0, s8, s64);
514 getActionDefinitionsBuilder(G_FCONSTANT)
515 .legalIf([=](const LegalityQuery &Query) {
516 const auto &Ty = Query.Types[0];
517 if (HasFP16 && Ty == s16)
518 return true;
519 return Ty == s32 || Ty == s64 || Ty == s128;
520 })
521 .clampScalar(0, MinFPScalar, s128);
522
523 // FIXME: fix moreElementsToNextPow2
525 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
527 .clampScalar(1, s32, s64)
528 .clampScalar(0, s32, s32)
529 .minScalarEltSameAsIf(
530 [=](const LegalityQuery &Query) {
531 const LLT &Ty = Query.Types[0];
532 const LLT &SrcTy = Query.Types[1];
533 return Ty.isVector() && !SrcTy.isPointerVector() &&
534 Ty.getElementType() != SrcTy.getElementType();
535 },
536 0, 1)
537 .minScalarOrEltIf(
538 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
539 1, s32)
540 .minScalarOrEltIf(
541 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
542 s64)
544 .clampNumElements(1, v8s8, v16s8)
545 .clampNumElements(1, v4s16, v8s16)
546 .clampNumElements(1, v2s32, v4s32)
547 .clampNumElements(1, v2s64, v2s64)
548 .customIf(isVector(0));
549
551 .legalFor({{s32, MinFPScalar},
552 {s32, s32},
553 {s32, s64},
554 {v4s32, v4s32},
555 {v2s32, v2s32},
556 {v2s64, v2s64}})
557 .legalIf([=](const LegalityQuery &Query) {
558 const auto &Ty = Query.Types[1];
559 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
560 })
562 .clampScalar(0, s32, s32)
563 .clampScalarOrElt(1, MinFPScalar, s64)
564 .minScalarEltSameAsIf(
565 [=](const LegalityQuery &Query) {
566 const LLT &Ty = Query.Types[0];
567 const LLT &SrcTy = Query.Types[1];
568 return Ty.isVector() && !SrcTy.isPointerVector() &&
569 Ty.getElementType() != SrcTy.getElementType();
570 },
571 0, 1)
572 .clampNumElements(1, v4s16, v8s16)
573 .clampNumElements(1, v2s32, v4s32)
574 .clampMaxNumElements(1, s64, 2)
575 .moreElementsToNextPow2(1);
576
577 // Extensions
578 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
579 unsigned DstSize = Query.Types[0].getSizeInBits();
580
581 // Handle legal vectors using legalFor
582 if (Query.Types[0].isVector())
583 return false;
584
585 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
586 return false; // Extending to a scalar s128 needs narrowing.
587
588 const LLT &SrcTy = Query.Types[1];
589
590 // Make sure we fit in a register otherwise. Don't bother checking that
591 // the source type is below 128 bits. We shouldn't be allowing anything
592 // through which is wider than the destination in the first place.
593 unsigned SrcSize = SrcTy.getSizeInBits();
594 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
595 return false;
596
597 return true;
598 };
599 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
600 .legalIf(ExtLegalFunc)
601 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
602 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
604 .clampMaxNumElements(1, s8, 8)
605 .clampMaxNumElements(1, s16, 4)
606 .clampMaxNumElements(1, s32, 2)
607 // Tries to convert a large EXTEND into two smaller EXTENDs
608 .lowerIf([=](const LegalityQuery &Query) {
609 return (Query.Types[0].getScalarSizeInBits() >
610 Query.Types[1].getScalarSizeInBits() * 2) &&
611 Query.Types[0].isVector() &&
612 (Query.Types[1].getScalarSizeInBits() == 8 ||
613 Query.Types[1].getScalarSizeInBits() == 16);
614 })
615 .clampMinNumElements(1, s8, 8)
616 .clampMinNumElements(1, s16, 4);
617
619 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
621 .clampMaxNumElements(0, s8, 8)
622 .clampMaxNumElements(0, s16, 4)
623 .clampMaxNumElements(0, s32, 2)
624 .minScalarOrEltIf(
625 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
626 0, s8)
627 .lowerIf([=](const LegalityQuery &Query) {
628 LLT DstTy = Query.Types[0];
629 LLT SrcTy = Query.Types[1];
630 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
631 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
632 })
633 .clampMinNumElements(0, s8, 8)
634 .clampMinNumElements(0, s16, 4)
635 .alwaysLegal();
636
637 getActionDefinitionsBuilder(G_SEXT_INREG)
638 .legalFor({s32, s64})
639 .legalFor(PackedVectorAllTypeList)
640 .maxScalar(0, s64)
641 .clampNumElements(0, v8s8, v16s8)
642 .clampNumElements(0, v4s16, v8s16)
643 .clampNumElements(0, v2s32, v4s32)
644 .clampMaxNumElements(0, s64, 2)
645 .lower();
646
647 // FP conversions
649 .legalFor(
650 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
651 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
652 .clampNumElements(0, v4s16, v4s16)
653 .clampNumElements(0, v2s32, v2s32)
654 .scalarize(0);
655
657 .legalFor(
658 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
659 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
660 .clampNumElements(0, v4s32, v4s32)
661 .clampNumElements(0, v2s64, v2s64)
662 .scalarize(0);
663
664 // Conversions
665 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
666 .legalFor({{s32, s32},
667 {s64, s32},
668 {s32, s64},
669 {s64, s64},
670 {v2s64, v2s64},
671 {v4s32, v4s32},
672 {v2s32, v2s32}})
673 .legalIf([=](const LegalityQuery &Query) {
674 return HasFP16 &&
675 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
676 Query.Types[1] == v8s16) &&
677 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
678 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
679 })
680 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
681 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
682 // The range of a fp16 value fits into an i17, so we can lower the width
683 // to i64.
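 // (An IEEE half is at most +/-65504 in magnitude, so an in-range conversion
 // result loses nothing by dropping bits above 64.)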
684 .narrowScalarIf(
685 [=](const LegalityQuery &Query) {
686 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
687 },
688 changeTo(0, s64))
690 .widenScalarOrEltToNextPow2OrMinSize(0)
691 .minScalar(0, s32)
692 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
693 .widenScalarIf(
694 [=](const LegalityQuery &Query) {
695 return Query.Types[0].getScalarSizeInBits() <= 64 &&
696 Query.Types[0].getScalarSizeInBits() >
697 Query.Types[1].getScalarSizeInBits();
698 },
700 .widenScalarIf(
701 [=](const LegalityQuery &Query) {
702 return Query.Types[1].getScalarSizeInBits() <= 64 &&
703 Query.Types[0].getScalarSizeInBits() <
704 Query.Types[1].getScalarSizeInBits();
705 },
707 .clampNumElements(0, v4s16, v8s16)
708 .clampNumElements(0, v2s32, v4s32)
709 .clampMaxNumElements(0, s64, 2)
710 .libcallFor(
711 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
712
713 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
714 .legalFor({{s32, s32},
715 {s64, s32},
716 {s32, s64},
717 {s64, s64},
718 {v2s64, v2s64},
719 {v4s32, v4s32},
720 {v2s32, v2s32}})
721 .legalIf([=](const LegalityQuery &Query) {
722 return HasFP16 &&
723 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
724 Query.Types[0] == v8s16) &&
725 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
726 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
727 })
728 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
729 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
731 .widenScalarOrEltToNextPow2OrMinSize(1)
732 .minScalar(1, s32)
733 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
734 .widenScalarIf(
735 [=](const LegalityQuery &Query) {
736 return Query.Types[1].getScalarSizeInBits() <= 64 &&
737 Query.Types[0].getScalarSizeInBits() <
738 Query.Types[1].getScalarSizeInBits();
739 },
741 .widenScalarIf(
742 [=](const LegalityQuery &Query) {
743 return Query.Types[0].getScalarSizeInBits() <= 64 &&
744 Query.Types[0].getScalarSizeInBits() >
745 Query.Types[1].getScalarSizeInBits();
746 },
748 .clampNumElements(0, v4s16, v8s16)
749 .clampNumElements(0, v2s32, v4s32)
750 .clampMaxNumElements(0, s64, 2)
751 .libcallFor({{s16, s128},
752 {s32, s128},
753 {s64, s128},
754 {s128, s128},
755 {s128, s32},
756 {s128, s64}});
757
758 // Control-flow
760 .legalFor({s32})
761 .clampScalar(0, s32, s32);
762 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
763
765 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
766 .widenScalarToNextPow2(0)
767 .clampScalar(0, s32, s64)
768 .clampScalar(1, s32, s32)
770 .lowerIf(isVector(0));
771
772 // Pointer-handling
773 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
774
775 if (TM.getCodeModel() == CodeModel::Small)
776 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
777 else
778 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
779
780 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
781 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
782
784 .legalFor({{s64, p0}, {v2s64, v2p0}})
785 .widenScalarToNextPow2(0, 64)
786 .clampScalar(0, s64, s64);
787
789 .unsupportedIf([&](const LegalityQuery &Query) {
790 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
791 })
792 .legalFor({{p0, s64}, {v2p0, v2s64}});
793
794 // Casts for 32 and 64-bit width type are just copies.
795 // Same for 128-bit width type, except they are on the FPR bank.
797 // Keeping 32-bit instructions legal to prevent regression in some tests
798 .legalForCartesianProduct({s32, v2s16, v4s8})
799 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
800 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
801 .lowerIf([=](const LegalityQuery &Query) {
802 return Query.Types[0].isVector() != Query.Types[1].isVector();
803 })
805 .clampNumElements(0, v8s8, v16s8)
806 .clampNumElements(0, v4s16, v8s16)
807 .clampNumElements(0, v2s32, v4s32)
808 .lower();
809
810 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
811
812 // va_list must be a pointer, but most sized types are pretty easy to handle
813 // as the destination.
815 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
816 .clampScalar(0, s8, s64)
817 .widenScalarToNextPow2(0, /*Min*/ 8);
818
819 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
820 .lowerIf(
821 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
822
823 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
824 return ST.outlineAtomics() && !ST.hasLSE();
825 };
826
827 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
828 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
829 predNot(UseOutlineAtomics)))
830 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
831 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
832 return Query.Types[0].getSizeInBits() == 128 &&
833 !UseOutlineAtomics(Query);
834 })
835 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
836 UseOutlineAtomics))
837 .clampScalar(0, s32, s64);
838
839 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
840 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
841 G_ATOMICRMW_XOR})
842 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
843 predNot(UseOutlineAtomics)))
844 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
845 UseOutlineAtomics))
846 .clampScalar(0, s32, s64);
847
848 // Do not outline these atomics operations, as per comment in
849 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
850 getActionDefinitionsBuilder(
851 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
852 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
853 .clampScalar(0, s32, s64);
854
855 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
856
857 // Merge/Unmerge
858 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
859 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
860 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
862 .widenScalarToNextPow2(LitTyIdx, 8)
863 .widenScalarToNextPow2(BigTyIdx, 32)
864 .clampScalar(LitTyIdx, s8, s64)
865 .clampScalar(BigTyIdx, s32, s128)
866 .legalIf([=](const LegalityQuery &Q) {
867 switch (Q.Types[BigTyIdx].getSizeInBits()) {
868 case 32:
869 case 64:
870 case 128:
871 break;
872 default:
873 return false;
874 }
875 switch (Q.Types[LitTyIdx].getSizeInBits()) {
876 case 8:
877 case 16:
878 case 32:
879 case 64:
880 return true;
881 default:
882 return false;
883 }
884 });
885 }
886
887 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
888 .unsupportedIf([=](const LegalityQuery &Query) {
889 const LLT &EltTy = Query.Types[1].getElementType();
890 return Query.Types[0] != EltTy;
891 })
892 .minScalar(2, s64)
893 .customIf([=](const LegalityQuery &Query) {
894 const LLT &VecTy = Query.Types[1];
895 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
896 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
897 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
898 })
899 .minScalarOrEltIf(
900 [=](const LegalityQuery &Query) {
901 // We want to promote <M x s1> to <M x s64> if that wouldn't
902 // cause the total vec size to be > 128b.
903 return Query.Types[1].getNumElements() <= 2;
904 },
905 0, s64)
906 .minScalarOrEltIf(
907 [=](const LegalityQuery &Query) {
908 return Query.Types[1].getNumElements() <= 4;
909 },
910 0, s32)
911 .minScalarOrEltIf(
912 [=](const LegalityQuery &Query) {
913 return Query.Types[1].getNumElements() <= 8;
914 },
915 0, s16)
916 .minScalarOrEltIf(
917 [=](const LegalityQuery &Query) {
918 return Query.Types[1].getNumElements() <= 16;
919 },
920 0, s8)
921 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
923 .clampMaxNumElements(1, s64, 2)
924 .clampMaxNumElements(1, s32, 4)
925 .clampMaxNumElements(1, s16, 8)
926 .clampMaxNumElements(1, s8, 16)
927 .clampMaxNumElements(1, p0, 2);
928
929 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
930 .legalIf(
931 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
934 .clampNumElements(0, v8s8, v16s8)
935 .clampNumElements(0, v4s16, v8s16)
936 .clampNumElements(0, v2s32, v4s32)
937 .clampMaxNumElements(0, s64, 2)
938 .clampMaxNumElements(0, p0, 2);
939
940 getActionDefinitionsBuilder(G_BUILD_VECTOR)
941 .legalFor({{v8s8, s8},
942 {v16s8, s8},
943 {v4s16, s16},
944 {v8s16, s16},
945 {v2s32, s32},
946 {v4s32, s32},
947 {v2p0, p0},
948 {v2s64, s64}})
949 .clampNumElements(0, v4s32, v4s32)
950 .clampNumElements(0, v2s64, v2s64)
951 .minScalarOrElt(0, s8)
953 .minScalarSameAs(1, 0);
954
955 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
956
959 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
960 .scalarize(1)
961 .widenScalarToNextPow2(1, /*Min=*/32)
962 .clampScalar(1, s32, s64)
963 .scalarSameSizeAs(0, 1);
964 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
965
966 // TODO: Custom lowering for v2s32, v4s32, v2s64.
967 getActionDefinitionsBuilder(G_BITREVERSE)
968 .legalFor({s32, s64, v8s8, v16s8})
969 .widenScalarToNextPow2(0, /*Min = */ 32)
970 .clampScalar(0, s32, s64);
971
972 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
973
975 .lowerIf(isVector(0))
976 .widenScalarToNextPow2(1, /*Min=*/32)
977 .clampScalar(1, s32, s64)
978 .scalarSameSizeAs(0, 1)
979 .legalIf([=](const LegalityQuery &Query) {
980 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
981 })
982 .customIf([=](const LegalityQuery &Query) {
983 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
984 });
985
986 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
987 .legalIf([=](const LegalityQuery &Query) {
988 const LLT &DstTy = Query.Types[0];
989 const LLT &SrcTy = Query.Types[1];
990 // For now just support the TBL2 variant which needs the source vectors
991 // to be the same size as the dest.
992 if (DstTy != SrcTy)
993 return false;
994 return llvm::is_contained(
995 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
996 })
997 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
998 // just want those lowered into G_BUILD_VECTOR.
999 .lowerIf([=](const LegalityQuery &Query) {
1000 return !Query.Types[1].isVector();
1001 })
1002 .moreElementsIf(
1003 [](const LegalityQuery &Query) {
1004 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1005 Query.Types[0].getNumElements() >
1006 Query.Types[1].getNumElements();
1007 },
1008 changeTo(1, 0))
1010 .moreElementsIf(
1011 [](const LegalityQuery &Query) {
1012 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1013 Query.Types[0].getNumElements() <
1014 Query.Types[1].getNumElements();
1015 },
1016 changeTo(0, 1))
1017 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1018 .clampNumElements(0, v8s8, v16s8)
1019 .clampNumElements(0, v4s16, v8s16)
1020 .clampNumElements(0, v4s32, v4s32)
1021 .clampNumElements(0, v2s64, v2s64);
1022
1023 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1024 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1025 .bitcastIf(
1026 [=](const LegalityQuery &Query) {
1027 return Query.Types[0].getSizeInBits() <= 128 &&
1028 Query.Types[1].getSizeInBits() <= 64;
1029 },
1030 [=](const LegalityQuery &Query) {
1031 const LLT DstTy = Query.Types[0];
1032 const LLT SrcTy = Query.Types[1];
1033 return std::pair(
1034 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1037 SrcTy.getNumElements())));
1038 });
1039
1040 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1041
1042 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1043
1044 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1045
1046 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1047
1048 if (ST.hasMOPS()) {
1049 // G_BZERO is not supported. Currently it is only emitted by
1050 // PreLegalizerCombiner for G_MEMSET with zero constant.
1052
1054 .legalForCartesianProduct({p0}, {s64}, {s64})
1055 .customForCartesianProduct({p0}, {s8}, {s64})
1056 .immIdx(0); // Inform verifier imm idx 0 is handled.
1057
1058 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1059 .legalForCartesianProduct({p0}, {p0}, {s64})
1060 .immIdx(0); // Inform verifier imm idx 0 is handled.
1061
1062 // G_MEMCPY_INLINE does not have a tailcall immediate
1063 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1064 .legalForCartesianProduct({p0}, {p0}, {s64});
1065
1066 } else {
1067 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1068 .libcall();
1069 }
1070
1071 // FIXME: Legal vector types are only legal with NEON.
1072 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1073 if (HasCSSC)
1074 ABSActions
1075 .legalFor({s32, s64});
1076 ABSActions.legalFor(PackedVectorAllTypeList)
1077 .customIf([=](const LegalityQuery &Q) {
1078 // TODO: Fix suboptimal codegen for 128+ bit types.
1079 LLT SrcTy = Q.Types[0];
1080 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1081 })
1082 .widenScalarIf(
1083 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1084 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1085 .widenScalarIf(
1086 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1087 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1088 .clampNumElements(0, v8s8, v16s8)
1089 .clampNumElements(0, v4s16, v8s16)
1090 .clampNumElements(0, v2s32, v4s32)
1091 .clampNumElements(0, v2s64, v2s64)
1092 .moreElementsToNextPow2(0)
1093 .lower();
1094
1095 // For fadd reductions we have pairwise operations available. We treat the
1096 // usual legal types as legal and handle the lowering to pairwise instructions
1097 // later.
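 // E.g. a G_VECREDUCE_FADD of a v4s32 operand stays legal here and is only
 // expanded into a short chain of pairwise additions (FADDP) later on.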
1098 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1099 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1100 .legalIf([=](const LegalityQuery &Query) {
1101 const auto &Ty = Query.Types[1];
1102 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1103 })
1104 .minScalarOrElt(0, MinFPScalar)
1105 .clampMaxNumElements(1, s64, 2)
1106 .clampMaxNumElements(1, s32, 4)
1107 .clampMaxNumElements(1, s16, 8)
1108 .lower();
1109
1110 // For fmul reductions we need to split up into individual operations. We
1111 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1112 // smaller types, followed by scalarizing what remains.
1113 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1114 .minScalarOrElt(0, MinFPScalar)
1115 .clampMaxNumElements(1, s64, 2)
1116 .clampMaxNumElements(1, s32, 4)
1117 .clampMaxNumElements(1, s16, 8)
1118 .clampMaxNumElements(1, s32, 2)
1119 .clampMaxNumElements(1, s16, 4)
1120 .scalarize(1)
1121 .lower();
1122
1123 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1124 .scalarize(2)
1125 .lower();
1126
1127 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1128 .legalFor({{s8, v16s8},
1129 {s8, v8s8},
1130 {s16, v8s16},
1131 {s16, v4s16},
1132 {s32, v4s32},
1133 {s32, v2s32},
1134 {s64, v2s64}})
1135 .clampMaxNumElements(1, s64, 2)
1136 .clampMaxNumElements(1, s32, 4)
1137 .clampMaxNumElements(1, s16, 8)
1138 .clampMaxNumElements(1, s8, 16)
1139 .lower();
1140
1141 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1142 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1143 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1144 .legalIf([=](const LegalityQuery &Query) {
1145 const auto &Ty = Query.Types[1];
1146 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1147 })
1148 .minScalarOrElt(0, MinFPScalar)
1149 .clampMaxNumElements(1, s64, 2)
1150 .clampMaxNumElements(1, s32, 4)
1151 .clampMaxNumElements(1, s16, 8)
1152 .lower();
1153
1154 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1155 .clampMaxNumElements(1, s32, 2)
1156 .clampMaxNumElements(1, s16, 4)
1157 .clampMaxNumElements(1, s8, 8)
1158 .scalarize(1)
1159 .lower();
1160
1161 getActionDefinitionsBuilder(
1162 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1163 .legalFor({{s8, v8s8},
1164 {s8, v16s8},
1165 {s16, v4s16},
1166 {s16, v8s16},
1167 {s32, v2s32},
1168 {s32, v4s32}})
1169 .moreElementsIf(
1170 [=](const LegalityQuery &Query) {
1171 return Query.Types[1].isVector() &&
1172 Query.Types[1].getElementType() != s8 &&
1173 Query.Types[1].getNumElements() & 1;
1174 },
1176 .clampMaxNumElements(1, s64, 2)
1177 .clampMaxNumElements(1, s32, 4)
1178 .clampMaxNumElements(1, s16, 8)
1179 .clampMaxNumElements(1, s8, 16)
1180 .scalarize(1)
1181 .lower();
1182
1183 getActionDefinitionsBuilder(
1184 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1185 // Try to break down into smaller vectors as long as they're at least 64
1186 // bits. This lets us use vector operations for some parts of the
1187 // reduction.
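 // E.g. a G_VECREDUCE_OR of a v4s32 operand (128 bits) is first narrowed to
 // operate on v2s32 (64 bits); whatever remains after that is scalarized.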
1188 .fewerElementsIf(
1189 [=](const LegalityQuery &Q) {
1190 LLT SrcTy = Q.Types[1];
1191 if (SrcTy.isScalar())
1192 return false;
1193 if (!isPowerOf2_32(SrcTy.getNumElements()))
1194 return false;
1195 // We can usually perform 64b vector operations.
1196 return SrcTy.getSizeInBits() > 64;
1197 },
1198 [=](const LegalityQuery &Q) {
1199 LLT SrcTy = Q.Types[1];
1200 return std::make_pair(1, SrcTy.divide(2));
1201 })
1202 .scalarize(1)
1203 .lower();
1204
1205 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1206 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1207 .lower();
1208
1210 .legalFor({{s32, s64}, {s64, s64}})
1211 .customIf([=](const LegalityQuery &Q) {
1212 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1213 })
1214 .lower();
1216
1217 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1218 .customFor({{s32, s32}, {s64, s64}});
1219
1220 auto always = [=](const LegalityQuery &Q) { return true; };
1221 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1222 if (HasCSSC)
1223 CTPOPActions
1224 .legalFor({{s32, s32},
1225 {s64, s64},
1226 {v8s8, v8s8},
1227 {v16s8, v16s8}})
1228 .customFor({{s128, s128},
1229 {v2s64, v2s64},
1230 {v2s32, v2s32},
1231 {v4s32, v4s32},
1232 {v4s16, v4s16},
1233 {v8s16, v8s16}});
1234 else
1235 CTPOPActions
1236 .legalFor({{v8s8, v8s8},
1237 {v16s8, v16s8}})
1238 .customFor({{s32, s32},
1239 {s64, s64},
1240 {s128, s128},
1241 {v2s64, v2s64},
1242 {v2s32, v2s32},
1243 {v4s32, v4s32},
1244 {v4s16, v4s16},
1245 {v8s16, v8s16}});
1246 CTPOPActions
1247 .clampScalar(0, s32, s128)
1248 .widenScalarToNextPow2(0)
1249 .minScalarEltSameAsIf(always, 1, 0)
1250 .maxScalarEltSameAsIf(always, 1, 0);
1251
1252 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1253 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1254 .clampNumElements(0, v8s8, v16s8)
1255 .clampNumElements(0, v4s16, v8s16)
1256 .clampNumElements(0, v2s32, v4s32)
1257 .clampMaxNumElements(0, s64, 2)
1259 .lower();
1260
1261 // TODO: Libcall support for s128.
1262 // TODO: s16 should be legal with full FP16 support.
1263 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1264 .legalFor({{s64, s32}, {s64, s64}});
1265
1266 // TODO: Custom legalization for mismatched types.
1267 getActionDefinitionsBuilder(G_FCOPYSIGN)
1269 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1270 [=](const LegalityQuery &Query) {
1271 const LLT Ty = Query.Types[0];
1272 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1273 })
1274 .lower();
1275
1277
1278 // Access to floating-point environment.
1279 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1280 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1281 .libcall();
1282
1283 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1284
1285 getActionDefinitionsBuilder(G_PREFETCH).custom();
1286
1288 verify(*ST.getInstrInfo());
1289}
1290
1293 LostDebugLocObserver &LocObserver) const {
1294 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1295 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1296 GISelChangeObserver &Observer = Helper.Observer;
1297 switch (MI.getOpcode()) {
1298 default:
1299 // No idea what to do.
1300 return false;
1301 case TargetOpcode::G_VAARG:
1302 return legalizeVaArg(MI, MRI, MIRBuilder);
1303 case TargetOpcode::G_LOAD:
1304 case TargetOpcode::G_STORE:
1305 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1306 case TargetOpcode::G_SHL:
1307 case TargetOpcode::G_ASHR:
1308 case TargetOpcode::G_LSHR:
1309 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1310 case TargetOpcode::G_GLOBAL_VALUE:
1311 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1312 case TargetOpcode::G_SBFX:
1313 case TargetOpcode::G_UBFX:
1314 return legalizeBitfieldExtract(MI, MRI, Helper);
1315 case TargetOpcode::G_FSHL:
1316 case TargetOpcode::G_FSHR:
1317 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1318 case TargetOpcode::G_ROTR:
1319 return legalizeRotate(MI, MRI, Helper);
1320 case TargetOpcode::G_CTPOP:
1321 return legalizeCTPOP(MI, MRI, Helper);
1322 case TargetOpcode::G_ATOMIC_CMPXCHG:
1323 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1324 case TargetOpcode::G_CTTZ:
1325 return legalizeCTTZ(MI, Helper);
1326 case TargetOpcode::G_BZERO:
1327 case TargetOpcode::G_MEMCPY:
1328 case TargetOpcode::G_MEMMOVE:
1329 case TargetOpcode::G_MEMSET:
1330 return legalizeMemOps(MI, Helper);
1331 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1332 return legalizeExtractVectorElt(MI, MRI, Helper);
1333 case TargetOpcode::G_DYN_STACKALLOC:
1334 return legalizeDynStackAlloc(MI, Helper);
1335 case TargetOpcode::G_PREFETCH:
1336 return legalizePrefetch(MI, Helper);
1337 case TargetOpcode::G_ABS:
1338 return Helper.lowerAbsToCNeg(MI);
1339 case TargetOpcode::G_ICMP:
1340 return legalizeICMP(MI, MRI, MIRBuilder);
1341 }
1342
1343 llvm_unreachable("expected switch to return");
1344}
1345
1346bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1348 MachineIRBuilder &MIRBuilder,
1349 GISelChangeObserver &Observer,
1350 LegalizerHelper &Helper) const {
1351 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1352 MI.getOpcode() == TargetOpcode::G_FSHR);
1353
1354 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1355 // lowering
1356 Register ShiftNo = MI.getOperand(3).getReg();
1357 LLT ShiftTy = MRI.getType(ShiftNo);
1358 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1359
1360 // Adjust shift amount according to Opcode (FSHL/FSHR)
1361 // Convert FSHL to FSHR
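 // E.g. for an s32 operation, G_FSHL %a, %b, 8 is equivalent to
 // G_FSHR %a, %b, 24, i.e. the amount becomes BitWidth - Amount.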
1362 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1363 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1364
1365 // Lower non-constant shifts and leave zero shifts to the optimizer.
1366 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1367 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1368 LegalizerHelper::LegalizeResult::Legalized);
1369
1370 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1371
1372 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1373
1374 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1375 // the range [0, BitWidth), it is already legal.
1376 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1377 VRegAndVal->Value.ult(BitWidth))
1378 return true;
1379
1380 // Cast the ShiftNumber to a 64-bit type
1381 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1382
1383 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1384 Observer.changingInstr(MI);
1385 MI.getOperand(3).setReg(Cast64.getReg(0));
1386 Observer.changedInstr(MI);
1387 }
1388 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1389 // instruction
1390 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1391 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1392 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1393 Cast64.getReg(0)});
1394 MI.eraseFromParent();
1395 }
1396 return true;
1397}
1398
1399bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1401 MachineIRBuilder &MIRBuilder) const {
1402 Register DstReg = MI.getOperand(0).getReg();
1403 Register SrcReg1 = MI.getOperand(2).getReg();
1404 Register SrcReg2 = MI.getOperand(3).getReg();
1405 LLT DstTy = MRI.getType(DstReg);
1406 LLT SrcTy = MRI.getType(SrcReg1);
1407
1408 // Check the vector types are legal
1409 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1410 DstTy.getNumElements() != SrcTy.getNumElements() ||
1411 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1412 return false;
1413
1414 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1415 // following passes
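 // E.g. %d:_(v4s32) = G_ICMP intpred(ne), %a, %b is rewritten here as an
 // intpred(eq) compare whose result is then inverted with buildNot; the
 // inverted form matches the existing selection patterns more directly.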
1416 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1417 if (Pred != CmpInst::ICMP_NE)
1418 return true;
1419 Register CmpReg =
1420 MIRBuilder
1421 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1422 .getReg(0);
1423 MIRBuilder.buildNot(DstReg, CmpReg);
1424
1425 MI.eraseFromParent();
1426 return true;
1427}
1428
1429bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1431 LegalizerHelper &Helper) const {
1432 // To allow for imported patterns to match, we ensure that the rotate amount
1433 // is 64b with an extension.
1434 Register AmtReg = MI.getOperand(2).getReg();
1435 LLT AmtTy = MRI.getType(AmtReg);
1436 (void)AmtTy;
1437 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1438 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1439 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1440 Helper.Observer.changingInstr(MI);
1441 MI.getOperand(2).setReg(NewAmt.getReg(0));
1442 Helper.Observer.changedInstr(MI);
1443 return true;
1444}
1445
1446bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1448 GISelChangeObserver &Observer) const {
1449 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1450 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1451 // G_ADD_LOW instructions.
1452 // By splitting this here, we can optimize accesses in the small code model by
1453 // folding the G_ADD_LOW into the load/store offset.
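 // Roughly: %g:_(p0) = G_GLOBAL_VALUE @var becomes an ADRP of @var's page
 // followed by %g:_(p0) = G_ADD_LOW <page>, <low 12 bits of @var's address>.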
1454 auto &GlobalOp = MI.getOperand(1);
1455 // Don't modify an intrinsic call.
1456 if (GlobalOp.isSymbol())
1457 return true;
1458 const auto* GV = GlobalOp.getGlobal();
1459 if (GV->isThreadLocal())
1460 return true; // Don't want to modify TLS vars.
1461
1462 auto &TM = ST->getTargetLowering()->getTargetMachine();
1463 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1464
1465 if (OpFlags & AArch64II::MO_GOT)
1466 return true;
1467
1468 auto Offset = GlobalOp.getOffset();
1469 Register DstReg = MI.getOperand(0).getReg();
1470 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1471 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1472 // Set the regclass on the dest reg too.
1473 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1474
1475 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1476 // by creating a MOVK that sets bits 48-63 of the register to (global address
1477 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1478 // prevent an incorrect tag being generated during relocation when the
1479 // global appears before the code section. Without the offset, a global at
1480 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1481 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1482 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1483 // instead of `0xf`.
1484 // This assumes that we're in the small code model so we can assume a binary
1485 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1486 // binary must also be loaded into address range [0, 2^48). Both of these
1487 // properties need to be ensured at runtime when using tagged addresses.
1488 if (OpFlags & AArch64II::MO_TAGGED) {
1489 assert(!Offset &&
1490 "Should not have folded in an offset for a tagged global!");
1491 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1492 .addGlobalAddress(GV, 0x100000000,
1494 .addImm(48);
1495 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1496 }
1497
1498 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1499 .addGlobalAddress(GV, Offset,
1501 MI.eraseFromParent();
1502 return true;
1503}
1504
1506 MachineInstr &MI) const {
1507 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1508 switch (IntrinsicID) {
1509 case Intrinsic::vacopy: {
1510 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1511 unsigned VaListSize =
1512 (ST->isTargetDarwin() || ST->isTargetWindows())
1513 ? PtrSize
1514 : ST->isTargetILP32() ? 20 : 32;
1515
1516 MachineFunction &MF = *MI.getMF();
1518 LLT::scalar(VaListSize * 8));
1519 MachineIRBuilder MIB(MI);
1520 MIB.buildLoad(Val, MI.getOperand(2),
1523 VaListSize, Align(PtrSize)));
1524 MIB.buildStore(Val, MI.getOperand(1),
1527 VaListSize, Align(PtrSize)));
1528 MI.eraseFromParent();
1529 return true;
1530 }
1531 case Intrinsic::get_dynamic_area_offset: {
1532 MachineIRBuilder &MIB = Helper.MIRBuilder;
1533 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1534 MI.eraseFromParent();
1535 return true;
1536 }
1537 case Intrinsic::aarch64_mops_memset_tag: {
1538 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1539 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1540 // the instruction).
1541 MachineIRBuilder MIB(MI);
1542 auto &Value = MI.getOperand(3);
1543 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1544 Value.setReg(ExtValueReg);
1545 return true;
1546 }
1547 case Intrinsic::aarch64_prefetch: {
1548 MachineIRBuilder MIB(MI);
1549 auto &AddrVal = MI.getOperand(1);
1550
1551 int64_t IsWrite = MI.getOperand(2).getImm();
1552 int64_t Target = MI.getOperand(3).getImm();
1553 int64_t IsStream = MI.getOperand(4).getImm();
1554 int64_t IsData = MI.getOperand(5).getImm();
1555
1556 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1557 (!IsData << 3) | // IsDataCache bit
1558 (Target << 1) | // Cache level bits
1559 (unsigned)IsStream; // Stream bit
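 // E.g. a non-streaming data write prefetch targeting the L2 cache
 // (IsWrite=1, Target=1, IsStream=0, IsData=1) gives
 // PrfOp = (1 << 4) | (1 << 1) = 0b10010, i.e. the PSTL2KEEP hint.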
1560
1561 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1562 MI.eraseFromParent();
1563 return true;
1564 }
1565 case Intrinsic::aarch64_neon_uaddv:
1566 case Intrinsic::aarch64_neon_saddv:
1567 case Intrinsic::aarch64_neon_umaxv:
1568 case Intrinsic::aarch64_neon_smaxv:
1569 case Intrinsic::aarch64_neon_uminv:
1570 case Intrinsic::aarch64_neon_sminv: {
1571 MachineIRBuilder MIB(MI);
1572 MachineRegisterInfo &MRI = *MIB.getMRI();
1573 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1574 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1575 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1576
1577 auto OldDst = MI.getOperand(0).getReg();
1578 auto OldDstTy = MRI.getType(OldDst);
1579 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1580 if (OldDstTy == NewDstTy)
1581 return true;
1582
1583 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1584
1585 Helper.Observer.changingInstr(MI);
1586 MI.getOperand(0).setReg(NewDst);
1587 Helper.Observer.changedInstr(MI);
1588
1589 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1590 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1591 OldDst, NewDst);
1592
1593 return true;
1594 }
1595 case Intrinsic::aarch64_neon_uaddlp:
1596 case Intrinsic::aarch64_neon_saddlp: {
1597 MachineIRBuilder MIB(MI);
1598
1599 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1600 ? AArch64::G_UADDLP
1601 : AArch64::G_SADDLP;
1602 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1603 MI.eraseFromParent();
1604
1605 return true;
1606 }
1607 case Intrinsic::aarch64_neon_uaddlv:
1608 case Intrinsic::aarch64_neon_saddlv: {
1609 MachineIRBuilder MIB(MI);
1610 MachineRegisterInfo &MRI = *MIB.getMRI();
1611
1612 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1613 ? AArch64::G_UADDLV
1614 : AArch64::G_SADDLV;
1615 Register DstReg = MI.getOperand(0).getReg();
1616 Register SrcReg = MI.getOperand(2).getReg();
1617 LLT DstTy = MRI.getType(DstReg);
1618
1619 LLT MidTy, ExtTy;
1620 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1621 MidTy = LLT::fixed_vector(4, 32);
1622 ExtTy = LLT::scalar(32);
1623 } else {
1624 MidTy = LLT::fixed_vector(2, 64);
1625 ExtTy = LLT::scalar(64);
1626 }
1627
1628 Register MidReg =
1629 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1630 Register ZeroReg =
1631 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1632 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1633 {MidReg, ZeroReg})
1634 .getReg(0);
1635
1636 if (DstTy.getScalarSizeInBits() < 32)
1637 MIB.buildTrunc(DstReg, ExtReg);
1638 else
1639 MIB.buildCopy(DstReg, ExtReg);
1640
1641 MI.eraseFromParent();
1642
1643 return true;
1644 }
1645 case Intrinsic::aarch64_neon_smax:
1646 case Intrinsic::aarch64_neon_smin:
1647 case Intrinsic::aarch64_neon_umax:
1648 case Intrinsic::aarch64_neon_umin:
1649 case Intrinsic::aarch64_neon_fmax:
1650 case Intrinsic::aarch64_neon_fmin:
1651 case Intrinsic::aarch64_neon_fmaxnm:
1652 case Intrinsic::aarch64_neon_fminnm: {
1653 MachineIRBuilder MIB(MI);
1654 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1655 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1656 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1657 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1658 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1659 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1660 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1661 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1662 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1663 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1664 {MI.getOperand(2), MI.getOperand(3)});
1665 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1666 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1667 {MI.getOperand(2), MI.getOperand(3)});
1668 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1669 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1670 {MI.getOperand(2), MI.getOperand(3)});
1671 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1672 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1673 {MI.getOperand(2), MI.getOperand(3)});
1674 MI.eraseFromParent();
1675 return true;
1676 }
1677 case Intrinsic::vector_reverse:
1678 // TODO: Add support for vector_reverse
1679 return false;
1680 }
1681
1682 return true;
1683}
1684
1685bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1687 GISelChangeObserver &Observer) const {
1688 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1689 MI.getOpcode() == TargetOpcode::G_LSHR ||
1690 MI.getOpcode() == TargetOpcode::G_SHL);
1691 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1692 // imported patterns can select it later. Either way, it will be legal.
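 // E.g. %d:_(s32) = G_SHL %x, %amt where %amt is an s32 G_CONSTANT 3 gets its
 // amount operand replaced by a fresh s64 G_CONSTANT 3.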
1693 Register AmtReg = MI.getOperand(2).getReg();
1694 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1695 if (!VRegAndVal)
1696 return true;
1697 // Check the shift amount is in range for an immediate form.
1698 int64_t Amount = VRegAndVal->Value.getSExtValue();
1699 if (Amount > 31)
1700 return true; // This will have to remain a register variant.
1701 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1702 Observer.changingInstr(MI);
1703 MI.getOperand(2).setReg(ExtCst.getReg(0));
1704 Observer.changedInstr(MI);
1705 return true;
1706}
1707
1710 Base = Root;
1711 Offset = 0;
1712
1713 Register NewBase;
1714 int64_t NewOffset;
1715 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1716 isShiftedInt<7, 3>(NewOffset)) {
1717 Base = NewBase;
1718 Offset = NewOffset;
1719 }
1720}
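// Note (not in the original source): isShiftedInt<7, 3> accepts exactly the
// offsets LDP/STP of X registers can encode, a signed 7-bit immediate scaled
// by 8, i.e. multiples of 8 in [-512, 504]. legalizeLoadStore() below divides
// the matched byte offset by 8 before emitting it as the instruction
// immediate.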
1721
1722// FIXME: This should be removed and replaced with the generic bitcast legalize
1723// action.
1724bool AArch64LegalizerInfo::legalizeLoadStore(
1725 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1726 GISelChangeObserver &Observer) const {
1727 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1728 MI.getOpcode() == TargetOpcode::G_LOAD);
1729 // Here we just try to handle vector loads/stores where our value type might
1730 // have pointer elements, which the SelectionDAG importer can't handle. To
1731 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1732 // the value to use s64 types.
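// For example (illustrative, not from the original source):
//   %v:_(<2 x p0>) = G_LOAD %addr(p0)
// is rewritten below as
//   %tmp:_(<2 x s64>) = G_LOAD %addr(p0)
//   %v:_(<2 x p0>) = G_BITCAST %tmp(<2 x s64>)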
1733
1734 // Custom legalization requires that the instruction, if not deleted, be fully
1735 // legalized. To allow further legalization of the instruction, we create
1736 // a new instruction and erase the existing one.
1737
1738 Register ValReg = MI.getOperand(0).getReg();
1739 const LLT ValTy = MRI.getType(ValReg);
1740
1741 if (ValTy == LLT::scalar(128)) {
1742
1743 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1744 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1745 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1746 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1747 bool IsRcpC3 =
1748 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
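// Explanatory note (not in the original source): FEAT_LRCPC3 provides LDIAPP
// (load-acquire pair) and STILP (store-release pair), which let a 128-bit
// acquire load or release store be expressed as a single paired access.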
1749
1750 LLT s64 = LLT::scalar(64);
1751
1752 unsigned Opcode;
1753 if (IsRcpC3) {
1754 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1755 } else {
1756 // For LSE2, loads/stores should have been converted to monotonic and had
1757 // a fence inserted after them.
1758 assert(Ordering == AtomicOrdering::Monotonic ||
1759 Ordering == AtomicOrdering::Unordered);
1760 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1761
1762 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1763 }
1764
1765 MachineInstrBuilder NewI;
1766 if (IsLoad) {
1767 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1768 MIRBuilder.buildMergeLikeInstr(
1769 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1770 } else {
1771 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1772 NewI = MIRBuilder.buildInstr(
1773 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1774 }
1775
1776 if (IsRcpC3) {
1777 NewI.addUse(MI.getOperand(1).getReg());
1778 } else {
1779 Register Base;
1780 int Offset;
1781 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1782 NewI.addUse(Base);
1783 NewI.addImm(Offset / 8);
1784 }
1785
1786 NewI.cloneMemRefs(MI);
1787 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1788 *MRI.getTargetRegisterInfo(),
1789 *ST->getRegBankInfo());
1790 MI.eraseFromParent();
1791 return true;
1792 }
1793
1794 if (!ValTy.isPointerVector() ||
1795 ValTy.getElementType().getAddressSpace() != 0) {
1796 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1797 return false;
1798 }
1799
1800 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1801 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1802 auto &MMO = **MI.memoperands_begin();
1803 MMO.setType(NewTy);
1804
1805 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1806 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1807 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1808 } else {
1809 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1810 MIRBuilder.buildBitcast(ValReg, NewLoad);
1811 }
1812 MI.eraseFromParent();
1813 return true;
1814}
1815
1816bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1817 MachineRegisterInfo &MRI,
1818 MachineIRBuilder &MIRBuilder) const {
1819 MachineFunction &MF = MIRBuilder.getMF();
1820 Align Alignment(MI.getOperand(2).getImm());
1821 Register Dst = MI.getOperand(0).getReg();
1822 Register ListPtr = MI.getOperand(1).getReg();
1823
1824 LLT PtrTy = MRI.getType(ListPtr);
1825 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1826
1827 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1828 const Align PtrAlign = Align(PtrSize);
1829 auto List = MIRBuilder.buildLoad(
1830 PtrTy, ListPtr,
1831 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1832 PtrTy, PtrAlign));
1833
1834 MachineInstrBuilder DstPtr;
1835 if (Alignment > PtrAlign) {
1836 // Realign the list to the actual required alignment.
1837 auto AlignMinus1 =
1838 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1839 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1840 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1841 } else
1842 DstPtr = List;
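// Worked example (illustrative, not from the original source): for a type
// with Alignment == 16 and 8-byte va_list slots, DstPtr becomes
// (List + 15) & ~15, i.e. the loaded list pointer rounded up to 16 bytes.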
1843
1844 LLT ValTy = MRI.getType(Dst);
1845 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1846 MIRBuilder.buildLoad(
1847 Dst, DstPtr,
1848 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1849 ValTy, std::max(Alignment, PtrAlign)));
1850
1851 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
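// e.g. (illustrative, not from the original source): an s32 va_arg with
// 8-byte va_list slots still advances the list pointer by
// alignTo(4, 8) == 8 bytes, keeping the list slot-aligned.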
1852
1853 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1854
1855 MIRBuilder.buildStore(NewList, ListPtr,
1856 *MF.getMachineMemOperand(MachinePointerInfo(),
1857 MachineMemOperand::MOStore,
1858 PtrTy, PtrAlign));
1859
1860 MI.eraseFromParent();
1861 return true;
1862}
1863
1864bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1865 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1866 // Only legal if we can select immediate forms.
1867 // TODO: Lower this otherwise.
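// e.g. (illustrative, not from the original source):
//   %r:_(s32) = G_UBFX %x, %lsb, %width
// stays legal only when %lsb and %width are G_CONSTANTs, matching the
// UBFX/SBFX immediate forms.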
1868 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1869 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1870}
1871
1872bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1873 MachineRegisterInfo &MRI,
1874 LegalizerHelper &Helper) const {
1875 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1876 // it can be more efficiently lowered to the following sequence that uses
1877 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1878 // registers are cheap.
1879 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1880 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1881 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1882 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1883 //
1884 // For 128 bit vector popcounts, we lower to the following sequence:
1885 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1886 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1887 // uaddlp.4s v0, v0 // v4s32, v2s64
1888 // uaddlp.2d v0, v0 // v2s64
1889 //
1890 // For 64 bit vector popcounts, we lower to the following sequence:
1891 // cnt.8b v0, v0 // v4s16, v2s32
1892 // uaddlp.4h v0, v0 // v4s16, v2s32
1893 // uaddlp.2s v0, v0 // v2s32
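// At the GISel level, the scalar s64 case below becomes roughly (illustrative
// sketch, not from the original source):
//   %v:_(<8 x s8>) = G_BITCAST %x:_(s64)
//   %c:_(<8 x s8>) = G_CTPOP %v
//   %s:_(s32)      = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %c
//   %r:_(s64)      = G_ZEXT %s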
1894
1895 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1896 Register Dst = MI.getOperand(0).getReg();
1897 Register Val = MI.getOperand(1).getReg();
1898 LLT Ty = MRI.getType(Val);
1899 unsigned Size = Ty.getSizeInBits();
1900
1901 assert(Ty == MRI.getType(Dst) &&
1902 "Expected src and dst to have the same type!");
1903
1904 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1905 LLT s64 = LLT::scalar(64);
1906
1907 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1908 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1909 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1910 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1911
1912 MIRBuilder.buildZExt(Dst, Add);
1913 MI.eraseFromParent();
1914 return true;
1915 }
1916
1917 if (!ST->hasNEON() ||
1918 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1919 // Use generic lowering when custom lowering is not possible.
1920 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1921 Helper.lowerBitCount(MI) ==
1922 LegalizerHelper::LegalizeResult::Legalized;
1923 }
1924
1925 // Pre-conditioning: widen Val up to the nearest vector type.
1926 // s32,s64,v4s16,v2s32 -> v8i8
1927 // v8s16,v4s32,v2s64 -> v16i8
1928 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1929 if (Ty.isScalar()) {
1930 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1931 if (Size == 32) {
1932 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1933 }
1934 }
1935 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1936
1937 // Count bits in each byte-sized lane.
1938 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1939
1940 // Sum across lanes.
1941
1942 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1943 Ty.getScalarSizeInBits() != 16) {
1944 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1945 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
1946 auto Ones = MIRBuilder.buildConstant(VTy, 1);
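// Explanatory note (not in the original source): UDOT accumulates the dot
// product of each group of four u8 lanes into a u32 lane, so dotting the
// per-byte popcounts against a vector of ones sums every four byte counts
// into the 32-bit lane covering those bytes.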
1947 MachineInstrBuilder Sum;
1948
1949 if (Ty == LLT::fixed_vector(2, 64)) {
1950 auto UDOT =
1951 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1952 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1953 } else if (Ty == LLT::fixed_vector(4, 32)) {
1954 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1955 } else if (Ty == LLT::fixed_vector(2, 32)) {
1956 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1957 } else {
1958 llvm_unreachable("unexpected vector shape");
1959 }
1960
1961 Sum->getOperand(0).setReg(Dst);
1962 MI.eraseFromParent();
1963 return true;
1964 }
1965
1966 Register HSum = CTPOP.getReg(0);
1967 unsigned Opc;
1968 SmallVector<LLT> HAddTys;
1969 if (Ty.isScalar()) {
1970 Opc = Intrinsic::aarch64_neon_uaddlv;
1971 HAddTys.push_back(LLT::scalar(32));
1972 } else if (Ty == LLT::fixed_vector(8, 16)) {
1973 Opc = Intrinsic::aarch64_neon_uaddlp;
1974 HAddTys.push_back(LLT::fixed_vector(8, 16));
1975 } else if (Ty == LLT::fixed_vector(4, 32)) {
1976 Opc = Intrinsic::aarch64_neon_uaddlp;
1977 HAddTys.push_back(LLT::fixed_vector(8, 16));
1978 HAddTys.push_back(LLT::fixed_vector(4, 32));
1979 } else if (Ty == LLT::fixed_vector(2, 64)) {
1980 Opc = Intrinsic::aarch64_neon_uaddlp;
1981 HAddTys.push_back(LLT::fixed_vector(8, 16));
1982 HAddTys.push_back(LLT::fixed_vector(4, 32));
1983 HAddTys.push_back(LLT::fixed_vector(2, 64));
1984 } else if (Ty == LLT::fixed_vector(4, 16)) {
1985 Opc = Intrinsic::aarch64_neon_uaddlp;
1986 HAddTys.push_back(LLT::fixed_vector(4, 16));
1987 } else if (Ty == LLT::fixed_vector(2, 32)) {
1988 Opc = Intrinsic::aarch64_neon_uaddlp;
1989 HAddTys.push_back(LLT::fixed_vector(4, 16));
1990 HAddTys.push_back(LLT::fixed_vector(2, 32));
1991 } else
1992 llvm_unreachable("unexpected vector shape");
1993 MachineInstrBuilder UADD;
1994 for (LLT HTy : HAddTys) {
1995 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1996 HSum = UADD.getReg(0);
1997 }
1998
1999 // Post-conditioning.
2000 if (Ty.isScalar() && (Size == 64 || Size == 128))
2001 MIRBuilder.buildZExt(Dst, UADD);
2002 else
2003 UADD->getOperand(0).setReg(Dst);
2004 MI.eraseFromParent();
2005 return true;
2006}
2007
2008bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2009 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2010 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2011 LLT s64 = LLT::scalar(64);
2012 auto Addr = MI.getOperand(1).getReg();
2013 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2014 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2015 auto DstLo = MRI.createGenericVirtualRegister(s64);
2016 auto DstHi = MRI.createGenericVirtualRegister(s64);
2017
2018 MachineInstrBuilder CAS;
2019 if (ST->hasLSE()) {
2020 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2021 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2022 // the rest of the MIR so we must reassemble the extracted registers into a
2023 // 128-bit known-regclass one with code like this:
2024 //
2025 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2026 // %out = CASP %in1, ...
2027 // %OldLo = G_EXTRACT %out, 0
2028 // %OldHi = G_EXTRACT %out, 64
2029 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2030 unsigned Opcode;
2031 switch (Ordering) {
2032 case AtomicOrdering::Acquire:
2033 Opcode = AArch64::CASPAX;
2034 break;
2035 case AtomicOrdering::Release:
2036 Opcode = AArch64::CASPLX;
2037 break;
2038 case AtomicOrdering::AcquireRelease:
2039 case AtomicOrdering::SequentiallyConsistent:
2040 Opcode = AArch64::CASPALX;
2041 break;
2042 default:
2043 Opcode = AArch64::CASPX;
2044 break;
2045 }
2046
2047 LLT s128 = LLT::scalar(128);
2048 auto CASDst = MRI.createGenericVirtualRegister(s128);
2049 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2050 auto CASNew = MRI.createGenericVirtualRegister(s128);
2051 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2052 .addUse(DesiredI->getOperand(0).getReg())
2053 .addImm(AArch64::sube64)
2054 .addUse(DesiredI->getOperand(1).getReg())
2055 .addImm(AArch64::subo64);
2056 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2057 .addUse(NewI->getOperand(0).getReg())
2058 .addImm(AArch64::sube64)
2059 .addUse(NewI->getOperand(1).getReg())
2060 .addImm(AArch64::subo64);
2061
2062 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2063
2064 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2065 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2066 } else {
2067 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2068 // can take arbitrary registers so it just has the normal GPR64 operands the
2069 // rest of AArch64 is expecting.
2070 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2071 unsigned Opcode;
2072 switch (Ordering) {
2073 case AtomicOrdering::Acquire:
2074 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2075 break;
2076 case AtomicOrdering::Release:
2077 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2078 break;
2079 case AtomicOrdering::AcquireRelease:
2080 case AtomicOrdering::SequentiallyConsistent:
2081 Opcode = AArch64::CMP_SWAP_128;
2082 break;
2083 default:
2084 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2085 break;
2086 }
2087
2088 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2089 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2090 {Addr, DesiredI->getOperand(0),
2091 DesiredI->getOperand(1), NewI->getOperand(0),
2092 NewI->getOperand(1)});
2093 }
2094
2095 CAS.cloneMemRefs(MI);
2096 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2097 *MRI.getTargetRegisterInfo(),
2098 *ST->getRegBankInfo());
2099
2100 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2101 MI.eraseFromParent();
2102 return true;
2103}
2104
2105bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2106 LegalizerHelper &Helper) const {
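// Explanatory note (not in the original source): AArch64 has no
// count-trailing-zeros instruction, but RBIT followed by CLZ is equivalent:
// cttz(x) == ctlz(bitreverse(x)).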
2107 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2108 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2109 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2110 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2111 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2112 MI.eraseFromParent();
2113 return true;
2114}
2115
2116bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2117 LegalizerHelper &Helper) const {
2118 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2119
2120 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2121 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2122 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2123 // the instruction).
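// e.g. (illustrative, not from the original source): the s8 value operand of
//   G_MEMSET %dst(p0), %val(s8), %len(s64), ...
// is replaced by %ext:_(s64) = G_ANYEXT %val(s8); the selected instruction
// only reads the low 8 bits of that register.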
2124 auto &Value = MI.getOperand(1);
2125 Register ExtValueReg =
2126 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2127 Value.setReg(ExtValueReg);
2128 return true;
2129 }
2130
2131 return false;
2132}
2133
2134bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2135 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2136 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2137 auto VRegAndVal =
2138 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2139 if (VRegAndVal)
2140 return true;
2141 return Helper.lowerExtractInsertVectorElt(MI) !=
2142 LegalizerHelper::LegalizeResult::UnableToLegalize;
2143}
2144
2145bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2146 MachineInstr &MI, LegalizerHelper &Helper) const {
2147 MachineFunction &MF = *MI.getParent()->getParent();
2148 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2149 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2150
2151 // If stack probing is not enabled for this function, use the default
2152 // lowering.
2153 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2154 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2155 "inline-asm") {
2156 Helper.lowerDynStackAlloc(MI);
2157 return true;
2158 }
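// Note (hedged, not in the original source): "probe-stack"="inline-asm" is the
// attribute clang typically emits under -fstack-clash-protection; when it is
// set, the allocation below uses the PROBED_STACKALLOC_DYN pseudo so every
// page of the newly allocated stack region is touched.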
2159
2160 Register Dst = MI.getOperand(0).getReg();
2161 Register AllocSize = MI.getOperand(1).getReg();
2162 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2163
2164 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2165 "Unexpected type for dynamic alloca");
2166 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2167 "Unexpected type for dynamic alloca");
2168
2169 LLT PtrTy = MRI.getType(Dst);
2170 Register SPReg =
2171 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2172 Register SPTmp =
2173 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2174 auto NewMI =
2175 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2176 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2177 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2178 MIRBuilder.buildCopy(Dst, SPTmp);
2179
2180 MI.eraseFromParent();
2181 return true;
2182}
2183
2184bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2185 LegalizerHelper &Helper) const {
2186 MachineIRBuilder &MIB = Helper.MIRBuilder;
2187 auto &AddrVal = MI.getOperand(0);
2188
2189 int64_t IsWrite = MI.getOperand(1).getImm();
2190 int64_t Locality = MI.getOperand(2).getImm();
2191 int64_t IsData = MI.getOperand(3).getImm();
2192
2193 bool IsStream = Locality == 0;
2194 if (Locality != 0) {
2195 assert(Locality <= 3 && "Prefetch locality out-of-range");
2196 // The IR locality argument and the hint's cache-level field count in
2197 // opposite directions (higher locality means a closer cache level), so
2198 // invert the value. The encoding starts at 0 for level 1.
2199 Locality = 3 - Locality;
2200 }
2201
2202 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
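// Worked example (illustrative, not from the original source): a read data
// prefetch with locality 3 gives IsWrite=0, !IsData=0, Locality=3-3=0,
// IsStream=0, so PrfOp == 0b00000 (PLDL1KEEP); a write data prefetch with
// locality 1 gives PrfOp == (1<<4)|(2<<1) == 0b10100 (PSTL3KEEP).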
2203
2204 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2205 MI.eraseFromParent();
2206 return true;
2207}