1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64RegisterBankInfo.h"
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
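 // Note on LLT spelling: sN is an N-bit scalar, vKsN a fixed K x N-bit
 // vector, and nxvKsN a scalable vector with at least K N-bit lanes; e.g.
 // v4s16 prints as <4 x s16> in MIR and nxv4s32 as <vscale x 4 x s32>.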
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 getLegacyLegalizerInfo().computeTables();
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
95 getActionDefinitionsBuilder(
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
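 // Rough sketch of the rules above: clampScalar(0, s8, s64) widens e.g. an
 // s1 G_IMPLICIT_DEF to s8 (followed by a G_TRUNC back to s1), and
 // clampNumElements(0, v2s64, v2s64) splits anything wider, e.g. a
 // hypothetical v4s64 into two v2s64 halves.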
107
108 getActionDefinitionsBuilder(G_PHI)
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
120 getActionDefinitionsBuilder(G_BSWAP)
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
137 .minScalarOrEltIf(
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
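 // The cascading minScalarOrEltIf clauses above widen small vector elements
 // so the whole vector still fits in 128 bits; e.g. a hypothetical v2s8 add
 // ends up as v2s32 rather than being split.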
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
183 getActionDefinitionsBuilder(G_PTR_ADD)
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
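 // There is no integer remainder instruction, so G_SREM/G_UREM are lowered;
 // as a rough sketch, %r = G_SREM %a, %b expands to %q = G_SDIV %a, %b;
 // %m = G_MUL %q, %b; %r = G_SUB %a, %m, which typically selects to
 // sdiv + msub.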
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220 // Make clamping conditional on the CSSC extension: without legal types we
221 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
222 // if we detect a type smaller than 32-bit.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
231 // FIXME: This shouldn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
235
236 getActionDefinitionsBuilder(
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
260 getActionDefinitionsBuilder(G_FREM)
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
270 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
271 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10})
272 // We need a call for these, so we always need to scalarize.
273 .scalarize(0)
274 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
275 .minScalar(0, s32)
276 .libcallFor({s32, s64});
277 getActionDefinitionsBuilder(G_FPOWI)
278 .scalarize(0)
279 .minScalar(0, s32)
280 .libcallFor({{s32, s32}, {s64, s32}});
281
282 getActionDefinitionsBuilder(G_INSERT)
283 .legalIf(all(typeInSet(0, {s32, s64, p0}),
284 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
286 .clampScalar(0, s32, s64)
288 .minScalar(1, s8)
289 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
290 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
291
292 getActionDefinitionsBuilder(G_EXTRACT)
293 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
294 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
296 .clampScalar(1, s32, s128)
298 .minScalar(0, s16)
299 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
300 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
301 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
302
303
304 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
305 auto &Actions = getActionDefinitionsBuilder(Op);
306
307 if (Op == G_SEXTLOAD)
309
310 // Atomics have zero extending behavior.
311 Actions
312 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
313 {s32, p0, s16, 8},
314 {s32, p0, s32, 8},
315 {s64, p0, s8, 2},
316 {s64, p0, s16, 2},
317 {s64, p0, s32, 4},
318 {s64, p0, s64, 8},
319 {p0, p0, s64, 8},
320 {v2s32, p0, s64, 8}})
321 .widenScalarToNextPow2(0)
322 .clampScalar(0, s32, s64)
323 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
324 // how to do that yet.
325 .unsupportedIfMemSizeNotPow2()
326 // Lower anything left over into G_*EXT and G_LOAD
327 .lower();
328 }
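 // Each legalForTypesWithMemDesc entry above reads {result type, pointer
 // type, memory type, minimum alignment}; e.g. {s32, p0, s16, 8} is an
 // extending load of a 16-bit memory value into an s32 register.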
329
330 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
331 const LLT &ValTy = Query.Types[0];
332 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
333 };
334
335 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
336 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
337
338 if (ST.hasSVE()) {
339 LoadActions.legalForTypesWithMemDesc({
340 // 128 bit base sizes
341 {nxv16s8, p0, nxv16s8, 8},
342 {nxv8s16, p0, nxv8s16, 8},
343 {nxv4s32, p0, nxv4s32, 8},
344 {nxv2s64, p0, nxv2s64, 8},
345 });
346
347 // TODO: Add nxv2p0. Consider bitcastIf.
348 // See #92130
349 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
350 StoreActions.legalForTypesWithMemDesc({
351 // 128 bit base sizes
352 {nxv16s8, p0, nxv16s8, 8},
353 {nxv8s16, p0, nxv8s16, 8},
354 {nxv4s32, p0, nxv4s32, 8},
355 {nxv2s64, p0, nxv2s64, 8},
356 });
357 }
358
359 LoadActions
360 .customIf([=](const LegalityQuery &Query) {
361 return HasRCPC3 && Query.Types[0] == s128 &&
362 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
363 })
364 .customIf([=](const LegalityQuery &Query) {
365 return Query.Types[0] == s128 &&
366 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
367 })
368 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
369 {s16, p0, s16, 8},
370 {s32, p0, s32, 8},
371 {s64, p0, s64, 8},
372 {p0, p0, s64, 8},
373 {s128, p0, s128, 8},
374 {v8s8, p0, s64, 8},
375 {v16s8, p0, s128, 8},
376 {v4s16, p0, s64, 8},
377 {v8s16, p0, s128, 8},
378 {v2s32, p0, s64, 8},
379 {v4s32, p0, s128, 8},
380 {v2s64, p0, s128, 8}})
381 // These extends are also legal
382 .legalForTypesWithMemDesc(
383 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
384 .widenScalarToNextPow2(0, /* MinSize = */ 8)
385 .clampMaxNumElements(0, s8, 16)
386 .clampMaxNumElements(0, s16, 8)
387 .clampMaxNumElements(0, s32, 4)
388 .clampMaxNumElements(0, s64, 2)
389 .clampMaxNumElements(0, p0, 2)
390 .lowerIfMemSizeNotByteSizePow2()
391 .clampScalar(0, s8, s64)
392 .narrowScalarIf(
393 [=](const LegalityQuery &Query) {
394 // Clamp extending load results to 32-bits.
395 return Query.Types[0].isScalar() &&
396 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
397 Query.Types[0].getSizeInBits() > 32;
398 },
399 changeTo(0, s32))
400 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
401 .bitcastIf(typeInSet(0, {v4s8}),
402 [=](const LegalityQuery &Query) {
403 const LLT VecTy = Query.Types[0];
404 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
405 })
406 .customIf(IsPtrVecPred)
407 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
408
409 StoreActions
410 .customIf([=](const LegalityQuery &Query) {
411 return HasRCPC3 && Query.Types[0] == s128 &&
412 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
413 })
414 .customIf([=](const LegalityQuery &Query) {
415 return Query.Types[0] == s128 &&
416 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
417 })
418 .legalForTypesWithMemDesc(
419 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
420 {s32, p0, s8, 8}, // truncstorei8 from s32
421 {s64, p0, s8, 8}, // truncstorei8 from s64
422 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
423 {s64, p0, s16, 8}, // truncstorei16 from s64
424 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
425 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
426 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
427 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
428 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
429 .clampScalar(0, s8, s64)
430 .lowerIf([=](const LegalityQuery &Query) {
431 return Query.Types[0].isScalar() &&
432 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
433 })
434 // Maximum: sN * k = 128
435 .clampMaxNumElements(0, s8, 16)
436 .clampMaxNumElements(0, s16, 8)
437 .clampMaxNumElements(0, s32, 4)
438 .clampMaxNumElements(0, s64, 2)
439 .clampMaxNumElements(0, p0, 2)
440 .lowerIfMemSizeNotPow2()
441 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
442 .bitcastIf(typeInSet(0, {v4s8}),
443 [=](const LegalityQuery &Query) {
444 const LLT VecTy = Query.Types[0];
445 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
446 })
447 .customIf(IsPtrVecPred)
448 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
449
450 getActionDefinitionsBuilder(G_INDEXED_STORE)
451 // Idx 0 == Ptr, Idx 1 == Val
452 // TODO: we can implement legalizations but as of now these are
453 // generated in a very specific way.
454 .legalForTypesWithMemDesc({
455 {p0, s8, s8, 8},
456 {p0, s16, s16, 8},
457 {p0, s32, s8, 8},
458 {p0, s32, s16, 8},
459 {p0, s32, s32, 8},
460 {p0, s64, s64, 8},
461 {p0, p0, p0, 8},
462 {p0, v8s8, v8s8, 8},
463 {p0, v16s8, v16s8, 8},
464 {p0, v4s16, v4s16, 8},
465 {p0, v8s16, v8s16, 8},
466 {p0, v2s32, v2s32, 8},
467 {p0, v4s32, v4s32, 8},
468 {p0, v2s64, v2s64, 8},
469 {p0, v2p0, v2p0, 8},
470 {p0, s128, s128, 8},
471 })
472 .unsupported();
473
474 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
475 LLT LdTy = Query.Types[0];
476 LLT PtrTy = Query.Types[1];
477 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
478 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
479 return false;
480 if (PtrTy != p0)
481 return false;
482 return true;
483 };
484 getActionDefinitionsBuilder(G_INDEXED_LOAD)
485 .unsupportedIf(
486 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
487 .legalIf(IndexedLoadBasicPred)
488 .unsupported();
489 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
490 .unsupportedIf(
491 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
492 .legalIf(all(typeInSet(0, {s16, s32, s64}),
493 LegalityPredicate([=](const LegalityQuery &Q) {
494 LLT LdTy = Q.Types[0];
495 LLT PtrTy = Q.Types[1];
496 LLT MemTy = Q.MMODescrs[0].MemoryTy;
497 if (PtrTy != p0)
498 return false;
499 if (LdTy == s16)
500 return MemTy == s8;
501 if (LdTy == s32)
502 return MemTy == s8 || MemTy == s16;
503 if (LdTy == s64)
504 return MemTy == s8 || MemTy == s16 || MemTy == s32;
505 return false;
506 })))
507 .unsupported();
508
509 // Constants
510 getActionDefinitionsBuilder(G_CONSTANT)
511 .legalFor({p0, s8, s16, s32, s64})
512 .widenScalarToNextPow2(0)
513 .clampScalar(0, s8, s64);
514 getActionDefinitionsBuilder(G_FCONSTANT)
515 .legalIf([=](const LegalityQuery &Query) {
516 const auto &Ty = Query.Types[0];
517 if (HasFP16 && Ty == s16)
518 return true;
519 return Ty == s32 || Ty == s64 || Ty == s128;
520 })
521 .clampScalar(0, MinFPScalar, s128);
522
523 // FIXME: fix moreElementsToNextPow2
524 getActionDefinitionsBuilder(G_ICMP)
525 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
527 .clampScalar(1, s32, s64)
528 .clampScalar(0, s32, s32)
529 .minScalarEltSameAsIf(
530 [=](const LegalityQuery &Query) {
531 const LLT &Ty = Query.Types[0];
532 const LLT &SrcTy = Query.Types[1];
533 return Ty.isVector() && !SrcTy.isPointerVector() &&
534 Ty.getElementType() != SrcTy.getElementType();
535 },
536 0, 1)
537 .minScalarOrEltIf(
538 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
539 1, s32)
540 .minScalarOrEltIf(
541 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
542 s64)
544 .clampNumElements(1, v8s8, v16s8)
545 .clampNumElements(1, v4s16, v8s16)
546 .clampNumElements(1, v2s32, v4s32)
547 .clampNumElements(1, v2s64, v2s64)
548 .customIf(isVector(0));
549
550 getActionDefinitionsBuilder(G_FCMP)
551 .legalFor({{s32, MinFPScalar},
552 {s32, s32},
553 {s32, s64},
554 {v4s32, v4s32},
555 {v2s32, v2s32},
556 {v2s64, v2s64}})
557 .legalIf([=](const LegalityQuery &Query) {
558 const auto &Ty = Query.Types[1];
559 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
560 })
562 .clampScalar(0, s32, s32)
563 .clampScalarOrElt(1, MinFPScalar, s64)
564 .minScalarEltSameAsIf(
565 [=](const LegalityQuery &Query) {
566 const LLT &Ty = Query.Types[0];
567 const LLT &SrcTy = Query.Types[1];
568 return Ty.isVector() && !SrcTy.isPointerVector() &&
569 Ty.getElementType() != SrcTy.getElementType();
570 },
571 0, 1)
572 .clampNumElements(1, v4s16, v8s16)
573 .clampNumElements(1, v2s32, v4s32)
574 .clampMaxNumElements(1, s64, 2)
575 .moreElementsToNextPow2(1);
576
577 // Extensions
578 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
579 unsigned DstSize = Query.Types[0].getSizeInBits();
580
581 // Handle legal vectors using legalFor
582 if (Query.Types[0].isVector())
583 return false;
584
585 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
586 return false; // Extending to a scalar s128 needs narrowing.
587
588 const LLT &SrcTy = Query.Types[1];
589
590 // Make sure we fit in a register otherwise. Don't bother checking that
591 // the source type is below 128 bits. We shouldn't be allowing anything
592 // through which is wider than the destination in the first place.
593 unsigned SrcSize = SrcTy.getSizeInBits();
594 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
595 return false;
596
597 return true;
598 };
599 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
600 .legalIf(ExtLegalFunc)
601 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
602 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
604 .clampMaxNumElements(1, s8, 8)
605 .clampMaxNumElements(1, s16, 4)
606 .clampMaxNumElements(1, s32, 2)
607 // Tries to convert a large EXTEND into two smaller EXTENDs
608 .lowerIf([=](const LegalityQuery &Query) {
609 return (Query.Types[0].getScalarSizeInBits() >
610 Query.Types[1].getScalarSizeInBits() * 2) &&
611 Query.Types[0].isVector() &&
612 (Query.Types[1].getScalarSizeInBits() == 8 ||
613 Query.Types[1].getScalarSizeInBits() == 16);
614 })
615 .clampMinNumElements(1, s8, 8)
616 .clampMinNumElements(1, s16, 4);
617
618 getActionDefinitionsBuilder(G_TRUNC)
619 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
621 .clampMaxNumElements(0, s8, 8)
622 .clampMaxNumElements(0, s16, 4)
623 .clampMaxNumElements(0, s32, 2)
624 .minScalarOrEltIf(
625 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
626 0, s8)
627 .lowerIf([=](const LegalityQuery &Query) {
628 LLT DstTy = Query.Types[0];
629 LLT SrcTy = Query.Types[1];
630 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
631 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
632 })
633 .clampMinNumElements(0, s8, 8)
634 .clampMinNumElements(0, s16, 4)
635 .alwaysLegal();
636
637 getActionDefinitionsBuilder(G_SEXT_INREG)
638 .legalFor({s32, s64})
639 .legalFor(PackedVectorAllTypeList)
640 .maxScalar(0, s64)
641 .clampNumElements(0, v8s8, v16s8)
642 .clampNumElements(0, v4s16, v8s16)
643 .clampNumElements(0, v2s32, v4s32)
644 .clampMaxNumElements(0, s64, 2)
645 .lower();
646
647 // FP conversions
648 getActionDefinitionsBuilder(G_FPTRUNC)
649 .legalFor(
650 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
651 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
652 .clampNumElements(0, v4s16, v4s16)
653 .clampNumElements(0, v2s32, v2s32)
654 .scalarize(0);
655
656 getActionDefinitionsBuilder(G_FPEXT)
657 .legalFor(
658 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
659 .clampNumElements(0, v4s32, v4s32)
660 .clampNumElements(0, v2s64, v2s64)
661 .scalarize(0);
662
663 // Conversions
664 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
665 .legalFor({{s32, s32},
666 {s64, s32},
667 {s32, s64},
668 {s64, s64},
669 {v2s64, v2s64},
670 {v4s32, v4s32},
671 {v2s32, v2s32}})
672 .legalIf([=](const LegalityQuery &Query) {
673 return HasFP16 &&
674 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
675 Query.Types[1] == v8s16) &&
676 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
677 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
678 })
679 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
680 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
681 // The range of an fp16 value fits into an i17, so we can lower the width
682 // to i64.
683 .narrowScalarIf(
684 [=](const LegalityQuery &Query) {
685 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
686 },
687 changeTo(0, s64))
689 .widenScalarOrEltToNextPow2OrMinSize(0)
690 .minScalar(0, s32)
691 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
692 .widenScalarIf(
693 [=](const LegalityQuery &Query) {
694 return Query.Types[0].getScalarSizeInBits() <= 64 &&
695 Query.Types[0].getScalarSizeInBits() >
696 Query.Types[1].getScalarSizeInBits();
697 },
699 .widenScalarIf(
700 [=](const LegalityQuery &Query) {
701 return Query.Types[1].getScalarSizeInBits() <= 64 &&
702 Query.Types[0].getScalarSizeInBits() <
703 Query.Types[1].getScalarSizeInBits();
704 },
706 .clampNumElements(0, v4s16, v8s16)
707 .clampNumElements(0, v2s32, v4s32)
708 .clampMaxNumElements(0, s64, 2)
709 .libcallFor(
710 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
711
712 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
713 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
714 .legalIf([=](const LegalityQuery &Query) {
715 return HasFP16 &&
716 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
717 Query.Types[0] == v8s16) &&
718 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
719 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
720 })
721 .widenScalarToNextPow2(1)
722 .clampScalar(1, s32, s64)
724 .clampScalarOrElt(0, MinFPScalar, s64)
727 [=](const LegalityQuery &Query) {
728 return Query.Types[0].getScalarSizeInBits() <
729 Query.Types[1].getScalarSizeInBits();
730 },
732 .widenScalarIf(
733 [=](const LegalityQuery &Query) {
734 return Query.Types[0].getScalarSizeInBits() >
735 Query.Types[1].getScalarSizeInBits();
736 },
738 .clampNumElements(0, v4s16, v8s16)
739 .clampNumElements(0, v2s32, v4s32)
740 .clampMaxNumElements(0, s64, 2);
741
742 // Control-flow
743 getActionDefinitionsBuilder(G_BRCOND)
744 .legalFor({s32})
745 .clampScalar(0, s32, s32);
746 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
747
748 getActionDefinitionsBuilder(G_SELECT)
749 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
750 .widenScalarToNextPow2(0)
751 .clampScalar(0, s32, s64)
752 .clampScalar(1, s32, s32)
754 .lowerIf(isVector(0));
755
756 // Pointer-handling
757 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
758
759 if (TM.getCodeModel() == CodeModel::Small)
760 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
761 else
762 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
763
764 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
765 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
766
767 getActionDefinitionsBuilder(G_PTRTOINT)
768 .legalFor({{s64, p0}, {v2s64, v2p0}})
769 .widenScalarToNextPow2(0, 64)
770 .clampScalar(0, s64, s64);
771
772 getActionDefinitionsBuilder(G_INTTOPTR)
773 .unsupportedIf([&](const LegalityQuery &Query) {
774 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
775 })
776 .legalFor({{p0, s64}, {v2p0, v2s64}});
777
778 // Casts for 32 and 64-bit width type are just copies.
779 // Same for 128-bit width type, except they are on the FPR bank.
780 getActionDefinitionsBuilder(G_BITCAST)
781 // Keeping 32-bit instructions legal to prevent regression in some tests
782 .legalForCartesianProduct({s32, v2s16, v4s8})
783 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
784 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
785 .lowerIf([=](const LegalityQuery &Query) {
786 return Query.Types[0].isVector() != Query.Types[1].isVector();
787 })
789 .clampNumElements(0, v8s8, v16s8)
790 .clampNumElements(0, v4s16, v8s16)
791 .clampNumElements(0, v2s32, v4s32)
792 .lower();
793
794 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
795
796 // va_list must be a pointer, but most sized types are pretty easy to handle
797 // as the destination.
798 getActionDefinitionsBuilder(G_VAARG)
799 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
800 .clampScalar(0, s8, s64)
801 .widenScalarToNextPow2(0, /*Min*/ 8);
802
803 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
804 .lowerIf(
805 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
806
807 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
808 return ST.outlineAtomics() && !ST.hasLSE();
809 };
810
811 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
812 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
813 predNot(UseOutlineAtomics)))
814 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
815 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
816 return Query.Types[0].getSizeInBits() == 128 &&
817 !UseOutlineAtomics(Query);
818 })
819 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
820 UseOutlineAtomics))
821 .clampScalar(0, s32, s64);
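 // When outlined atomics are in use (and LSE is not available), the libcall
 // path above emits a runtime helper such as __aarch64_cas4_acq_rel instead
 // of an inline LL/SC loop.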
822
823 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
824 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
825 G_ATOMICRMW_XOR})
826 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
827 predNot(UseOutlineAtomics)))
828 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
829 UseOutlineAtomics))
830 .clampScalar(0, s32, s64);
831
832 // Do not outline these atomic operations, as per comment in
833 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
834 getActionDefinitionsBuilder(
835 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
836 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
837 .clampScalar(0, s32, s64);
838
839 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
840
841 // Merge/Unmerge
842 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
843 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
844 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
845 getActionDefinitionsBuilder(Op)
846 .widenScalarToNextPow2(LitTyIdx, 8)
847 .widenScalarToNextPow2(BigTyIdx, 32)
848 .clampScalar(LitTyIdx, s8, s64)
849 .clampScalar(BigTyIdx, s32, s128)
850 .legalIf([=](const LegalityQuery &Q) {
851 switch (Q.Types[BigTyIdx].getSizeInBits()) {
852 case 32:
853 case 64:
854 case 128:
855 break;
856 default:
857 return false;
858 }
859 switch (Q.Types[LitTyIdx].getSizeInBits()) {
860 case 8:
861 case 16:
862 case 32:
863 case 64:
864 return true;
865 default:
866 return false;
867 }
868 });
869 }
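 // e.g. %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x(s64) is legal as-is,
 // while odd sizes such as an s96 source are first widened to the next
 // power of two by the rules above.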
870
871 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
872 .unsupportedIf([=](const LegalityQuery &Query) {
873 const LLT &EltTy = Query.Types[1].getElementType();
874 return Query.Types[0] != EltTy;
875 })
876 .minScalar(2, s64)
877 .customIf([=](const LegalityQuery &Query) {
878 const LLT &VecTy = Query.Types[1];
879 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
880 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
881 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
882 })
883 .minScalarOrEltIf(
884 [=](const LegalityQuery &Query) {
885 // We want to promote <M x s1> to <M x s64> if that wouldn't
886 // cause the total vec size to be > 128b.
887 return Query.Types[1].getNumElements() <= 2;
888 },
889 0, s64)
890 .minScalarOrEltIf(
891 [=](const LegalityQuery &Query) {
892 return Query.Types[1].getNumElements() <= 4;
893 },
894 0, s32)
895 .minScalarOrEltIf(
896 [=](const LegalityQuery &Query) {
897 return Query.Types[1].getNumElements() <= 8;
898 },
899 0, s16)
900 .minScalarOrEltIf(
901 [=](const LegalityQuery &Query) {
902 return Query.Types[1].getNumElements() <= 16;
903 },
904 0, s8)
905 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
907 .clampMaxNumElements(1, s64, 2)
908 .clampMaxNumElements(1, s32, 4)
909 .clampMaxNumElements(1, s16, 8)
910 .clampMaxNumElements(1, s8, 16)
911 .clampMaxNumElements(1, p0, 2);
912
913 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
914 .legalIf(
915 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
918 .clampNumElements(0, v8s8, v16s8)
919 .clampNumElements(0, v4s16, v8s16)
920 .clampNumElements(0, v2s32, v4s32)
921 .clampMaxNumElements(0, s64, 2)
922 .clampMaxNumElements(0, p0, 2);
923
924 getActionDefinitionsBuilder(G_BUILD_VECTOR)
925 .legalFor({{v8s8, s8},
926 {v16s8, s8},
927 {v4s16, s16},
928 {v8s16, s16},
929 {v2s32, s32},
930 {v4s32, s32},
931 {v2p0, p0},
932 {v2s64, s64}})
933 .clampNumElements(0, v4s32, v4s32)
934 .clampNumElements(0, v2s64, v2s64)
935 .minScalarOrElt(0, s8)
937 .minScalarSameAs(1, 0);
938
939 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
940
943 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
944 .scalarize(1)
945 .widenScalarToNextPow2(1, /*Min=*/32)
946 .clampScalar(1, s32, s64)
947 .scalarSameSizeAs(0, 1);
948 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
949
950 // TODO: Custom lowering for v2s32, v4s32, v2s64.
951 getActionDefinitionsBuilder(G_BITREVERSE)
952 .legalFor({s32, s64, v8s8, v16s8})
953 .widenScalarToNextPow2(0, /*Min = */ 32)
954 .clampScalar(0, s32, s64);
955
956 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
957
958 getActionDefinitionsBuilder(G_CTTZ)
959 .lowerIf(isVector(0))
960 .widenScalarToNextPow2(1, /*Min=*/32)
961 .clampScalar(1, s32, s64)
962 .scalarSameSizeAs(0, 1)
963 .legalIf([=](const LegalityQuery &Query) {
964 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
965 })
966 .customIf([=](const LegalityQuery &Query) {
967 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
968 });
969
970 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
971 .legalIf([=](const LegalityQuery &Query) {
972 const LLT &DstTy = Query.Types[0];
973 const LLT &SrcTy = Query.Types[1];
974 // For now just support the TBL2 variant which needs the source vectors
975 // to be the same size as the dest.
976 if (DstTy != SrcTy)
977 return false;
978 return llvm::is_contained(
979 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
980 })
981 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
982 // just want those lowered into G_BUILD_VECTOR
983 .lowerIf([=](const LegalityQuery &Query) {
984 return !Query.Types[1].isVector();
985 })
986 .moreElementsIf(
987 [](const LegalityQuery &Query) {
988 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
989 Query.Types[0].getNumElements() >
990 Query.Types[1].getNumElements();
991 },
992 changeTo(1, 0))
994 .moreElementsIf(
995 [](const LegalityQuery &Query) {
996 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
997 Query.Types[0].getNumElements() <
998 Query.Types[1].getNumElements();
999 },
1000 changeTo(0, 1))
1001 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1002 .clampNumElements(0, v8s8, v16s8)
1003 .clampNumElements(0, v4s16, v8s16)
1004 .clampNumElements(0, v4s32, v4s32)
1005 .clampNumElements(0, v2s64, v2s64);
1006
1007 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1008 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
1009
1010 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1011
1012 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1013
1014 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1015
1016 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1017
1018 if (ST.hasMOPS()) {
1019 // G_BZERO is not supported. Currently it is only emitted by
1020 // PreLegalizerCombiner for G_MEMSET with zero constant.
1021 getActionDefinitionsBuilder(G_BZERO).unsupported();
1022
1023 getActionDefinitionsBuilder(G_MEMSET)
1024 .legalForCartesianProduct({p0}, {s64}, {s64})
1025 .customForCartesianProduct({p0}, {s8}, {s64})
1026 .immIdx(0); // Inform verifier imm idx 0 is handled.
1027
1028 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1029 .legalForCartesianProduct({p0}, {p0}, {s64})
1030 .immIdx(0); // Inform verifier imm idx 0 is handled.
1031
1032 // G_MEMCPY_INLINE does not have a tailcall immediate
1033 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1034 .legalForCartesianProduct({p0}, {p0}, {s64});
1035
1036 } else {
1037 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1038 .libcall();
1039 }
1040
1041 // FIXME: Legal vector types are only legal with NEON.
1042 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1043 if (HasCSSC)
1044 ABSActions
1045 .legalFor({s32, s64});
1046 ABSActions.legalFor(PackedVectorAllTypeList)
1047 .customIf([=](const LegalityQuery &Q) {
1048 // TODO: Fix suboptimal codegen for 128+ bit types.
1049 LLT SrcTy = Q.Types[0];
1050 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1051 })
1052 .widenScalarIf(
1053 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1054 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1055 .widenScalarIf(
1056 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1057 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1058 .clampNumElements(0, v8s8, v16s8)
1059 .clampNumElements(0, v4s16, v8s16)
1060 .clampNumElements(0, v2s32, v4s32)
1061 .clampNumElements(0, v2s64, v2s64)
1062 .moreElementsToNextPow2(0)
1063 .lower();
1064
1065 // For fadd reductions we have pairwise operations available. We treat the
1066 // usual legal types as legal and handle the lowering to pairwise instructions
1067 // later.
1068 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1069 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1070 .legalIf([=](const LegalityQuery &Query) {
1071 const auto &Ty = Query.Types[1];
1072 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1073 })
1074 .minScalarOrElt(0, MinFPScalar)
1075 .clampMaxNumElements(1, s64, 2)
1076 .clampMaxNumElements(1, s32, 4)
1077 .clampMaxNumElements(1, s16, 8)
1078 .lower();
1079
1080 // For fmul reductions we need to split up into individual operations. We
1081 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1082 // smaller types, followed by scalarizing what remains.
1083 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1084 .minScalarOrElt(0, MinFPScalar)
1085 .clampMaxNumElements(1, s64, 2)
1086 .clampMaxNumElements(1, s32, 4)
1087 .clampMaxNumElements(1, s16, 8)
1088 .clampMaxNumElements(1, s32, 2)
1089 .clampMaxNumElements(1, s16, 4)
1090 .scalarize(1)
1091 .lower();
1092
1093 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1094 .scalarize(2)
1095 .lower();
1096
1097 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1098 .legalFor({{s8, v16s8},
1099 {s8, v8s8},
1100 {s16, v8s16},
1101 {s16, v4s16},
1102 {s32, v4s32},
1103 {s32, v2s32},
1104 {s64, v2s64}})
1105 .clampMaxNumElements(1, s64, 2)
1106 .clampMaxNumElements(1, s32, 4)
1107 .clampMaxNumElements(1, s16, 8)
1108 .clampMaxNumElements(1, s8, 16)
1109 .lower();
1110
1111 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1112 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1113 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1114 .legalIf([=](const LegalityQuery &Query) {
1115 const auto &Ty = Query.Types[1];
1116 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1117 })
1118 .minScalarOrElt(0, MinFPScalar)
1119 .clampMaxNumElements(1, s64, 2)
1120 .clampMaxNumElements(1, s32, 4)
1121 .clampMaxNumElements(1, s16, 8)
1122 .lower();
1123
1124 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1125 .clampMaxNumElements(1, s32, 2)
1126 .clampMaxNumElements(1, s16, 4)
1127 .clampMaxNumElements(1, s8, 8)
1128 .scalarize(1)
1129 .lower();
1130
1131 getActionDefinitionsBuilder(
1132 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1133 .legalFor({{s8, v8s8},
1134 {s8, v16s8},
1135 {s16, v4s16},
1136 {s16, v8s16},
1137 {s32, v2s32},
1138 {s32, v4s32}})
1139 .moreElementsIf(
1140 [=](const LegalityQuery &Query) {
1141 return Query.Types[1].isVector() &&
1142 Query.Types[1].getElementType() != s8 &&
1143 Query.Types[1].getNumElements() & 1;
1144 },
1146 .clampMaxNumElements(1, s64, 2)
1147 .clampMaxNumElements(1, s32, 4)
1148 .clampMaxNumElements(1, s16, 8)
1149 .clampMaxNumElements(1, s8, 16)
1150 .scalarize(1)
1151 .lower();
1152
1153 getActionDefinitionsBuilder(
1154 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1155 // Try to break down into smaller vectors as long as they're at least 64
1156 // bits. This lets us use vector operations for some parts of the
1157 // reduction.
1158 .fewerElementsIf(
1159 [=](const LegalityQuery &Q) {
1160 LLT SrcTy = Q.Types[1];
1161 if (SrcTy.isScalar())
1162 return false;
1163 if (!isPowerOf2_32(SrcTy.getNumElements()))
1164 return false;
1165 // We can usually perform 64b vector operations.
1166 return SrcTy.getSizeInBits() > 64;
1167 },
1168 [=](const LegalityQuery &Q) {
1169 LLT SrcTy = Q.Types[1];
1170 return std::make_pair(1, SrcTy.divide(2));
1171 })
1172 .scalarize(1)
1173 .lower();
1174
1175 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1176 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1177 .lower();
1178
1179 getActionDefinitionsBuilder(G_ROTR)
1180 .legalFor({{s32, s64}, {s64, s64}})
1181 .customIf([=](const LegalityQuery &Q) {
1182 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1183 })
1184 .lower();
1186
1187 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1188 .customFor({{s32, s32}, {s64, s64}});
1189
1190 auto always = [=](const LegalityQuery &Q) { return true; };
1191 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1192 if (HasCSSC)
1193 CTPOPActions
1194 .legalFor({{s32, s32},
1195 {s64, s64},
1196 {v8s8, v8s8},
1197 {v16s8, v16s8}})
1198 .customFor({{s128, s128},
1199 {v2s64, v2s64},
1200 {v2s32, v2s32},
1201 {v4s32, v4s32},
1202 {v4s16, v4s16},
1203 {v8s16, v8s16}});
1204 else
1205 CTPOPActions
1206 .legalFor({{v8s8, v8s8},
1207 {v16s8, v16s8}})
1208 .customFor({{s32, s32},
1209 {s64, s64},
1210 {s128, s128},
1211 {v2s64, v2s64},
1212 {v2s32, v2s32},
1213 {v4s32, v4s32},
1214 {v4s16, v4s16},
1215 {v8s16, v8s16}});
1216 CTPOPActions
1217 .clampScalar(0, s32, s128)
1218 .widenScalarToNextPow2(0)
1219 .minScalarEltSameAsIf(always, 1, 0)
1220 .maxScalarEltSameAsIf(always, 1, 0);
1221
1222 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1223 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1224 .clampNumElements(0, v8s8, v16s8)
1225 .clampNumElements(0, v4s16, v8s16)
1226 .clampNumElements(0, v2s32, v4s32)
1227 .clampMaxNumElements(0, s64, 2)
1229 .lower();
1230
1231 // TODO: Libcall support for s128.
1232 // TODO: s16 should be legal with full FP16 support.
1233 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1234 .legalFor({{s64, s32}, {s64, s64}});
1235
1236 // TODO: Custom legalization for mismatched types.
1237 getActionDefinitionsBuilder(G_FCOPYSIGN)
1238 .moreElementsIf(
1239 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1240 [=](const LegalityQuery &Query) {
1241 const LLT Ty = Query.Types[0];
1242 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1243 })
1244 .lower();
1245
1247
1248 // Access to floating-point environment.
1249 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1250 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1251 .libcall();
1252
1253 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1254
1255 getActionDefinitionsBuilder(G_PREFETCH).custom();
1256
1257 getLegacyLegalizerInfo().computeTables();
1258 verify(*ST.getInstrInfo());
1259}
1260
1261bool AArch64LegalizerInfo::legalizeCustom(
1262 LegalizerHelper &Helper, MachineInstr &MI,
1263 LostDebugLocObserver &LocObserver) const {
1264 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1265 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1266 GISelChangeObserver &Observer = Helper.Observer;
1267 switch (MI.getOpcode()) {
1268 default:
1269 // No idea what to do.
1270 return false;
1271 case TargetOpcode::G_VAARG:
1272 return legalizeVaArg(MI, MRI, MIRBuilder);
1273 case TargetOpcode::G_LOAD:
1274 case TargetOpcode::G_STORE:
1275 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1276 case TargetOpcode::G_SHL:
1277 case TargetOpcode::G_ASHR:
1278 case TargetOpcode::G_LSHR:
1279 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1280 case TargetOpcode::G_GLOBAL_VALUE:
1281 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1282 case TargetOpcode::G_SBFX:
1283 case TargetOpcode::G_UBFX:
1284 return legalizeBitfieldExtract(MI, MRI, Helper);
1285 case TargetOpcode::G_FSHL:
1286 case TargetOpcode::G_FSHR:
1287 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1288 case TargetOpcode::G_ROTR:
1289 return legalizeRotate(MI, MRI, Helper);
1290 case TargetOpcode::G_CTPOP:
1291 return legalizeCTPOP(MI, MRI, Helper);
1292 case TargetOpcode::G_ATOMIC_CMPXCHG:
1293 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1294 case TargetOpcode::G_CTTZ:
1295 return legalizeCTTZ(MI, Helper);
1296 case TargetOpcode::G_BZERO:
1297 case TargetOpcode::G_MEMCPY:
1298 case TargetOpcode::G_MEMMOVE:
1299 case TargetOpcode::G_MEMSET:
1300 return legalizeMemOps(MI, Helper);
1301 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1302 return legalizeExtractVectorElt(MI, MRI, Helper);
1303 case TargetOpcode::G_DYN_STACKALLOC:
1304 return legalizeDynStackAlloc(MI, Helper);
1305 case TargetOpcode::G_PREFETCH:
1306 return legalizePrefetch(MI, Helper);
1307 case TargetOpcode::G_ABS:
1308 return Helper.lowerAbsToCNeg(MI);
1309 case TargetOpcode::G_ICMP:
1310 return legalizeICMP(MI, MRI, MIRBuilder);
1311 }
1312
1313 llvm_unreachable("expected switch to return");
1314}
1315
1316bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1317 MachineRegisterInfo &MRI,
1318 MachineIRBuilder &MIRBuilder,
1319 GISelChangeObserver &Observer,
1320 LegalizerHelper &Helper) const {
1321 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1322 MI.getOpcode() == TargetOpcode::G_FSHR);
1323
1324 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1325 // lowering
1326 Register ShiftNo = MI.getOperand(3).getReg();
1327 LLT ShiftTy = MRI.getType(ShiftNo);
1328 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1329
1330 // Adjust shift amount according to Opcode (FSHL/FSHR)
1331 // Convert FSHL to FSHR
1332 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1333 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1334
1335 // Lower non-constant shifts and leave zero shifts to the optimizer.
1336 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1337 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1338 LegalizerHelper::LegalizeResult::Legalized);
1339
1340 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1341
1342 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1343
1344 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1345 // the range [0, BitWidth), it is legal as-is.
1346 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1347 VRegAndVal->Value.ult(BitWidth))
1348 return true;
1349
1350 // Cast the ShiftNumber to a 64-bit type
1351 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1352
1353 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1354 Observer.changingInstr(MI);
1355 MI.getOperand(3).setReg(Cast64.getReg(0));
1356 Observer.changedInstr(MI);
1357 }
1358 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1359 // instruction
1360 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1361 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1362 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1363 Cast64.getReg(0)});
1364 MI.eraseFromParent();
1365 }
1366 return true;
1367}
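// Rough example of the rewrite above: %d = G_FSHL %a, %b, %c with a constant
// %c becomes %d = G_FSHR %a, %b, (BitWidth - %c) using a 64-bit constant
// amount, which the imported patterns can then select as an EXTR-style
// instruction.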
1368
1369bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1370 MachineRegisterInfo &MRI,
1371 MachineIRBuilder &MIRBuilder) const {
1372 Register DstReg = MI.getOperand(0).getReg();
1373 Register SrcReg1 = MI.getOperand(2).getReg();
1374 Register SrcReg2 = MI.getOperand(3).getReg();
1375 LLT DstTy = MRI.getType(DstReg);
1376 LLT SrcTy = MRI.getType(SrcReg1);
1377
1378 // Check the vector types are legal
1379 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1380 DstTy.getNumElements() != SrcTy.getNumElements() ||
1381 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1382 return false;
1383
1384 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1385 // following passes
1386 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1387 if (Pred != CmpInst::ICMP_NE)
1388 return true;
1389 Register CmpReg =
1390 MIRBuilder
1391 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1392 .getReg(0);
1393 MIRBuilder.buildNot(DstReg, CmpReg);
1394
1395 MI.eraseFromParent();
1396 return true;
1397}
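// Rough example of the rewrite above: a vector %d:_(v4s32) = G_ICMP
// intpred(ne), %a, %b becomes G_ICMP intpred(eq) followed by a G_XOR with -1
// (buildNot), which typically selects to CMEQ plus a vector NOT.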
1398
1399bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1400 MachineRegisterInfo &MRI,
1401 LegalizerHelper &Helper) const {
1402 // To allow for imported patterns to match, we ensure that the rotate amount
1403 // is 64b with an extension.
1404 Register AmtReg = MI.getOperand(2).getReg();
1405 LLT AmtTy = MRI.getType(AmtReg);
1406 (void)AmtTy;
1407 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1408 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1409 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1410 Helper.Observer.changingInstr(MI);
1411 MI.getOperand(2).setReg(NewAmt.getReg(0));
1412 Helper.Observer.changedInstr(MI);
1413 return true;
1414}
1415
1416bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1417 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1418 GISelChangeObserver &Observer) const {
1419 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1420 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1421 // G_ADD_LOW instructions.
1422 // By splitting this here, we can optimize accesses in the small code model by
1423 // folding the G_ADD_LOW into the load/store offset.
1424 auto &GlobalOp = MI.getOperand(1);
1425 // Don't modify an intrinsic call.
1426 if (GlobalOp.isSymbol())
1427 return true;
1428 const auto* GV = GlobalOp.getGlobal();
1429 if (GV->isThreadLocal())
1430 return true; // Don't want to modify TLS vars.
1431
1432 auto &TM = ST->getTargetLowering()->getTargetMachine();
1433 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1434
1435 if (OpFlags & AArch64II::MO_GOT)
1436 return true;
1437
1438 auto Offset = GlobalOp.getOffset();
1439 Register DstReg = MI.getOperand(0).getReg();
1440 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1441 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1442 // Set the regclass on the dest reg too.
1443 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1444
1445 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1446 // by creating a MOVK that sets bits 48-63 of the register to (global address
1447 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1448 // prevent an incorrect tag being generated during relocation when the
1449 // global appears before the code section. Without the offset, a global at
1450 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1451 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1452 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1453 // instead of `0xf`.
1454 // This assumes that we're in the small code model so we can assume a binary
1455 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1456 // binary must also be loaded into address range [0, 2^48). Both of these
1457 // properties need to be ensured at runtime when using tagged addresses.
1458 if (OpFlags & AArch64II::MO_TAGGED) {
1459 assert(!Offset &&
1460 "Should not have folded in an offset for a tagged global!");
1461 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1462 .addGlobalAddress(GV, 0x100000000,
1464 .addImm(48);
1465 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1466 }
1467
1468 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1469 .addGlobalAddress(GV, Offset,
1471 MI.eraseFromParent();
1472 return true;
1473}
1474
1475bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1476 MachineInstr &MI) const {
1477 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1478 switch (IntrinsicID) {
1479 case Intrinsic::vacopy: {
1480 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1481 unsigned VaListSize =
1482 (ST->isTargetDarwin() || ST->isTargetWindows())
1483 ? PtrSize
1484 : ST->isTargetILP32() ? 20 : 32;
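    // For example, va_list is a single pointer (PtrSize bytes) on Darwin and
    // Windows, while the AAPCS64 va_list struct is 32 bytes (20 under ILP32),
    // so vacopy below is just a load and a store of that many bytes.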
1485
1486 MachineFunction &MF = *MI.getMF();
1488 LLT::scalar(VaListSize * 8));
1489 MachineIRBuilder MIB(MI);
1490 MIB.buildLoad(Val, MI.getOperand(2),
1493 VaListSize, Align(PtrSize)));
1494 MIB.buildStore(Val, MI.getOperand(1),
1497 VaListSize, Align(PtrSize)));
1498 MI.eraseFromParent();
1499 return true;
1500 }
1501 case Intrinsic::get_dynamic_area_offset: {
1502 MachineIRBuilder &MIB = Helper.MIRBuilder;
1503 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1504 MI.eraseFromParent();
1505 return true;
1506 }
1507 case Intrinsic::aarch64_mops_memset_tag: {
1508 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1509 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1510 // the instruction).
1511 MachineIRBuilder MIB(MI);
1512 auto &Value = MI.getOperand(3);
1513 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1514 Value.setReg(ExtValueReg);
1515 return true;
1516 }
1517 case Intrinsic::aarch64_prefetch: {
1518 MachineIRBuilder MIB(MI);
1519 auto &AddrVal = MI.getOperand(1);
1520
1521 int64_t IsWrite = MI.getOperand(2).getImm();
1522 int64_t Target = MI.getOperand(3).getImm();
1523 int64_t IsStream = MI.getOperand(4).getImm();
1524 int64_t IsData = MI.getOperand(5).getImm();
1525
1526 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1527 (!IsData << 3) | // IsDataCache bit
1528 (Target << 1) | // Cache level bits
1529 (unsigned)IsStream; // Stream bit
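    // For example, a read prefetch of data into L1 with temporal locality
    // (IsWrite=0, Target=0, IsStream=0, IsData=1) gives PrfOp == 0, i.e.
    // PLDL1KEEP.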
1530
1531 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1532 MI.eraseFromParent();
1533 return true;
1534 }
1535 case Intrinsic::aarch64_neon_uaddv:
1536 case Intrinsic::aarch64_neon_saddv:
1537 case Intrinsic::aarch64_neon_umaxv:
1538 case Intrinsic::aarch64_neon_smaxv:
1539 case Intrinsic::aarch64_neon_uminv:
1540 case Intrinsic::aarch64_neon_sminv: {
1541 MachineIRBuilder MIB(MI);
1542 MachineRegisterInfo &MRI = *MIB.getMRI();
1543 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1544 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1545 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1546
1547 auto OldDst = MI.getOperand(0).getReg();
1548 auto OldDstTy = MRI.getType(OldDst);
1549 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1550 if (OldDstTy == NewDstTy)
1551 return true;
1552
1553 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1554
1555 Helper.Observer.changingInstr(MI);
1556 MI.getOperand(0).setReg(NewDst);
1557 Helper.Observer.changedInstr(MI);
1558
1559 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1560 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1561 OldDst, NewDst);
1562
1563 return true;
1564 }
1565 case Intrinsic::aarch64_neon_uaddlp:
1566 case Intrinsic::aarch64_neon_saddlp: {
1567 MachineIRBuilder MIB(MI);
1568
1569 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1570 ? AArch64::G_UADDLP
1571 : AArch64::G_SADDLP;
1572 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1573 MI.eraseFromParent();
1574
1575 return true;
1576 }
1577 case Intrinsic::aarch64_neon_uaddlv:
1578 case Intrinsic::aarch64_neon_saddlv: {
1579 MachineIRBuilder MIB(MI);
1580 MachineRegisterInfo &MRI = *MIB.getMRI();
1581
1582 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1583 ? AArch64::G_UADDLV
1584 : AArch64::G_SADDLV;
1585 Register DstReg = MI.getOperand(0).getReg();
1586 Register SrcReg = MI.getOperand(2).getReg();
1587 LLT DstTy = MRI.getType(DstReg);
1588
1589 LLT MidTy, ExtTy;
1590 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1591 MidTy = LLT::fixed_vector(4, 32);
1592 ExtTy = LLT::scalar(32);
1593 } else {
1594 MidTy = LLT::fixed_vector(2, 64);
1595 ExtTy = LLT::scalar(64);
1596 }
1597
1598 Register MidReg =
1599 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1600 Register ZeroReg =
1601 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1602 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1603 {MidReg, ZeroReg})
1604 .getReg(0);
1605
1606 if (DstTy.getScalarSizeInBits() < 32)
1607 MIB.buildTrunc(DstReg, ExtReg);
1608 else
1609 MIB.buildCopy(DstReg, ExtReg);
1610
1611 MI.eraseFromParent();
1612
1613 return true;
1614 }
1615 case Intrinsic::aarch64_neon_smax:
1616 case Intrinsic::aarch64_neon_smin:
1617 case Intrinsic::aarch64_neon_umax:
1618 case Intrinsic::aarch64_neon_umin:
1619 case Intrinsic::aarch64_neon_fmax:
1620 case Intrinsic::aarch64_neon_fmin:
1621 case Intrinsic::aarch64_neon_fmaxnm:
1622 case Intrinsic::aarch64_neon_fminnm: {
1623 MachineIRBuilder MIB(MI);
1624 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1625 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1626 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1627 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1628 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1629 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1630 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1631 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1632 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1633 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1634 {MI.getOperand(2), MI.getOperand(3)});
1635 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1636 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1637 {MI.getOperand(2), MI.getOperand(3)});
1638 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1639 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1640 {MI.getOperand(2), MI.getOperand(3)});
1641 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1642 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1643 {MI.getOperand(2), MI.getOperand(3)});
1644 MI.eraseFromParent();
1645 return true;
1646 }
1647 case Intrinsic::vector_reverse:
1648 // TODO: Add support for vector_reverse
1649 return false;
1650 }
1651
1652 return true;
1653}
1654
1655bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1656 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1657 GISelChangeObserver &Observer) const {
1658 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1659 MI.getOpcode() == TargetOpcode::G_LSHR ||
1660 MI.getOpcode() == TargetOpcode::G_SHL);
1661 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1662 // imported patterns can select it later. Either way, it will be legal.
1663 Register AmtReg = MI.getOperand(2).getReg();
1664 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1665 if (!VRegAndVal)
1666 return true;
1667 // Check the shift amount is in range for an immediate form.
1668 int64_t Amount = VRegAndVal->Value.getSExtValue();
1669 if (Amount > 31)
1670 return true; // This will have to remain a register variant.
1671 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1672 Observer.changingInstr(MI);
1673 MI.getOperand(2).setReg(ExtCst.getReg(0));
1674 Observer.changedInstr(MI);
1675 return true;
1676}
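// Rough example of the transform above: %d:_(s32) = G_SHL %x, %c(s32) with
// %c = G_CONSTANT i32 3 gets its amount rebuilt as a 64-bit constant 3, so
// the imported immediate-form patterns can match it.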
1677
1678static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1679 MachineRegisterInfo &MRI) {
1680 Base = Root;
1681 Offset = 0;
1682
1683 Register NewBase;
1684 int64_t NewOffset;
1685 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1686 isShiftedInt<7, 3>(NewOffset)) {
1687 Base = NewBase;
1688 Offset = NewOffset;
1689 }
1690}
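// Rough example: for a root of %p:_(p0) = G_PTR_ADD %base, 24 the helper
// above returns Base = %base and Offset = 24, since 24 is a multiple of 8
// within the signed 7-bit scaled range; the caller later divides it by 8 for
// the LDP/STP immediate.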
1691
1692// FIXME: This should be removed and replaced with the generic bitcast legalize
1693// action.
1694bool AArch64LegalizerInfo::legalizeLoadStore(
1695 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1696 GISelChangeObserver &Observer) const {
1697 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1698 MI.getOpcode() == TargetOpcode::G_LOAD);
1699 // Here we just try to handle vector loads/stores where our value type might
1700 // have pointer elements, which the SelectionDAG importer can't handle. To
1701 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1702 // the value to use s64 types.
1703
1704 // Custom legalization requires that the instruction, if not deleted, be fully
1705 // legalized. In order to allow further legalization of the inst, we create
1706 // a new instruction and erase the existing one.
1707
1708 Register ValReg = MI.getOperand(0).getReg();
1709 const LLT ValTy = MRI.getType(ValReg);
1710
1711 if (ValTy == LLT::scalar(128)) {
1712
1713 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1714 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1715 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1716 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1717 bool IsRcpC3 =
1718 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1719
1720 LLT s64 = LLT::scalar(64);
1721
1722 unsigned Opcode;
1723 if (IsRcpC3) {
1724 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1725 } else {
1726 // For LSE2, loads/stores should have been converted to monotonic and had
1727 // a fence inserted after them.
1728 assert(Ordering == AtomicOrdering::Monotonic ||
1729 Ordering == AtomicOrdering::Unordered);
1730 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1731
1732 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1733 }
1734
1735 MachineInstrBuilder NewI;
1736 if (IsLoad) {
1737 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1738 MIRBuilder.buildMergeLikeInstr(
1739 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1740 } else {
1741 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1742 NewI = MIRBuilder.buildInstr(
1743 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1744 }
1745
1746 if (IsRcpC3) {
1747 NewI.addUse(MI.getOperand(1).getReg());
1748 } else {
1749 Register Base;
1750 int Offset;
1751 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1752 NewI.addUse(Base);
1753 NewI.addImm(Offset / 8);
1754 }
1755
1756 NewI.cloneMemRefs(MI);
1757 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1758 *MRI.getTargetRegisterInfo(),
1759 *ST->getRegBankInfo());
1760 MI.eraseFromParent();
1761 return true;
1762 }
1763
1764 if (!ValTy.isPointerVector() ||
1765 ValTy.getElementType().getAddressSpace() != 0) {
1766 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1767 return false;
1768 }
1769
1770 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1771 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1772 auto &MMO = **MI.memoperands_begin();
1773 MMO.setType(NewTy);
1774
1775 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1776 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1777 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1778 } else {
1779 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1780 MIRBuilder.buildBitcast(ValReg, NewLoad);
1781 }
1782 MI.eraseFromParent();
1783 return true;
1784}
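// Illustrative example (assumed MIR) of the bitcast path above: a store of a
// <2 x p0> value becomes
//   %cast:_(<2 x s64>) = G_BITCAST %val(<2 x p0>)
//   G_STORE %cast(<2 x s64>), %addr(p0) :: (store (<2 x s64>))
// so the existing 64-bit element vector store patterns can select it.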
1785
1786bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1787 MachineRegisterInfo &MRI,
1788 MachineIRBuilder &MIRBuilder) const {
1789 MachineFunction &MF = MIRBuilder.getMF();
1790 Align Alignment(MI.getOperand(2).getImm());
1791 Register Dst = MI.getOperand(0).getReg();
1792 Register ListPtr = MI.getOperand(1).getReg();
1793
1794 LLT PtrTy = MRI.getType(ListPtr);
1795 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1796
1797 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1798 const Align PtrAlign = Align(PtrSize);
1799 auto List = MIRBuilder.buildLoad(
1800 PtrTy, ListPtr,
1801 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1802 PtrTy, PtrAlign));
1803
1804 MachineInstrBuilder DstPtr;
1805 if (Alignment > PtrAlign) {
1806 // Realign the list to the actual required alignment.
1807 auto AlignMinus1 =
1808 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1809 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1810 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1811 } else
1812 DstPtr = List;
1813
1814 LLT ValTy = MRI.getType(Dst);
1815 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1816 MIRBuilder.buildLoad(
1817 Dst, DstPtr,
1818 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1819 ValTy, std::max(Alignment, PtrAlign)));
1820
1821 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1822
1823 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1824
1825 MIRBuilder.buildStore(NewList, ListPtr,
1826 *MF.getMachineMemOperand(MachinePointerInfo(),
1827 MachineMemOperand::MOStore,
1828 PtrTy, PtrAlign));
1829
1830 MI.eraseFromParent();
1831 return true;
1832}
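// Illustrative expansion (assumed MIR) for a G_VAARG of s64 with 8-byte
// alignment, where the va_list is a plain pointer and no realignment is
// needed:
//   %cur:_(p0)   = G_LOAD %list(p0)
//   %val:_(s64)  = G_LOAD %cur(p0)
//   %size:_(s64) = G_CONSTANT i64 8
//   %next:_(p0)  = G_PTR_ADD %cur, %size(s64)
//   G_STORE %next(p0), %list(p0)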
1833
1834bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1835 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1836 // Only legal if we can select immediate forms.
1837 // TODO: Lower this otherwise.
1838 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1839 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1840}
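// Illustrative example (assumed MIR): %r:_(s32) = G_UBFX %x, %lsb, %width is
// only accepted here when %lsb and %width are G_CONSTANTs, so it can later be
// selected to an immediate UBFM form; the non-constant case is the TODO above.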
1841
1842bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1843 MachineRegisterInfo &MRI,
1844 LegalizerHelper &Helper) const {
1845 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1846 // it can be more efficiently lowered to the following sequence that uses
1847 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1848 // registers are cheap.
1849 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1850 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1851 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1852 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1853 //
1854 // For 128 bit vector popcounts, we lower to the following sequence:
1855 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1856 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1857 // uaddlp.4s v0, v0 // v4s32, v2s64
1858 // uaddlp.2d v0, v0 // v2s64
1859 //
1860 // For 64 bit vector popcounts, we lower to the following sequence:
1861 // cnt.8b v0, v0 // v4s16, v2s32
1862 // uaddlp.4h v0, v0 // v4s16, v2s32
1863 // uaddlp.2s v0, v0 // v2s32
1864
1865 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1866 Register Dst = MI.getOperand(0).getReg();
1867 Register Val = MI.getOperand(1).getReg();
1868 LLT Ty = MRI.getType(Val);
1869 unsigned Size = Ty.getSizeInBits();
1870
1871 assert(Ty == MRI.getType(Dst) &&
1872 "Expected src and dst to have the same type!");
1873
1874 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1875 LLT s64 = LLT::scalar(64);
1876
1877 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1878 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1879 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1880 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1881
1882 MIRBuilder.buildZExt(Dst, Add);
1883 MI.eraseFromParent();
1884 return true;
1885 }
1886
1887 if (!ST->hasNEON() ||
1888 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1889 // Use generic lowering when custom lowering is not possible.
1890 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1891 Helper.lowerBitCount(MI) ==
1892 LegalizerHelper::LegalizeResult::Legalized;
1893 }
1894
1895 // Pre-conditioning: widen Val up to the nearest vector type.
1896 // s32,s64,v4s16,v2s32 -> v8i8
1897 // v8s16,v4s32,v2s64 -> v16i8
1898 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1899 if (Ty.isScalar()) {
1900 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1901 if (Size == 32) {
1902 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1903 }
1904 }
1905 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1906
1907 // Count bits in each byte-sized lane.
1908 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1909
1910 // Sum across lanes.
1911 Register HSum = CTPOP.getReg(0);
1912 unsigned Opc;
1913 SmallVector<LLT> HAddTys;
1914 if (Ty.isScalar()) {
1915 Opc = Intrinsic::aarch64_neon_uaddlv;
1916 HAddTys.push_back(LLT::scalar(32));
1917 } else if (Ty == LLT::fixed_vector(8, 16)) {
1918 Opc = Intrinsic::aarch64_neon_uaddlp;
1919 HAddTys.push_back(LLT::fixed_vector(8, 16));
1920 } else if (Ty == LLT::fixed_vector(4, 32)) {
1921 Opc = Intrinsic::aarch64_neon_uaddlp;
1922 HAddTys.push_back(LLT::fixed_vector(8, 16));
1923 HAddTys.push_back(LLT::fixed_vector(4, 32));
1924 } else if (Ty == LLT::fixed_vector(2, 64)) {
1925 Opc = Intrinsic::aarch64_neon_uaddlp;
1926 HAddTys.push_back(LLT::fixed_vector(8, 16));
1927 HAddTys.push_back(LLT::fixed_vector(4, 32));
1928 HAddTys.push_back(LLT::fixed_vector(2, 64));
1929 } else if (Ty == LLT::fixed_vector(4, 16)) {
1930 Opc = Intrinsic::aarch64_neon_uaddlp;
1931 HAddTys.push_back(LLT::fixed_vector(4, 16));
1932 } else if (Ty == LLT::fixed_vector(2, 32)) {
1933 Opc = Intrinsic::aarch64_neon_uaddlp;
1934 HAddTys.push_back(LLT::fixed_vector(4, 16));
1935 HAddTys.push_back(LLT::fixed_vector(2, 32));
1936 } else
1937 llvm_unreachable("unexpected vector shape");
1938 MachineInstrBuilder UADD;
1939 for (LLT HTy : HAddTys) {
1940 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1941 HSum = UADD.getReg(0);
1942 }
1943
1944 // Post-conditioning.
1945 if (Ty.isScalar() && (Size == 64 || Size == 128))
1946 MIRBuilder.buildZExt(Dst, UADD);
1947 else
1948 UADD->getOperand(0).setReg(Dst);
1949 MI.eraseFromParent();
1950 return true;
1951}
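// Illustrative end-to-end result (assumed MIR) for a scalar s64 G_CTPOP when
// CSSC is unavailable, matching the comment at the top of the function:
//   %v:_(<8 x s8>) = G_BITCAST %x(s64)
//   %c:_(<8 x s8>) = G_CTPOP %v
//   %s:_(s32)      = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %c
//   %r:_(s64)      = G_ZEXT %s(s32)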
1952
1953bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1954 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1955 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1956 LLT s64 = LLT::scalar(64);
1957 auto Addr = MI.getOperand(1).getReg();
1958 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1959 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1960 auto DstLo = MRI.createGenericVirtualRegister(s64);
1961 auto DstHi = MRI.createGenericVirtualRegister(s64);
1962
1963 MachineInstrBuilder CAS;
1964 if (ST->hasLSE()) {
1965 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1966 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1967 // the rest of the MIR so we must reassemble the extracted registers into a
1968 // 128-bit known-regclass one with code like this:
1969 //
1970 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1971 // %out = CASP %in1, ...
1972 // %OldLo = G_EXTRACT %out, 0
1973 // %OldHi = G_EXTRACT %out, 64
1974 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1975 unsigned Opcode;
1976 switch (Ordering) {
1977 case AtomicOrdering::Acquire:
1978 Opcode = AArch64::CASPAX;
1979 break;
1980 case AtomicOrdering::Release:
1981 Opcode = AArch64::CASPLX;
1982 break;
1983 case AtomicOrdering::AcquireRelease:
1984 case AtomicOrdering::SequentiallyConsistent:
1985 Opcode = AArch64::CASPALX;
1986 break;
1987 default:
1988 Opcode = AArch64::CASPX;
1989 break;
1990 }
1991
1992 LLT s128 = LLT::scalar(128);
1993 auto CASDst = MRI.createGenericVirtualRegister(s128);
1994 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1995 auto CASNew = MRI.createGenericVirtualRegister(s128);
1996 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1997 .addUse(DesiredI->getOperand(0).getReg())
1998 .addImm(AArch64::sube64)
1999 .addUse(DesiredI->getOperand(1).getReg())
2000 .addImm(AArch64::subo64);
2001 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2002 .addUse(NewI->getOperand(0).getReg())
2003 .addImm(AArch64::sube64)
2004 .addUse(NewI->getOperand(1).getReg())
2005 .addImm(AArch64::subo64);
2006
2007 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2008
2009 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2010 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2011 } else {
2012 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2013 // can take arbitrary registers so it just has the normal GPR64 operands the
2014 // rest of AArch64 is expecting.
2015 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2016 unsigned Opcode;
2017 switch (Ordering) {
2018 case AtomicOrdering::Acquire:
2019 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2020 break;
2021 case AtomicOrdering::Release:
2022 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2023 break;
2024 case AtomicOrdering::AcquireRelease:
2025 case AtomicOrdering::SequentiallyConsistent:
2026 Opcode = AArch64::CMP_SWAP_128;
2027 break;
2028 default:
2029 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2030 break;
2031 }
2032
2033 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2034 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2035 {Addr, DesiredI->getOperand(0),
2036 DesiredI->getOperand(1), NewI->getOperand(0),
2037 NewI->getOperand(1)});
2038 }
2039
2040 CAS.cloneMemRefs(MI);
2041 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2042 *MRI.getTargetRegisterInfo(),
2043 *ST->getRegBankInfo());
2044
2045 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2046 MI.eraseFromParent();
2047 return true;
2048}
2049
2050bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2051 LegalizerHelper &Helper) const {
2052 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2053 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2054 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2055 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2056 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2057 MI.eraseFromParent();
2058 return true;
2059}
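// Worked example of the identity used above, cttz(x) == ctlz(bitreverse(x)):
// for an s8 value x = 0b00000100, cttz(x) = 2; bitreverse(x) = 0b00100000,
// whose ctlz is also 2.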
2060
2061bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2062 LegalizerHelper &Helper) const {
2063 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2064
2065 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2066 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2067 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2068 // the instruction).
2069 auto &Value = MI.getOperand(1);
2070 Register ExtValueReg =
2071 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2072 Value.setReg(ExtValueReg);
2073 return true;
2074 }
2075
2076 return false;
2077}
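// Illustrative example (assumed MIR) for the G_MEMSET case above: an s8 fill
// value is widened in place,
//   %v64:_(s64) = G_ANYEXT %val(s8)
// and G_MEMSET keeps using it; only the low 8 bits are read by the eventual
// instruction, as the comment notes.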
2078
2079bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2080 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2081 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2082 auto VRegAndVal =
2083 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2084 if (VRegAndVal)
2085 return true;
2086 return Helper.lowerExtractInsertVectorElt(MI) !=
2087 LegalizerHelper::LegalizeResult::UnableToLegalize;
2088}
2089
2090bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2091 MachineInstr &MI, LegalizerHelper &Helper) const {
2092 MachineFunction &MF = *MI.getParent()->getParent();
2093 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2095
2096 // If stack probing is not enabled for this function, use the default
2097 // lowering.
2098 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2099 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2100 "inline-asm") {
2101 Helper.lowerDynStackAlloc(MI);
2102 return true;
2103 }
2104
2105 Register Dst = MI.getOperand(0).getReg();
2106 Register AllocSize = MI.getOperand(1).getReg();
2107 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2108
2109 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2110 "Unexpected type for dynamic alloca");
2111 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2112 "Unexpected type for dynamic alloca");
2113
2114 LLT PtrTy = MRI.getType(Dst);
2115 Register SPReg =
2116 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2117 Register SPTmp =
2118 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2119 auto NewMI =
2120 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2121 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2122 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2123 MIRBuilder.buildCopy(Dst, SPTmp);
2124
2125 MI.eraseFromParent();
2126 return true;
2127}
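// Illustrative trigger (assumed IR attribute) for the probed path above: a
// function declared with
//   attributes #0 = { "probe-stack"="inline-asm" }
// gets the PROBED_STACKALLOC_DYN pseudo; any other function falls back to the
// generic lowerDynStackAlloc lowering.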
2128
2129bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2130 LegalizerHelper &Helper) const {
2131 MachineIRBuilder &MIB = Helper.MIRBuilder;
2132 auto &AddrVal = MI.getOperand(0);
2133
2134 int64_t IsWrite = MI.getOperand(1).getImm();
2135 int64_t Locality = MI.getOperand(2).getImm();
2136 int64_t IsData = MI.getOperand(3).getImm();
2137
2138 bool IsStream = Locality == 0;
2139 if (Locality != 0) {
2140 assert(Locality <= 3 && "Prefetch locality out-of-range");
2141 // The IR locality degree is the inverse of the target cache level, so
2142 // flip the value: the PRFM encoding starts at 0 for L1 (highest
2143 // locality) and grows towards the outer caches.
2144 Locality = 3 - Locality;
2145 }
2146
2147 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2148
2149 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2150 MI.eraseFromParent();
2151 return true;
2152}
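// Worked example of the PrfOp encoding above (assumed values): a read data
// prefetch with maximal locality, i.e. IsWrite = 0, IsData = 1, Locality = 3,
// gives IsStream = 0 and a remapped Locality of 3 - 3 = 0, so
// PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0b00000, the PLDL1KEEP hint.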