AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
107
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
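  // Rough illustration of the "sN * k = 128" rule above: an operand of type
  // <8 x s32> is 256 bits wide, so clampMaxNumElements(0, s32, 4) narrows it
  // to pieces no wider than <4 x s32>, i.e. what fits in one 128-bit Q
  // register:
  //   <8 x s32> (256 bits)  -->  handled as 2 x <4 x s32> (128 bits each)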
119
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
 220 // Clamping is made conditional on the CSSC extension: without legal
 221 // scalar types we lower to CMP, which can fold one of the two sxtb's
 222 // we'd otherwise need when the type is smaller than 32 bits.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
 231 // FIXME: This shouldn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
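  // Without CSSC there is no scalar integer min/max instruction, so .lower()
  // expands a scalar G_SMIN/G_SMAX/G_UMIN/G_UMAX into a compare plus select,
  // roughly:
  //   %c:_(s1)  = G_ICMP intpred(slt), %a(s32), %b
  //   %r:_(s32) = G_SELECT %c(s1), %a, %b   ; r = smin(a, b)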
235
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
270 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
271 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10})
272 // We need a call for these, so we always need to scalarize.
273 .scalarize(0)
274 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
275 .minScalar(0, s32)
276 .libcallFor({s32, s64});
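  // There are no vector libcalls for these, so e.g. a <4 x s32> G_FSIN is
  // first scalarized into four s32 G_FSIN operations, and each of those is
  // then emitted as a libcall (sinf for s32, sin for s64).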
278 .scalarize(0)
279 .minScalar(0, s32)
280 .libcallFor({{s32, s32}, {s64, s32}});
281
283 .legalIf(all(typeInSet(0, {s32, s64, p0}),
284 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
286 .clampScalar(0, s32, s64)
288 .minScalar(1, s8)
289 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
290 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
291
293 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
294 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
296 .clampScalar(1, s32, s128)
298 .minScalar(0, s16)
299 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
300 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
301 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
302
303
304 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
305 auto &Actions = getActionDefinitionsBuilder(Op);
306
307 if (Op == G_SEXTLOAD)
309
310 // Atomics have zero extending behavior.
311 Actions
312 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
313 {s32, p0, s16, 8},
314 {s32, p0, s32, 8},
315 {s64, p0, s8, 2},
316 {s64, p0, s16, 2},
317 {s64, p0, s32, 4},
318 {s64, p0, s64, 8},
319 {p0, p0, s64, 8},
320 {v2s32, p0, s64, 8}})
321 .widenScalarToNextPow2(0)
322 .clampScalar(0, s32, s64)
323 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
324 // how to do that yet.
325 .unsupportedIfMemSizeNotPow2()
326 // Lower anything left over into G_*EXT and G_LOAD
327 .lower();
328 }
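  // Any extending load not matched above is split by .lower() into a plain
  // G_LOAD followed by an explicit extend, in the spirit of:
  //   %v = G_SEXTLOAD %p :: (load (s8))   ==>   %t = G_LOAD %p :: (load (s8))
  //                                             %v = G_SEXT %t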
329
330 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
331 const LLT &ValTy = Query.Types[0];
332 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
333 };
334
335 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
336 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
337
338 if (ST.hasSVE()) {
339 LoadActions.legalForTypesWithMemDesc({
340 // 128 bit base sizes
341 {nxv16s8, p0, nxv16s8, 8},
342 {nxv8s16, p0, nxv8s16, 8},
343 {nxv4s32, p0, nxv4s32, 8},
344 {nxv2s64, p0, nxv2s64, 8},
345 });
346
347 // TODO: Add nxv2p0. Consider bitcastIf.
348 // See #92130
349 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
350 StoreActions.legalForTypesWithMemDesc({
351 // 128 bit base sizes
352 {nxv16s8, p0, nxv16s8, 8},
353 {nxv8s16, p0, nxv8s16, 8},
354 {nxv4s32, p0, nxv4s32, 8},
355 {nxv2s64, p0, nxv2s64, 8},
356 });
357 }
358
359 LoadActions
360 .customIf([=](const LegalityQuery &Query) {
361 return HasRCPC3 && Query.Types[0] == s128 &&
362 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
363 })
364 .customIf([=](const LegalityQuery &Query) {
365 return Query.Types[0] == s128 &&
366 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
367 })
368 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
369 {s16, p0, s16, 8},
370 {s32, p0, s32, 8},
371 {s64, p0, s64, 8},
372 {p0, p0, s64, 8},
373 {s128, p0, s128, 8},
374 {v8s8, p0, s64, 8},
375 {v16s8, p0, s128, 8},
376 {v4s16, p0, s64, 8},
377 {v8s16, p0, s128, 8},
378 {v2s32, p0, s64, 8},
379 {v4s32, p0, s128, 8},
380 {v2s64, p0, s128, 8}})
381 // These extends are also legal
382 .legalForTypesWithMemDesc(
383 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
384 .widenScalarToNextPow2(0, /* MinSize = */ 8)
385 .clampMaxNumElements(0, s8, 16)
386 .clampMaxNumElements(0, s16, 8)
387 .clampMaxNumElements(0, s32, 4)
388 .clampMaxNumElements(0, s64, 2)
389 .clampMaxNumElements(0, p0, 2)
390 .lowerIfMemSizeNotByteSizePow2()
391 .clampScalar(0, s8, s64)
392 .narrowScalarIf(
393 [=](const LegalityQuery &Query) {
394 // Clamp extending load results to 32-bits.
395 return Query.Types[0].isScalar() &&
396 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
397 Query.Types[0].getSizeInBits() > 32;
398 },
399 changeTo(0, s32))
400 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
401 .bitcastIf(typeInSet(0, {v4s8}),
402 [=](const LegalityQuery &Query) {
403 const LLT VecTy = Query.Types[0];
404 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
405 })
406 .customIf(IsPtrVecPred)
407 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
408
409 StoreActions
410 .customIf([=](const LegalityQuery &Query) {
411 return HasRCPC3 && Query.Types[0] == s128 &&
412 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
413 })
414 .customIf([=](const LegalityQuery &Query) {
415 return Query.Types[0] == s128 &&
416 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
417 })
418 .legalForTypesWithMemDesc(
419 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
420 {s32, p0, s8, 8}, // truncstorei8 from s32
421 {s64, p0, s8, 8}, // truncstorei8 from s64
422 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
423 {s64, p0, s16, 8}, // truncstorei16 from s64
424 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
425 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
426 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
427 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
428 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
429 .clampScalar(0, s8, s64)
430 .lowerIf([=](const LegalityQuery &Query) {
431 return Query.Types[0].isScalar() &&
432 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
433 })
434 // Maximum: sN * k = 128
435 .clampMaxNumElements(0, s8, 16)
436 .clampMaxNumElements(0, s16, 8)
437 .clampMaxNumElements(0, s32, 4)
438 .clampMaxNumElements(0, s64, 2)
439 .clampMaxNumElements(0, p0, 2)
440 .lowerIfMemSizeNotPow2()
441 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
442 .bitcastIf(typeInSet(0, {v4s8}),
443 [=](const LegalityQuery &Query) {
444 const LLT VecTy = Query.Types[0];
445 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
446 })
447 .customIf(IsPtrVecPred)
448 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
449
450 getActionDefinitionsBuilder(G_INDEXED_STORE)
451 // Idx 0 == Ptr, Idx 1 == Val
452 // TODO: we can implement legalizations but as of now these are
453 // generated in a very specific way.
455 {p0, s8, s8, 8},
456 {p0, s16, s16, 8},
457 {p0, s32, s8, 8},
458 {p0, s32, s16, 8},
459 {p0, s32, s32, 8},
460 {p0, s64, s64, 8},
461 {p0, p0, p0, 8},
462 {p0, v8s8, v8s8, 8},
463 {p0, v16s8, v16s8, 8},
464 {p0, v4s16, v4s16, 8},
465 {p0, v8s16, v8s16, 8},
466 {p0, v2s32, v2s32, 8},
467 {p0, v4s32, v4s32, 8},
468 {p0, v2s64, v2s64, 8},
469 {p0, v2p0, v2p0, 8},
470 {p0, s128, s128, 8},
471 })
472 .unsupported();
473
474 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
475 LLT LdTy = Query.Types[0];
476 LLT PtrTy = Query.Types[1];
477 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
478 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
479 return false;
480 if (PtrTy != p0)
481 return false;
482 return true;
483 };
484 getActionDefinitionsBuilder(G_INDEXED_LOAD)
487 .legalIf(IndexedLoadBasicPred)
488 .unsupported();
489 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
490 .unsupportedIf(
492 .legalIf(all(typeInSet(0, {s16, s32, s64}),
493 LegalityPredicate([=](const LegalityQuery &Q) {
494 LLT LdTy = Q.Types[0];
495 LLT PtrTy = Q.Types[1];
496 LLT MemTy = Q.MMODescrs[0].MemoryTy;
497 if (PtrTy != p0)
498 return false;
499 if (LdTy == s16)
500 return MemTy == s8;
501 if (LdTy == s32)
502 return MemTy == s8 || MemTy == s16;
503 if (LdTy == s64)
504 return MemTy == s8 || MemTy == s16 || MemTy == s32;
505 return false;
506 })))
507 .unsupported();
508
509 // Constants
511 .legalFor({p0, s8, s16, s32, s64})
512 .widenScalarToNextPow2(0)
513 .clampScalar(0, s8, s64);
514 getActionDefinitionsBuilder(G_FCONSTANT)
515 .legalIf([=](const LegalityQuery &Query) {
516 const auto &Ty = Query.Types[0];
517 if (HasFP16 && Ty == s16)
518 return true;
519 return Ty == s32 || Ty == s64 || Ty == s128;
520 })
521 .clampScalar(0, MinFPScalar, s128);
522
523 // FIXME: fix moreElementsToNextPow2
525 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
527 .clampScalar(1, s32, s64)
528 .clampScalar(0, s32, s32)
529 .minScalarEltSameAsIf(
530 [=](const LegalityQuery &Query) {
531 const LLT &Ty = Query.Types[0];
532 const LLT &SrcTy = Query.Types[1];
533 return Ty.isVector() && !SrcTy.isPointerVector() &&
534 Ty.getElementType() != SrcTy.getElementType();
535 },
536 0, 1)
537 .minScalarOrEltIf(
538 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
539 1, s32)
540 .minScalarOrEltIf(
541 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
542 s64)
544 .clampNumElements(1, v8s8, v16s8)
545 .clampNumElements(1, v4s16, v8s16)
546 .clampNumElements(1, v2s32, v4s32)
547 .clampNumElements(1, v2s64, v2s64)
548 .customIf(isVector(0));
549
551 .legalFor({{s32, MinFPScalar},
552 {s32, s32},
553 {s32, s64},
554 {v4s32, v4s32},
555 {v2s32, v2s32},
556 {v2s64, v2s64}})
557 .legalIf([=](const LegalityQuery &Query) {
558 const auto &Ty = Query.Types[1];
559 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
560 })
562 .clampScalar(0, s32, s32)
563 .clampScalarOrElt(1, MinFPScalar, s64)
564 .minScalarEltSameAsIf(
565 [=](const LegalityQuery &Query) {
566 const LLT &Ty = Query.Types[0];
567 const LLT &SrcTy = Query.Types[1];
568 return Ty.isVector() && !SrcTy.isPointerVector() &&
569 Ty.getElementType() != SrcTy.getElementType();
570 },
571 0, 1)
572 .clampNumElements(1, v4s16, v8s16)
573 .clampNumElements(1, v2s32, v4s32)
574 .clampMaxNumElements(1, s64, 2)
575 .moreElementsToNextPow2(1);
576
577 // Extensions
578 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
579 unsigned DstSize = Query.Types[0].getSizeInBits();
580
581 // Handle legal vectors using legalFor
582 if (Query.Types[0].isVector())
583 return false;
584
585 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
586 return false; // Extending to a scalar s128 needs narrowing.
587
588 const LLT &SrcTy = Query.Types[1];
589
590 // Make sure we fit in a register otherwise. Don't bother checking that
591 // the source type is below 128 bits. We shouldn't be allowing anything
592 // through which is wider than the destination in the first place.
593 unsigned SrcSize = SrcTy.getSizeInBits();
594 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
595 return false;
596
597 return true;
598 };
599 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
600 .legalIf(ExtLegalFunc)
601 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
602 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
604 .clampMaxNumElements(1, s8, 8)
605 .clampMaxNumElements(1, s16, 4)
606 .clampMaxNumElements(1, s32, 2)
607 // Tries to convert a large EXTEND into two smaller EXTENDs
608 .lowerIf([=](const LegalityQuery &Query) {
609 return (Query.Types[0].getScalarSizeInBits() >
610 Query.Types[1].getScalarSizeInBits() * 2) &&
611 Query.Types[0].isVector() &&
612 (Query.Types[1].getScalarSizeInBits() == 8 ||
613 Query.Types[1].getScalarSizeInBits() == 16);
614 })
615 .clampMinNumElements(1, s8, 8)
616 .clampMinNumElements(1, s16, 4);
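  // The lowerIf above performs the "two smaller EXTENDs" split; e.g. a
  // <8 x s8> -> <8 x s32> sign-extend (more than doubling the element width)
  // becomes roughly:
  //   %t:_(<8 x s16>) = G_SEXT %x(<8 x s8>)
  //   %v:_(<8 x s32>) = G_SEXT %t(<8 x s16>)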
617
619 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
621 .clampMaxNumElements(0, s8, 8)
622 .clampMaxNumElements(0, s16, 4)
623 .clampMaxNumElements(0, s32, 2)
624 .minScalarOrEltIf(
625 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
626 0, s8)
627 .lowerIf([=](const LegalityQuery &Query) {
628 LLT DstTy = Query.Types[0];
629 LLT SrcTy = Query.Types[1];
630 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
631 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
632 })
633 .clampMinNumElements(0, s8, 8)
634 .clampMinNumElements(0, s16, 4)
635 .alwaysLegal();
636
637 getActionDefinitionsBuilder(G_SEXT_INREG)
638 .legalFor({s32, s64})
639 .legalFor(PackedVectorAllTypeList)
640 .maxScalar(0, s64)
641 .clampNumElements(0, v8s8, v16s8)
642 .clampNumElements(0, v4s16, v8s16)
643 .clampNumElements(0, v2s32, v4s32)
644 .clampMaxNumElements(0, s64, 2)
645 .lower();
646
647 // FP conversions
649 .legalFor(
650 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
651 .clampNumElements(0, v4s16, v4s16)
652 .clampNumElements(0, v2s32, v2s32)
653 .scalarize(0);
654
656 .legalFor(
657 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
658 .clampNumElements(0, v4s32, v4s32)
659 .clampNumElements(0, v2s64, v2s64)
660 .scalarize(0);
661
662 // Conversions
663 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
664 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
665 .legalIf([=](const LegalityQuery &Query) {
666 return HasFP16 &&
667 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
668 Query.Types[1] == v8s16) &&
669 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
670 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
671 })
672 .widenScalarToNextPow2(0)
673 .clampScalar(0, s32, s64)
675 .clampScalarOrElt(1, MinFPScalar, s64)
678 [=](const LegalityQuery &Query) {
679 return Query.Types[0].getScalarSizeInBits() >
680 Query.Types[1].getScalarSizeInBits();
681 },
683 .widenScalarIf(
684 [=](const LegalityQuery &Query) {
685 return Query.Types[0].getScalarSizeInBits() <
686 Query.Types[1].getScalarSizeInBits();
687 },
689 .clampNumElements(0, v4s16, v8s16)
690 .clampNumElements(0, v2s32, v4s32)
691 .clampMaxNumElements(0, s64, 2);
692
693 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
694 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
695 .legalIf([=](const LegalityQuery &Query) {
696 return HasFP16 &&
697 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
698 Query.Types[0] == v8s16) &&
699 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
700 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
701 })
702 .widenScalarToNextPow2(1)
703 .clampScalar(1, s32, s64)
705 .clampScalarOrElt(0, MinFPScalar, s64)
708 [=](const LegalityQuery &Query) {
709 return Query.Types[0].getScalarSizeInBits() <
710 Query.Types[1].getScalarSizeInBits();
711 },
713 .widenScalarIf(
714 [=](const LegalityQuery &Query) {
715 return Query.Types[0].getScalarSizeInBits() >
716 Query.Types[1].getScalarSizeInBits();
717 },
719 .clampNumElements(0, v4s16, v8s16)
720 .clampNumElements(0, v2s32, v4s32)
721 .clampMaxNumElements(0, s64, 2);
722
723 // Control-flow
725 .legalFor({s32})
726 .clampScalar(0, s32, s32);
727 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
728
730 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
731 .widenScalarToNextPow2(0)
732 .clampScalar(0, s32, s64)
733 .clampScalar(1, s32, s32)
735 .lowerIf(isVector(0));
736
737 // Pointer-handling
738 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
739
740 if (TM.getCodeModel() == CodeModel::Small)
741 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
742 else
743 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
744
746 .legalFor({{s64, p0}, {v2s64, v2p0}})
747 .widenScalarToNextPow2(0, 64)
748 .clampScalar(0, s64, s64);
749
751 .unsupportedIf([&](const LegalityQuery &Query) {
752 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
753 })
754 .legalFor({{p0, s64}, {v2p0, v2s64}});
755
756 // Casts for 32 and 64-bit width type are just copies.
757 // Same for 128-bit width type, except they are on the FPR bank.
759 // Keeping 32-bit instructions legal to prevent regression in some tests
760 .legalForCartesianProduct({s32, v2s16, v4s8})
761 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
762 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
763 .lowerIf([=](const LegalityQuery &Query) {
764 return Query.Types[0].isVector() != Query.Types[1].isVector();
765 })
767 .clampNumElements(0, v8s8, v16s8)
768 .clampNumElements(0, v4s16, v8s16)
769 .clampNumElements(0, v2s32, v4s32)
770 .lower();
771
772 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
773
774 // va_list must be a pointer, but most sized types are pretty easy to handle
775 // as the destination.
777 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
778 .clampScalar(0, s8, s64)
779 .widenScalarToNextPow2(0, /*Min*/ 8);
780
781 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
782 .lowerIf(
783 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
784
785 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
786 return ST.outlineAtomics() && !ST.hasLSE();
787 };
788
789 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
790 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
791 predNot(UseOutlineAtomics)))
792 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
793 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
794 return Query.Types[0].getSizeInBits() == 128 &&
795 !UseOutlineAtomics(Query);
796 })
797 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
798 UseOutlineAtomics))
799 .clampScalar(0, s32, s64);
800
801 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
802 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
803 G_ATOMICRMW_XOR})
804 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
805 predNot(UseOutlineAtomics)))
806 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
807 UseOutlineAtomics))
808 .clampScalar(0, s32, s64);
809
 810 // Do not outline these atomic operations, as per comment in
811 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
813 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
814 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
815 .clampScalar(0, s32, s64);
816
817 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
818
819 // Merge/Unmerge
820 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
821 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
822 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
824 .widenScalarToNextPow2(LitTyIdx, 8)
825 .widenScalarToNextPow2(BigTyIdx, 32)
826 .clampScalar(LitTyIdx, s8, s64)
827 .clampScalar(BigTyIdx, s32, s128)
828 .legalIf([=](const LegalityQuery &Q) {
829 switch (Q.Types[BigTyIdx].getSizeInBits()) {
830 case 32:
831 case 64:
832 case 128:
833 break;
834 default:
835 return false;
836 }
837 switch (Q.Types[LitTyIdx].getSizeInBits()) {
838 case 8:
839 case 16:
840 case 32:
841 case 64:
842 return true;
843 default:
844 return false;
845 }
846 });
847 }
848
849 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
850 .unsupportedIf([=](const LegalityQuery &Query) {
851 const LLT &EltTy = Query.Types[1].getElementType();
852 return Query.Types[0] != EltTy;
853 })
854 .minScalar(2, s64)
855 .customIf([=](const LegalityQuery &Query) {
856 const LLT &VecTy = Query.Types[1];
857 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
858 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
859 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
860 })
861 .minScalarOrEltIf(
862 [=](const LegalityQuery &Query) {
 863 // We want to promote <M x s1> to <M x s64> if that wouldn't
864 // cause the total vec size to be > 128b.
865 return Query.Types[1].getNumElements() <= 2;
866 },
867 0, s64)
868 .minScalarOrEltIf(
869 [=](const LegalityQuery &Query) {
870 return Query.Types[1].getNumElements() <= 4;
871 },
872 0, s32)
873 .minScalarOrEltIf(
874 [=](const LegalityQuery &Query) {
875 return Query.Types[1].getNumElements() <= 8;
876 },
877 0, s16)
878 .minScalarOrEltIf(
879 [=](const LegalityQuery &Query) {
880 return Query.Types[1].getNumElements() <= 16;
881 },
882 0, s8)
883 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
885 .clampMaxNumElements(1, s64, 2)
886 .clampMaxNumElements(1, s32, 4)
887 .clampMaxNumElements(1, s16, 8)
888 .clampMaxNumElements(1, s8, 16)
889 .clampMaxNumElements(1, p0, 2);
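  // Rough example of the promotion above: extracting an element from
  // <2 x s1> widens the element type as far as the 128-bit budget allows,
  // so the operation ends up extracting an s64 from <2 x s64> rather than
  // an s1.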
890
891 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
892 .legalIf(
893 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
896 .clampNumElements(0, v8s8, v16s8)
897 .clampNumElements(0, v4s16, v8s16)
898 .clampNumElements(0, v2s32, v4s32)
899 .clampMaxNumElements(0, s64, 2)
900 .clampMaxNumElements(0, p0, 2);
901
902 getActionDefinitionsBuilder(G_BUILD_VECTOR)
903 .legalFor({{v8s8, s8},
904 {v16s8, s8},
905 {v4s16, s16},
906 {v8s16, s16},
907 {v2s32, s32},
908 {v4s32, s32},
909 {v2p0, p0},
910 {v2s64, s64}})
911 .clampNumElements(0, v4s32, v4s32)
912 .clampNumElements(0, v2s64, v2s64)
913 .minScalarOrElt(0, s8)
915 .minScalarSameAs(1, 0);
916
917 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
918
921 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
922 .scalarize(1)
923 .widenScalarToNextPow2(1, /*Min=*/32)
924 .clampScalar(1, s32, s64)
925 .scalarSameSizeAs(0, 1);
926 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
927
928 // TODO: Custom lowering for v2s32, v4s32, v2s64.
929 getActionDefinitionsBuilder(G_BITREVERSE)
930 .legalFor({s32, s64, v8s8, v16s8})
931 .widenScalarToNextPow2(0, /*Min = */ 32)
932 .clampScalar(0, s32, s64);
933
934 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
935
937 .lowerIf(isVector(0))
938 .widenScalarToNextPow2(1, /*Min=*/32)
939 .clampScalar(1, s32, s64)
940 .scalarSameSizeAs(0, 1)
941 .legalIf([=](const LegalityQuery &Query) {
942 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
943 })
944 .customIf([=](const LegalityQuery &Query) {
945 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
946 });
947
948 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
949 .legalIf([=](const LegalityQuery &Query) {
950 const LLT &DstTy = Query.Types[0];
951 const LLT &SrcTy = Query.Types[1];
952 // For now just support the TBL2 variant which needs the source vectors
953 // to be the same size as the dest.
954 if (DstTy != SrcTy)
955 return false;
956 return llvm::is_contained(
957 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
958 })
 959 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
 960 // just want those lowered into G_BUILD_VECTOR.
961 .lowerIf([=](const LegalityQuery &Query) {
962 return !Query.Types[1].isVector();
963 })
964 .moreElementsIf(
965 [](const LegalityQuery &Query) {
966 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
967 Query.Types[0].getNumElements() >
968 Query.Types[1].getNumElements();
969 },
970 changeTo(1, 0))
972 .moreElementsIf(
973 [](const LegalityQuery &Query) {
974 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
975 Query.Types[0].getNumElements() <
976 Query.Types[1].getNumElements();
977 },
978 changeTo(0, 1))
979 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
980 .clampNumElements(0, v8s8, v16s8)
981 .clampNumElements(0, v4s16, v8s16)
982 .clampNumElements(0, v4s32, v4s32)
983 .clampNumElements(0, v2s64, v2s64);
984
985 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
986 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
987
988 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
989
990 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
991
992 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
993
994 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
995
996 if (ST.hasMOPS()) {
997 // G_BZERO is not supported. Currently it is only emitted by
998 // PreLegalizerCombiner for G_MEMSET with zero constant.
1000
1002 .legalForCartesianProduct({p0}, {s64}, {s64})
1003 .customForCartesianProduct({p0}, {s8}, {s64})
1004 .immIdx(0); // Inform verifier imm idx 0 is handled.
1005
1006 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1007 .legalForCartesianProduct({p0}, {p0}, {s64})
1008 .immIdx(0); // Inform verifier imm idx 0 is handled.
1009
1010 // G_MEMCPY_INLINE does not have a tailcall immediate
1011 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1012 .legalForCartesianProduct({p0}, {p0}, {s64});
1013
1014 } else {
1015 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1016 .libcall();
1017 }
1018
1019 // FIXME: Legal vector types are only legal with NEON.
1020 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1021 if (HasCSSC)
1022 ABSActions
1023 .legalFor({s32, s64});
1024 ABSActions.legalFor(PackedVectorAllTypeList)
1025 .customIf([=](const LegalityQuery &Q) {
1026 // TODO: Fix suboptimal codegen for 128+ bit types.
1027 LLT SrcTy = Q.Types[0];
1028 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1029 })
1030 .widenScalarIf(
1031 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1032 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1033 .widenScalarIf(
1034 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1035 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1036 .clampNumElements(0, v8s8, v16s8)
1037 .clampNumElements(0, v4s16, v8s16)
1038 .clampNumElements(0, v2s32, v4s32)
1039 .clampNumElements(0, v2s64, v2s64)
1040 .moreElementsToNextPow2(0)
1041 .lower();
1042
1043 // For fadd reductions we have pairwise operations available. We treat the
1044 // usual legal types as legal and handle the lowering to pairwise instructions
1045 // later.
1046 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1047 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1048 .legalIf([=](const LegalityQuery &Query) {
1049 const auto &Ty = Query.Types[1];
1050 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1051 })
1052 .minScalarOrElt(0, MinFPScalar)
1053 .clampMaxNumElements(1, s64, 2)
1054 .clampMaxNumElements(1, s32, 4)
1055 .clampMaxNumElements(1, s16, 8)
1056 .lower();
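  // "Pairwise" here refers to selection: e.g. a legal
  //   %r:_(s32) = G_VECREDUCE_FADD %v(<4 x s32>)
  // is expected to be emitted as a short chain of FADDP instructions rather
  // than a single across-lanes reduction.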
1057
1058 // For fmul reductions we need to split up into individual operations. We
 1059 // clamp to 128-bit vectors then to 64-bit vectors to produce a cascade of
1060 // smaller types, followed by scalarizing what remains.
1061 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1062 .minScalarOrElt(0, MinFPScalar)
1063 .clampMaxNumElements(1, s64, 2)
1064 .clampMaxNumElements(1, s32, 4)
1065 .clampMaxNumElements(1, s16, 8)
1066 .clampMaxNumElements(1, s32, 2)
1067 .clampMaxNumElements(1, s16, 4)
1068 .scalarize(1)
1069 .lower();
1070
1071 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1072 .scalarize(2)
1073 .lower();
1074
1075 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1076 .legalFor({{s8, v16s8},
1077 {s8, v8s8},
1078 {s16, v8s16},
1079 {s16, v4s16},
1080 {s32, v4s32},
1081 {s32, v2s32},
1082 {s64, v2s64}})
1083 .clampMaxNumElements(1, s64, 2)
1084 .clampMaxNumElements(1, s32, 4)
1085 .clampMaxNumElements(1, s16, 8)
1086 .clampMaxNumElements(1, s8, 16)
1087 .lower();
1088
1089 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1090 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1091 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1092 .legalIf([=](const LegalityQuery &Query) {
1093 const auto &Ty = Query.Types[1];
1094 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1095 })
1096 .minScalarOrElt(0, MinFPScalar)
1097 .clampMaxNumElements(1, s64, 2)
1098 .clampMaxNumElements(1, s32, 4)
1099 .clampMaxNumElements(1, s16, 8)
1100 .lower();
1101
1102 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1103 .clampMaxNumElements(1, s32, 2)
1104 .clampMaxNumElements(1, s16, 4)
1105 .clampMaxNumElements(1, s8, 8)
1106 .scalarize(1)
1107 .lower();
1108
1110 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1111 .legalFor({{s8, v8s8},
1112 {s8, v16s8},
1113 {s16, v4s16},
1114 {s16, v8s16},
1115 {s32, v2s32},
1116 {s32, v4s32}})
1117 .moreElementsIf(
1118 [=](const LegalityQuery &Query) {
1119 return Query.Types[1].isVector() &&
1120 Query.Types[1].getElementType() != s8 &&
1121 Query.Types[1].getNumElements() & 1;
1122 },
1124 .clampMaxNumElements(1, s64, 2)
1125 .clampMaxNumElements(1, s32, 4)
1126 .clampMaxNumElements(1, s16, 8)
1127 .clampMaxNumElements(1, s8, 16)
1128 .scalarize(1)
1129 .lower();
1130
1132 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1133 // Try to break down into smaller vectors as long as they're at least 64
1134 // bits. This lets us use vector operations for some parts of the
1135 // reduction.
1136 .fewerElementsIf(
1137 [=](const LegalityQuery &Q) {
1138 LLT SrcTy = Q.Types[1];
1139 if (SrcTy.isScalar())
1140 return false;
1141 if (!isPowerOf2_32(SrcTy.getNumElements()))
1142 return false;
1143 // We can usually perform 64b vector operations.
1144 return SrcTy.getSizeInBits() > 64;
1145 },
1146 [=](const LegalityQuery &Q) {
1147 LLT SrcTy = Q.Types[1];
1148 return std::make_pair(1, SrcTy.divide(2));
1149 })
1150 .scalarize(1)
1151 .lower();
1152
1153 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1154 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1155 .lower();
1156
1158 .legalFor({{s32, s64}, {s64, s64}})
1159 .customIf([=](const LegalityQuery &Q) {
1160 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1161 })
1162 .lower();
1164
1165 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1166 .customFor({{s32, s32}, {s64, s64}});
1167
1168 auto always = [=](const LegalityQuery &Q) { return true; };
1169 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1170 if (HasCSSC)
1171 CTPOPActions
1172 .legalFor({{s32, s32},
1173 {s64, s64},
1174 {v8s8, v8s8},
1175 {v16s8, v16s8}})
1176 .customFor({{s128, s128},
1177 {v2s64, v2s64},
1178 {v2s32, v2s32},
1179 {v4s32, v4s32},
1180 {v4s16, v4s16},
1181 {v8s16, v8s16}});
1182 else
1183 CTPOPActions
1184 .legalFor({{v8s8, v8s8},
1185 {v16s8, v16s8}})
1186 .customFor({{s32, s32},
1187 {s64, s64},
1188 {s128, s128},
1189 {v2s64, v2s64},
1190 {v2s32, v2s32},
1191 {v4s32, v4s32},
1192 {v4s16, v4s16},
1193 {v8s16, v8s16}});
1194 CTPOPActions
1195 .clampScalar(0, s32, s128)
1196 .widenScalarToNextPow2(0)
1197 .minScalarEltSameAsIf(always, 1, 0)
1198 .maxScalarEltSameAsIf(always, 1, 0);
1199
1200 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1201 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1202 .clampNumElements(0, v8s8, v16s8)
1203 .clampNumElements(0, v4s16, v8s16)
1204 .clampNumElements(0, v2s32, v4s32)
1205 .clampMaxNumElements(0, s64, 2)
1207 .lower();
1208
1209 // TODO: Libcall support for s128.
1210 // TODO: s16 should be legal with full FP16 support.
1211 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1212 .legalFor({{s64, s32}, {s64, s64}});
1213
1214 // TODO: Custom legalization for mismatched types.
1215 getActionDefinitionsBuilder(G_FCOPYSIGN)
1217 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1218 [=](const LegalityQuery &Query) {
1219 const LLT Ty = Query.Types[0];
1220 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1221 })
1222 .lower();
1223
1225
1226 // Access to floating-point environment.
1227 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1228 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1229 .libcall();
1230
1231 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1232
1233 getActionDefinitionsBuilder(G_PREFETCH).custom();
1234
1236 verify(*ST.getInstrInfo());
1237}
1238
1239bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
1240 MachineInstr &MI,
1241 LostDebugLocObserver &LocObserver) const {
1242 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1243 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1244 GISelChangeObserver &Observer = Helper.Observer;
1245 switch (MI.getOpcode()) {
1246 default:
1247 // No idea what to do.
1248 return false;
1249 case TargetOpcode::G_VAARG:
1250 return legalizeVaArg(MI, MRI, MIRBuilder);
1251 case TargetOpcode::G_LOAD:
1252 case TargetOpcode::G_STORE:
1253 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1254 case TargetOpcode::G_SHL:
1255 case TargetOpcode::G_ASHR:
1256 case TargetOpcode::G_LSHR:
1257 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1258 case TargetOpcode::G_GLOBAL_VALUE:
1259 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1260 case TargetOpcode::G_SBFX:
1261 case TargetOpcode::G_UBFX:
1262 return legalizeBitfieldExtract(MI, MRI, Helper);
1263 case TargetOpcode::G_FSHL:
1264 case TargetOpcode::G_FSHR:
1265 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1266 case TargetOpcode::G_ROTR:
1267 return legalizeRotate(MI, MRI, Helper);
1268 case TargetOpcode::G_CTPOP:
1269 return legalizeCTPOP(MI, MRI, Helper);
1270 case TargetOpcode::G_ATOMIC_CMPXCHG:
1271 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1272 case TargetOpcode::G_CTTZ:
1273 return legalizeCTTZ(MI, Helper);
1274 case TargetOpcode::G_BZERO:
1275 case TargetOpcode::G_MEMCPY:
1276 case TargetOpcode::G_MEMMOVE:
1277 case TargetOpcode::G_MEMSET:
1278 return legalizeMemOps(MI, Helper);
1279 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1280 return legalizeExtractVectorElt(MI, MRI, Helper);
1281 case TargetOpcode::G_DYN_STACKALLOC:
1282 return legalizeDynStackAlloc(MI, Helper);
1283 case TargetOpcode::G_PREFETCH:
1284 return legalizePrefetch(MI, Helper);
1285 case TargetOpcode::G_ABS:
1286 return Helper.lowerAbsToCNeg(MI);
1287 case TargetOpcode::G_ICMP:
1288 return legalizeICMP(MI, MRI, MIRBuilder);
1289 }
1290
1291 llvm_unreachable("expected switch to return");
1292}
1293
1294bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1295 MachineRegisterInfo &MRI,
1296 MachineIRBuilder &MIRBuilder,
1297 GISelChangeObserver &Observer,
1298 LegalizerHelper &Helper) const {
1299 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1300 MI.getOpcode() == TargetOpcode::G_FSHR);
1301
1302 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1303 // lowering
1304 Register ShiftNo = MI.getOperand(3).getReg();
1305 LLT ShiftTy = MRI.getType(ShiftNo);
1306 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1307
1308 // Adjust shift amount according to Opcode (FSHL/FSHR)
1309 // Convert FSHL to FSHR
1310 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1311 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1312
1313 // Lower non-constant shifts and leave zero shifts to the optimizer.
1314 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1315 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1317
1318 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1319
1320 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1321
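  // For an N-bit type, fshl(a, b, s) == fshr(a, b, N - s) for non-zero s, so
  // a constant-amount G_FSHL can be rewritten as a G_FSHR with the
  // complemented amount; e.g. for s32, G_FSHL a, b, 8 equals G_FSHR a, b, 24.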
1322 // If the instruction is G_FSHR and the shift amount is a 64-bit G_CONSTANT
1323 // in the range [0, BitWidth), it is legal
1324 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1325 VRegAndVal->Value.ult(BitWidth))
1326 return true;
1327
1328 // Cast the ShiftNumber to a 64-bit type
1329 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1330
1331 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1332 Observer.changingInstr(MI);
1333 MI.getOperand(3).setReg(Cast64.getReg(0));
1334 Observer.changedInstr(MI);
1335 }
1336 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1337 // instruction
1338 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1339 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1340 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1341 Cast64.getReg(0)});
1342 MI.eraseFromParent();
1343 }
1344 return true;
1345}
1346
1347bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1348 MachineRegisterInfo &MRI,
1349 MachineIRBuilder &MIRBuilder) const {
1350 Register DstReg = MI.getOperand(0).getReg();
1351 Register SrcReg1 = MI.getOperand(2).getReg();
1352 Register SrcReg2 = MI.getOperand(3).getReg();
1353 LLT DstTy = MRI.getType(DstReg);
1354 LLT SrcTy = MRI.getType(SrcReg1);
1355
1356 // Check the vector types are legal
1357 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1358 DstTy.getNumElements() != SrcTy.getNumElements() ||
1359 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1360 return false;
1361
1362 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1363 // following passes
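  // That is, the rewrite below turns
  //   %d = G_ICMP ne, %a, %b
  // into roughly
  //   %e = G_ICMP eq, %a, %b
  //   %d = G_XOR %e, -1        ; built by MIRBuilder.buildNot()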
1364 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1365 if (Pred != CmpInst::ICMP_NE)
1366 return true;
1367 Register CmpReg =
1368 MIRBuilder
1369 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1370 .getReg(0);
1371 MIRBuilder.buildNot(DstReg, CmpReg);
1372
1373 MI.eraseFromParent();
1374 return true;
1375}
1376
1377bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1378 MachineRegisterInfo &MRI,
1379 LegalizerHelper &Helper) const {
1380 // To allow for imported patterns to match, we ensure that the rotate amount
1381 // is 64b with an extension.
1382 Register AmtReg = MI.getOperand(2).getReg();
1383 LLT AmtTy = MRI.getType(AmtReg);
1384 (void)AmtTy;
1385 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1386 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1387 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1388 Helper.Observer.changingInstr(MI);
1389 MI.getOperand(2).setReg(NewAmt.getReg(0));
1390 Helper.Observer.changedInstr(MI);
1391 return true;
1392}
1393
1394bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1395 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1396 GISelChangeObserver &Observer) const {
1397 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1398 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1399 // G_ADD_LOW instructions.
1400 // By splitting this here, we can optimize accesses in the small code model by
1401 // folding in the G_ADD_LOW into the load/store offset.
1402 auto &GlobalOp = MI.getOperand(1);
1403 // Don't modify an intrinsic call.
1404 if (GlobalOp.isSymbol())
1405 return true;
1406 const auto* GV = GlobalOp.getGlobal();
1407 if (GV->isThreadLocal())
1408 return true; // Don't want to modify TLS vars.
1409
1410 auto &TM = ST->getTargetLowering()->getTargetMachine();
1411 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1412
1413 if (OpFlags & AArch64II::MO_GOT)
1414 return true;
1415
1416 auto Offset = GlobalOp.getOffset();
1417 Register DstReg = MI.getOperand(0).getReg();
1418 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1419 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1420 // Set the regclass on the dest reg too.
1421 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1422
1423 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1424 // by creating a MOVK that sets bits 48-63 of the register to (global address
1425 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1426 // prevent an incorrect tag being generated during relocation when the
1427 // global appears before the code section. Without the offset, a global at
1428 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1429 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1430 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1431 // instead of `0xf`.
1432 // This assumes that we're in the small code model so we can assume a binary
1433 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1434 // binary must also be loaded into address range [0, 2^48). Both of these
1435 // properties need to be ensured at runtime when using tagged addresses.
1436 if (OpFlags & AArch64II::MO_TAGGED) {
1437 assert(!Offset &&
1438 "Should not have folded in an offset for a tagged global!");
1439 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1440 .addGlobalAddress(GV, 0x100000000,
1442 .addImm(48);
1443 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1444 }
1445
1446 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1447 .addGlobalAddress(GV, Offset,
1449 MI.eraseFromParent();
1450 return true;
1451}
1452
1453bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1454 MachineInstr &MI) const {
1455 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1456 switch (IntrinsicID) {
1457 case Intrinsic::vacopy: {
1458 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1459 unsigned VaListSize =
1460 (ST->isTargetDarwin() || ST->isTargetWindows())
1461 ? PtrSize
1462 : ST->isTargetILP32() ? 20 : 32;
1463
1464 MachineFunction &MF = *MI.getMF();
1466 LLT::scalar(VaListSize * 8));
1467 MachineIRBuilder MIB(MI);
1468 MIB.buildLoad(Val, MI.getOperand(2),
1471 VaListSize, Align(PtrSize)));
1472 MIB.buildStore(Val, MI.getOperand(1),
1475 VaListSize, Align(PtrSize)));
1476 MI.eraseFromParent();
1477 return true;
1478 }
1479 case Intrinsic::get_dynamic_area_offset: {
1480 MachineIRBuilder &MIB = Helper.MIRBuilder;
1481 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1482 MI.eraseFromParent();
1483 return true;
1484 }
1485 case Intrinsic::aarch64_mops_memset_tag: {
1486 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1487 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1488 // the instruction).
1489 MachineIRBuilder MIB(MI);
1490 auto &Value = MI.getOperand(3);
1491 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1492 Value.setReg(ExtValueReg);
1493 return true;
1494 }
1495 case Intrinsic::aarch64_prefetch: {
1496 MachineIRBuilder MIB(MI);
1497 auto &AddrVal = MI.getOperand(1);
1498
1499 int64_t IsWrite = MI.getOperand(2).getImm();
1500 int64_t Target = MI.getOperand(3).getImm();
1501 int64_t IsStream = MI.getOperand(4).getImm();
1502 int64_t IsData = MI.getOperand(5).getImm();
1503
1504 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1505 (!IsData << 3) | // IsDataCache bit
1506 (Target << 1) | // Cache level bits
1507 (unsigned)IsStream; // Stream bit
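  // Example: a read prefetch of data into L1 with the "keep" policy
  // (IsWrite = 0, Target = 0, IsStream = 0, IsData = 1) packs to PrfOp = 0,
  // which corresponds to PLDL1KEEP.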
1508
1509 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1510 MI.eraseFromParent();
1511 return true;
1512 }
1513 case Intrinsic::aarch64_neon_uaddv:
1514 case Intrinsic::aarch64_neon_saddv:
1515 case Intrinsic::aarch64_neon_umaxv:
1516 case Intrinsic::aarch64_neon_smaxv:
1517 case Intrinsic::aarch64_neon_uminv:
1518 case Intrinsic::aarch64_neon_sminv: {
1519 MachineIRBuilder MIB(MI);
1520 MachineRegisterInfo &MRI = *MIB.getMRI();
1521 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1522 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1523 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1524
1525 auto OldDst = MI.getOperand(0).getReg();
1526 auto OldDstTy = MRI.getType(OldDst);
1527 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1528 if (OldDstTy == NewDstTy)
1529 return true;
1530
1531 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1532
1533 Helper.Observer.changingInstr(MI);
1534 MI.getOperand(0).setReg(NewDst);
1535 Helper.Observer.changedInstr(MI);
1536
1537 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1538 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1539 OldDst, NewDst);
1540
1541 return true;
1542 }
1543 case Intrinsic::aarch64_neon_uaddlp:
1544 case Intrinsic::aarch64_neon_saddlp: {
1545 MachineIRBuilder MIB(MI);
1546
1547 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1548 ? AArch64::G_UADDLP
1549 : AArch64::G_SADDLP;
1550 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1551 MI.eraseFromParent();
1552
1553 return true;
1554 }
1555 case Intrinsic::aarch64_neon_uaddlv:
1556 case Intrinsic::aarch64_neon_saddlv: {
1557 MachineIRBuilder MIB(MI);
1558 MachineRegisterInfo &MRI = *MIB.getMRI();
1559
1560 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1561 ? AArch64::G_UADDLV
1562 : AArch64::G_SADDLV;
1563 Register DstReg = MI.getOperand(0).getReg();
1564 Register SrcReg = MI.getOperand(2).getReg();
1565 LLT DstTy = MRI.getType(DstReg);
1566
1567 LLT MidTy, ExtTy;
1568 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1569 MidTy = LLT::fixed_vector(4, 32);
1570 ExtTy = LLT::scalar(32);
1571 } else {
1572 MidTy = LLT::fixed_vector(2, 64);
1573 ExtTy = LLT::scalar(64);
1574 }
1575
1576 Register MidReg =
1577 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1578 Register ZeroReg =
1579 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1580 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1581 {MidReg, ZeroReg})
1582 .getReg(0);
1583
1584 if (DstTy.getScalarSizeInBits() < 32)
1585 MIB.buildTrunc(DstReg, ExtReg);
1586 else
1587 MIB.buildCopy(DstReg, ExtReg);
1588
1589 MI.eraseFromParent();
1590
1591 return true;
1592 }
1593 case Intrinsic::aarch64_neon_smax:
1594 case Intrinsic::aarch64_neon_smin:
1595 case Intrinsic::aarch64_neon_umax:
1596 case Intrinsic::aarch64_neon_umin:
1597 case Intrinsic::aarch64_neon_fmax:
1598 case Intrinsic::aarch64_neon_fmin:
1599 case Intrinsic::aarch64_neon_fmaxnm:
1600 case Intrinsic::aarch64_neon_fminnm: {
1601 MachineIRBuilder MIB(MI);
1602 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1603 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1604 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1605 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1606 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1607 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1608 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1609 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1610 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1611 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1612 {MI.getOperand(2), MI.getOperand(3)});
1613 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1614 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1615 {MI.getOperand(2), MI.getOperand(3)});
1616 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1617 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1618 {MI.getOperand(2), MI.getOperand(3)});
1619 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1620 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1621 {MI.getOperand(2), MI.getOperand(3)});
1622 MI.eraseFromParent();
1623 return true;
1624 }
1625 case Intrinsic::vector_reverse:
1626 // TODO: Add support for vector_reverse
1627 return false;
1628 }
1629
1630 return true;
1631}
1632
1633bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1634 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1635 GISelChangeObserver &Observer) const {
1636 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1637 MI.getOpcode() == TargetOpcode::G_LSHR ||
1638 MI.getOpcode() == TargetOpcode::G_SHL);
1639 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1640 // imported patterns can select it later. Either way, it will be legal.
1641 Register AmtReg = MI.getOperand(2).getReg();
1642 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1643 if (!VRegAndVal)
1644 return true;
1645 // Check the shift amount is in range for an immediate form.
1646 int64_t Amount = VRegAndVal->Value.getSExtValue();
1647 if (Amount > 31)
1648 return true; // This will have to remain a register variant.
1649 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1650 Observer.changingInstr(MI);
1651 MI.getOperand(2).setReg(ExtCst.getReg(0));
1652 Observer.changedInstr(MI);
1653 return true;
1654}
1655
1656static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1657 MachineRegisterInfo &MRI) {
1658 Base = Root;
1659 Offset = 0;
1660
1661 Register NewBase;
1662 int64_t NewOffset;
1663 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1664 isShiftedInt<7, 3>(NewOffset)) {
1665 Base = NewBase;
1666 Offset = NewOffset;
1667 }
1668}
1669
1670// FIXME: This should be removed and replaced with the generic bitcast legalize
1671// action.
1672bool AArch64LegalizerInfo::legalizeLoadStore(
1673 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1674 GISelChangeObserver &Observer) const {
1675 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1676 MI.getOpcode() == TargetOpcode::G_LOAD);
1677 // Here we just try to handle vector loads/stores where our value type might
1678 // have pointer elements, which the SelectionDAG importer can't handle. To
1679 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1680 // the value to use s64 types.
1681
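  // For example, a G_STORE of a <2 x p0> value is rewritten below as a
  // G_BITCAST to <2 x s64> followed by a <2 x s64> G_STORE, so the existing
  // 64-bit element store patterns can match it.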
1682 // Custom legalization requires that the instruction, if not deleted, be fully
1683 // legalized. In order to allow further legalization of the inst, we create
1684 // a new instruction and erase the existing one.
1685
1686 Register ValReg = MI.getOperand(0).getReg();
1687 const LLT ValTy = MRI.getType(ValReg);
1688
1689 if (ValTy == LLT::scalar(128)) {
1690
1691 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1692 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1693 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1694 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1695 bool IsRcpC3 =
1696 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1697
1698 LLT s64 = LLT::scalar(64);
1699
1700 unsigned Opcode;
1701 if (IsRcpC3) {
1702 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1703 } else {
1704 // For LSE2, loads/stores should have been converted to monotonic and had
1705 // a fence inserted after them.
1706 assert(Ordering == AtomicOrdering::Monotonic ||
1707 Ordering == AtomicOrdering::Unordered);
1708 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1709
1710 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1711 }
1712
1713 MachineInstrBuilder NewI;
1714 if (IsLoad) {
1715 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1716 MIRBuilder.buildMergeLikeInstr(
1717 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1718 } else {
1719 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1720 NewI = MIRBuilder.buildInstr(
1721 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1722 }
1723
1724 if (IsRcpC3) {
1725 NewI.addUse(MI.getOperand(1).getReg());
1726 } else {
1727 Register Base;
1728 int Offset;
1729 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1730 NewI.addUse(Base);
1731 NewI.addImm(Offset / 8);
1732 }
1733
1734 NewI.cloneMemRefs(MI);
1735 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1736 *MRI.getTargetRegisterInfo(),
1737 *ST->getRegBankInfo());
1738 MI.eraseFromParent();
1739 return true;
1740 }
1741
1742 if (!ValTy.isPointerVector() ||
1743 ValTy.getElementType().getAddressSpace() != 0) {
1744 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1745 return false;
1746 }
1747
1748 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1749 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1750 auto &MMO = **MI.memoperands_begin();
1751 MMO.setType(NewTy);
1752
1753 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1754 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1755 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1756 } else {
1757 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1758 MIRBuilder.buildBitcast(ValReg, NewLoad);
1759 }
1760 MI.eraseFromParent();
1761 return true;
1762}
1763
1764bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1765 MachineRegisterInfo &MRI,
1766 MachineIRBuilder &MIRBuilder) const {
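// G_VAARG is lowered as a simple pointer bump through the va_list slot: load
// the current pointer, realign it if the requested alignment exceeds the
// pointer alignment, load the value, then store back the pointer advanced by
// the pointer-aligned value size.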
1767 MachineFunction &MF = MIRBuilder.getMF();
1768 Align Alignment(MI.getOperand(2).getImm());
1769 Register Dst = MI.getOperand(0).getReg();
1770 Register ListPtr = MI.getOperand(1).getReg();
1771
1772 LLT PtrTy = MRI.getType(ListPtr);
1773 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1774
1775 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1776 const Align PtrAlign = Align(PtrSize);
1777 auto List = MIRBuilder.buildLoad(
1778 PtrTy, ListPtr,
1779 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1780 PtrTy, PtrAlign));
1781
1782 MachineInstrBuilder DstPtr;
1783 if (Alignment > PtrAlign) {
1784 // Realign the list to the actual required alignment.
1785 auto AlignMinus1 =
1786 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1787 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1788 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1789 } else
1790 DstPtr = List;
1791
1792 LLT ValTy = MRI.getType(Dst);
1793 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1794 MIRBuilder.buildLoad(
1795 Dst, DstPtr,
1796 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1797 ValTy, std::max(Alignment, PtrAlign)));
1798
1799 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1800
1801 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1802
1803 MIRBuilder.buildStore(NewList, ListPtr,
1804 *MF.getMachineMemOperand(MachinePointerInfo(),
1805 MachineMemOperand::MOStore,
1806 PtrTy, PtrAlign));
1807
1808 MI.eraseFromParent();
1809 return true;
1810}
1811
1812bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1813 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1814 // Only legal if we can select immediate forms.
1815 // TODO: Lower this otherwise.
1816 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1817 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1818}
1819
1820bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1821 MachineRegisterInfo &MRI,
1822 LegalizerHelper &Helper) const {
1823 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1824 // it can be more efficiently lowered to the following sequence that uses
1825 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1826 // registers are cheap.
1827 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1828 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1829 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1830 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1831 //
1832 // For 128 bit vector popcounts, we lower to the following sequence:
1833 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1834 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1835 // uaddlp.4s v0, v0 // v4s32, v2s64
1836 // uaddlp.2d v0, v0 // v2s64
1837 //
1838 // For 64 bit vector popcounts, we lower to the following sequence:
1839 // cnt.8b v0, v0 // v4s16, v2s32
1840 // uaddlp.4h v0, v0 // v4s16, v2s32
1841 // uaddlp.2s v0, v0 // v2s32
1842
1843 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1844 Register Dst = MI.getOperand(0).getReg();
1845 Register Val = MI.getOperand(1).getReg();
1846 LLT Ty = MRI.getType(Val);
1847 unsigned Size = Ty.getSizeInBits();
1848
1849 assert(Ty == MRI.getType(Dst) &&
1850 "Expected src and dst to have the same type!");
1851
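// With CSSC, a 128-bit scalar popcount is split into two 64-bit G_CTPOPs whose
// results are added and zero-extended into the destination.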
1852 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1853 LLT s64 = LLT::scalar(64);
1854
1855 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1856 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1857 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1858 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1859
1860 MIRBuilder.buildZExt(Dst, Add);
1861 MI.eraseFromParent();
1862 return true;
1863 }
1864
1865 if (!ST->hasNEON() ||
1866 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1867 // Use generic lowering when custom lowering is not possible.
1868 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1869 Helper.lowerBitCount(MI) ==
1870 LegalizerHelper::LegalizeResult::Legalized;
1871 }
1872
1873 // Pre-conditioning: widen Val up to the nearest vector type.
1874 // s32,s64,v4s16,v2s32 -> v8i8
1875 // v8s16,v4s32,v2s64 -> v16i8
1876 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1877 if (Ty.isScalar()) {
1878 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1879 if (Size == 32) {
1880 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1881 }
1882 }
1883 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1884
1885 // Count bits in each byte-sized lane.
1886 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1887
1888 // Sum across lanes.
1889 Register HSum = CTPOP.getReg(0);
1890 unsigned Opc;
1891 SmallVector<LLT> HAddTys;
1892 if (Ty.isScalar()) {
1893 Opc = Intrinsic::aarch64_neon_uaddlv;
1894 HAddTys.push_back(LLT::scalar(32));
1895 } else if (Ty == LLT::fixed_vector(8, 16)) {
1896 Opc = Intrinsic::aarch64_neon_uaddlp;
1897 HAddTys.push_back(LLT::fixed_vector(8, 16));
1898 } else if (Ty == LLT::fixed_vector(4, 32)) {
1899 Opc = Intrinsic::aarch64_neon_uaddlp;
1900 HAddTys.push_back(LLT::fixed_vector(8, 16));
1901 HAddTys.push_back(LLT::fixed_vector(4, 32));
1902 } else if (Ty == LLT::fixed_vector(2, 64)) {
1903 Opc = Intrinsic::aarch64_neon_uaddlp;
1904 HAddTys.push_back(LLT::fixed_vector(8, 16));
1905 HAddTys.push_back(LLT::fixed_vector(4, 32));
1906 HAddTys.push_back(LLT::fixed_vector(2, 64));
1907 } else if (Ty == LLT::fixed_vector(4, 16)) {
1908 Opc = Intrinsic::aarch64_neon_uaddlp;
1909 HAddTys.push_back(LLT::fixed_vector(4, 16));
1910 } else if (Ty == LLT::fixed_vector(2, 32)) {
1911 Opc = Intrinsic::aarch64_neon_uaddlp;
1912 HAddTys.push_back(LLT::fixed_vector(4, 16));
1913 HAddTys.push_back(LLT::fixed_vector(2, 32));
1914 } else
1915 llvm_unreachable("unexpected vector shape");
1916 MachineInstrBuilder UADD;
1917 for (LLT HTy : HAddTys) {
1918 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1919 HSum = UADD.getReg(0);
1920 }
1921
1922 // Post-conditioning.
1923 if (Ty.isScalar() && (Size == 64 || Size == 128))
1924 MIRBuilder.buildZExt(Dst, UADD);
1925 else
1926 UADD->getOperand(0).setReg(Dst);
1927 MI.eraseFromParent();
1928 return true;
1929}
1930
1931bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1932 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1933 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1934 LLT s64 = LLT::scalar(64);
1935 auto Addr = MI.getOperand(1).getReg();
1936 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1937 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1938 auto DstLo = MRI.createGenericVirtualRegister(s64);
1939 auto DstHi = MRI.createGenericVirtualRegister(s64);
1940
1941 MachineInstrBuilder CAS;
1942 if (ST->hasLSE()) {
1943 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1944 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1945 // the rest of the MIR so we must reassemble the extracted registers into a
1946 // 128-bit known-regclass one with code like this:
1947 //
1948 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1949 // %out = CASP %in1, ...
1950 // %OldLo = G_EXTRACT %out, 0
1951 // %OldHi = G_EXTRACT %out, 64
1952 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1953 unsigned Opcode;
1954 switch (Ordering) {
1955 case AtomicOrdering::Acquire:
1956 Opcode = AArch64::CASPAX;
1957 break;
1958 case AtomicOrdering::Release:
1959 Opcode = AArch64::CASPLX;
1960 break;
1961 case AtomicOrdering::AcquireRelease:
1962 case AtomicOrdering::SequentiallyConsistent:
1963 Opcode = AArch64::CASPALX;
1964 break;
1965 default:
1966 Opcode = AArch64::CASPX;
1967 break;
1968 }
1969
1970 LLT s128 = LLT::scalar(128);
1971 auto CASDst = MRI.createGenericVirtualRegister(s128);
1972 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1973 auto CASNew = MRI.createGenericVirtualRegister(s128);
1974 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1975 .addUse(DesiredI->getOperand(0).getReg())
1976 .addImm(AArch64::sube64)
1977 .addUse(DesiredI->getOperand(1).getReg())
1978 .addImm(AArch64::subo64);
1979 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1980 .addUse(NewI->getOperand(0).getReg())
1981 .addImm(AArch64::sube64)
1982 .addUse(NewI->getOperand(1).getReg())
1983 .addImm(AArch64::subo64);
1984
1985 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1986
1987 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1988 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1989 } else {
1990 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1991 // can take arbitrary registers, so it just has the normal GPR64 operands
1992 // that the rest of AArch64 is expecting.
1993 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1994 unsigned Opcode;
1995 switch (Ordering) {
1996 case AtomicOrdering::Acquire:
1997 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1998 break;
1999 case AtomicOrdering::Release:
2000 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2001 break;
2002 case AtomicOrdering::AcquireRelease:
2003 case AtomicOrdering::SequentiallyConsistent:
2004 Opcode = AArch64::CMP_SWAP_128;
2005 break;
2006 default:
2007 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2008 break;
2009 }
2010
2011 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2012 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2013 {Addr, DesiredI->getOperand(0),
2014 DesiredI->getOperand(1), NewI->getOperand(0),
2015 NewI->getOperand(1)});
2016 }
2017
2018 CAS.cloneMemRefs(MI);
2019 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2020 *MRI.getTargetRegisterInfo(),
2021 *ST->getRegBankInfo());
2022
2023 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2024 MI.eraseFromParent();
2025 return true;
2026}
2027
2028bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2029 LegalizerHelper &Helper) const {
2030 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2031 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2032 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
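// Lower cttz(x) as ctlz(bitreverse(x)); on AArch64 this selects to RBIT + CLZ.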
2033 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2034 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2035 MI.eraseFromParent();
2036 return true;
2037}
2038
2039bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2040 LegalizerHelper &Helper) const {
2041 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2042
2043 // The tagged version, MOPSMemorySetTagged, is legalized in legalizeIntrinsic.
2044 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2045 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2046 // the instruction).
2047 auto &Value = MI.getOperand(1);
2048 Register ExtValueReg =
2049 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2050 Value.setReg(ExtValueReg);
2051 return true;
2052 }
2053
2054 return false;
2055}
2056
2057bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2058 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2059 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2060 auto VRegAndVal =
2061 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2062 if (VRegAndVal)
2063 return true;
2064 return Helper.lowerExtractInsertVectorElt(MI) !=
2065 LegalizerHelper::LegalizeResult::UnableToLegalize;
2066}
2067
2068bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2069 MachineInstr &MI, LegalizerHelper &Helper) const {
2070 MachineFunction &MF = *MI.getParent()->getParent();
2071 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2072 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2073
2074 // If stack probing is not enabled for this function, use the default
2075 // lowering.
2076 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2077 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2078 "inline-asm") {
2079 Helper.lowerDynStackAlloc(MI);
2080 return true;
2081 }
2082
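// Stack probing was requested via "probe-stack"="inline-asm": compute the new
// SP value as usual, but emit the PROBED_STACKALLOC_DYN pseudo so the actual
// SP adjustment is expanded later with inline stack probes.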
2083 Register Dst = MI.getOperand(0).getReg();
2084 Register AllocSize = MI.getOperand(1).getReg();
2085 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2086
2087 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2088 "Unexpected type for dynamic alloca");
2089 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2090 "Unexpected type for dynamic alloca");
2091
2092 LLT PtrTy = MRI.getType(Dst);
2093 Register SPReg =
2094 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2095 Register SPTmp =
2096 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2097 auto NewMI =
2098 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2099 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2100 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2101 MIRBuilder.buildCopy(Dst, SPTmp);
2102
2103 MI.eraseFromParent();
2104 return true;
2105}
2106
2107bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2108 LegalizerHelper &Helper) const {
2109 MachineIRBuilder &MIB = Helper.MIRBuilder;
2110 auto &AddrVal = MI.getOperand(0);
2111
2112 int64_t IsWrite = MI.getOperand(1).getImm();
2113 int64_t Locality = MI.getOperand(2).getImm();
2114 int64_t IsData = MI.getOperand(3).getImm();
2115
2116 bool IsStream = Locality == 0;
2117 if (Locality != 0) {
2118 assert(Locality <= 3 && "Prefetch locality out-of-range");
2119 // The llvm.prefetch locality degree is the inverse of the target cache
2120 // level: locality 3 (keep in the closest cache) maps to L1, whose
2121 // encoding is 0, so flip the value.
2122 Locality = 3 - Locality;
2123 }
2124
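// Pack the 5-bit PRFM immediate: bits [4:3] select PLD/PLI/PST, bits [2:1]
// the target cache level, and bit 0 the KEEP/STRM policy.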
2125 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2126
2127 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2128 MI.eraseFromParent();
2129 return true;
2130}