1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44    : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
64 v16s8, v8s16, v4s32,
65 v2s64, v2p0,
66 /* End 128bit types */
67 /* Begin 64bit types */
68 v8s8, v4s16, v2s32};
69 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
70 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
71 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
72
73 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
74
75 // FIXME: support subtargets which have neon/fp-armv8 disabled.
76 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
77    getLegacyLegalizerInfo().computeTables();
78    return;
79 }
80
81 // Some instructions only support s16 if the subtarget has full 16-bit FP
82 // support.
83 const bool HasFP16 = ST.hasFullFP16();
84 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
85
86 const bool HasCSSC = ST.hasCSSC();
87 const bool HasRCPC3 = ST.hasRCPC3();
88
89  getActionDefinitionsBuilder(
90      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
91 .legalFor({p0, s8, s16, s32, s64})
92 .legalFor(PackedVectorAllTypeList)
94 .clampScalar(0, s8, s64)
96 [=](const LegalityQuery &Query) {
97 return Query.Types[0].isVector() &&
98 (Query.Types[0].getElementType() != s64 ||
99 Query.Types[0].getNumElements() != 2);
100 },
101 [=](const LegalityQuery &Query) {
102 LLT EltTy = Query.Types[0].getElementType();
103 if (EltTy == s64)
104 return std::make_pair(0, LLT::fixed_vector(2, 64));
105 return std::make_pair(0, EltTy);
106 });
107
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
121 .legalFor({s32, s64, v4s32, v2s32, v2s64})
122 .widenScalarToNextPow2(0)
123 .clampScalar(0, s32, s64);
124
125 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
126 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
129 .clampMaxNumElements(0, s8, 16)
130 .clampMaxNumElements(0, s16, 8)
131 .clampNumElements(0, v2s32, v4s32)
132 .clampNumElements(0, v2s64, v2s64)
133      .minScalarOrEltIf(
134          [=](const LegalityQuery &Query) {
135 return Query.Types[0].getNumElements() <= 2;
136 },
137 0, s32)
138 .minScalarOrEltIf(
139 [=](const LegalityQuery &Query) {
140 return Query.Types[0].getNumElements() <= 4;
141 },
142 0, s16)
143 .minScalarOrEltIf(
144 [=](const LegalityQuery &Query) {
145 return Query.Types[0].getNumElements() <= 16;
146 },
147 0, s8)
149
150 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
151 .customIf([=](const LegalityQuery &Query) {
152 const auto &SrcTy = Query.Types[0];
153 const auto &AmtTy = Query.Types[1];
154 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
155 AmtTy.getSizeInBits() == 32;
156 })
157 .legalFor({
158 {s32, s32},
159 {s32, s64},
160 {s64, s64},
161 {v8s8, v8s8},
162 {v16s8, v16s8},
163 {v4s16, v4s16},
164 {v8s16, v8s16},
165 {v2s32, v2s32},
166 {v4s32, v4s32},
167 {v2s64, v2s64},
168 })
169 .widenScalarToNextPow2(0)
170 .clampScalar(1, s32, s64)
171 .clampScalar(0, s32, s64)
172 .clampNumElements(0, v2s32, v4s32)
173 .clampNumElements(0, v2s64, v2s64)
175 .minScalarSameAs(1, 0);
176
178 .legalFor({{p0, s64}, {v2p0, v2s64}})
179 .clampScalar(1, s64, s64);
180
181 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
182
183 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
184 .legalFor({s32, s64})
185 .libcallFor({s128})
186 .clampScalar(0, s32, s64)
188 .scalarize(0);
189
190 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
191 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
193 .clampScalarOrElt(0, s32, s64)
194 .clampNumElements(0, v2s32, v4s32)
195 .clampNumElements(0, v2s64, v2s64)
196 .moreElementsToNextPow2(0);
197
198
199 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
200 .widenScalarToNextPow2(0, /*Min = */ 32)
201 .clampScalar(0, s32, s64)
202 .lower();
203
204 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
205 .legalFor({s64, v8s16, v16s8, v4s32})
206 .lower();
207
208 auto &MinMaxActions = getActionDefinitionsBuilder(
209 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
210 if (HasCSSC)
211 MinMaxActions
212 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
213        // Make clamping conditional on the CSSC extension: without legal types
214        // we lower to CMP, which can fold one of the two sxtb's we'd otherwise
215        // need if we detect a type smaller than 32-bit.
216 .minScalar(0, s32);
217 else
218 MinMaxActions
219 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
220 MinMaxActions
221 .clampNumElements(0, v8s8, v16s8)
222 .clampNumElements(0, v4s16, v8s16)
223 .clampNumElements(0, v2s32, v4s32)
224      // FIXME: This shouldn't be needed as v2s64 types are going to
225 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
226 .clampNumElements(0, v2s64, v2s64)
227 .lower();
228
229  getActionDefinitionsBuilder(
230      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
231 .legalFor({{s32, s32}, {s64, s32}})
232 .clampScalar(0, s32, s64)
233 .clampScalar(1, s32, s64)
235
236 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
237 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
238 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
239 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
240 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
241 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
242 .legalIf([=](const LegalityQuery &Query) {
243 const auto &Ty = Query.Types[0];
244 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
245 })
246 .libcallFor({s128})
247 .minScalarOrElt(0, MinFPScalar)
248 .clampNumElements(0, v4s16, v8s16)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
252
254 .libcallFor({s32, s64})
255 .minScalar(0, s32)
256 .scalarize(0);
257
258 getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
259 // If we don't have full FP16 support, then scalarize the elements of
260 // vectors containing fp16 types.
262 [=, &ST](const LegalityQuery &Query) {
263 const auto &Ty = Query.Types[0];
264 return Ty.isVector() && Ty.getElementType() == s16 &&
265 !ST.hasFullFP16();
266 },
267 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
268 // If we don't have full FP16 support, then widen s16 to s32 if we
269 // encounter it.
270 .widenScalarIf(
271 [=, &ST](const LegalityQuery &Query) {
272 return Query.Types[0] == s16 && !ST.hasFullFP16();
273 },
274 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
275 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
276
277  getActionDefinitionsBuilder(
278      {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
279 G_FEXP, G_FEXP2, G_FEXP10})
280 // We need a call for these, so we always need to scalarize.
281 .scalarize(0)
282 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
283 .minScalar(0, s32)
284 .libcallFor({s32, s64});
285
287 .legalIf(all(typeInSet(0, {s32, s64, p0}),
288 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
290 .clampScalar(0, s32, s64)
292 .minScalar(1, s8)
293 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
294 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
295
297 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
298 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
300 .clampScalar(1, s32, s128)
302 .minScalar(0, s16)
303 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
304 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
305 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
306
307
308 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
309 auto &Actions = getActionDefinitionsBuilder(Op);
310
311 if (Op == G_SEXTLOAD)
312      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
313
314 // Atomics have zero extending behavior.
315 Actions
316 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
317 {s32, p0, s16, 8},
318 {s32, p0, s32, 8},
319 {s64, p0, s8, 2},
320 {s64, p0, s16, 2},
321 {s64, p0, s32, 4},
322 {s64, p0, s64, 8},
323 {p0, p0, s64, 8},
324 {v2s32, p0, s64, 8}})
325 .widenScalarToNextPow2(0)
326 .clampScalar(0, s32, s64)
327 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
328 // how to do that yet.
329 .unsupportedIfMemSizeNotPow2()
330 // Lower anything left over into G_*EXT and G_LOAD
331 .lower();
332 }
333
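  // Vectors of address-space-0 pointers get custom handling below: they are
  // bitcast to vectors of same-sized integers in legalizeLoadStore().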
334 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
335 const LLT &ValTy = Query.Types[0];
336 if (!ValTy.isVector())
337 return false;
338 const LLT EltTy = ValTy.getElementType();
339 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
340 };
341
342  getActionDefinitionsBuilder(G_LOAD)
343      .customIf([=](const LegalityQuery &Query) {
344 return HasRCPC3 && Query.Types[0] == s128 &&
345 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
346 })
347 .customIf([=](const LegalityQuery &Query) {
348 return Query.Types[0] == s128 &&
349 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
350 })
351 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
352 {s16, p0, s16, 8},
353 {s32, p0, s32, 8},
354 {s64, p0, s64, 8},
355 {p0, p0, s64, 8},
356 {s128, p0, s128, 8},
357 {v8s8, p0, s64, 8},
358 {v16s8, p0, s128, 8},
359 {v4s16, p0, s64, 8},
360 {v8s16, p0, s128, 8},
361 {v2s32, p0, s64, 8},
362 {v4s32, p0, s128, 8},
363 {v2s64, p0, s128, 8}})
364 // These extends are also legal
365 .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
366 .widenScalarToNextPow2(0, /* MinSize = */ 8)
368 .clampScalar(0, s8, s64)
370 [=](const LegalityQuery &Query) {
371 // Clamp extending load results to 32-bits.
372 return Query.Types[0].isScalar() &&
373 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
374 Query.Types[0].getSizeInBits() > 32;
375 },
376 changeTo(0, s32))
377 .clampMaxNumElements(0, s8, 16)
378 .clampMaxNumElements(0, s16, 8)
379 .clampMaxNumElements(0, s32, 4)
380 .clampMaxNumElements(0, s64, 2)
381 .clampMaxNumElements(0, p0, 2)
382 .customIf(IsPtrVecPred)
383 .scalarizeIf(typeIs(0, v2s16), 0);
384
385  getActionDefinitionsBuilder(G_STORE)
386      .customIf([=](const LegalityQuery &Query) {
387 return HasRCPC3 && Query.Types[0] == s128 &&
388 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
389 })
390 .customIf([=](const LegalityQuery &Query) {
391 return Query.Types[0] == s128 &&
392 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
393 })
394 .legalForTypesWithMemDesc(
395 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
396 {s32, p0, s8, 8}, // truncstorei8 from s32
397 {s64, p0, s8, 8}, // truncstorei8 from s64
398 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
399 {s64, p0, s16, 8}, // truncstorei16 from s64
400 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
401 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
402 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
403 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
404 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
405 .clampScalar(0, s8, s64)
406 .lowerIf([=](const LegalityQuery &Query) {
407 return Query.Types[0].isScalar() &&
408 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
409 })
410 // Maximum: sN * k = 128
411 .clampMaxNumElements(0, s8, 16)
412 .clampMaxNumElements(0, s16, 8)
413 .clampMaxNumElements(0, s32, 4)
414 .clampMaxNumElements(0, s64, 2)
415 .clampMaxNumElements(0, p0, 2)
417 .customIf(IsPtrVecPred)
418 .scalarizeIf(typeIs(0, v2s16), 0);
419
420 getActionDefinitionsBuilder(G_INDEXED_STORE)
421 // Idx 0 == Ptr, Idx 1 == Val
422 // TODO: we can implement legalizations but as of now these are
423 // generated in a very specific way.
424      .legalForTypesWithMemDesc({
425          {p0, s8, s8, 8},
426 {p0, s16, s16, 8},
427 {p0, s32, s8, 8},
428 {p0, s32, s16, 8},
429 {p0, s32, s32, 8},
430 {p0, s64, s64, 8},
431 {p0, p0, p0, 8},
432 {p0, v8s8, v8s8, 8},
433 {p0, v16s8, v16s8, 8},
434 {p0, v4s16, v4s16, 8},
435 {p0, v8s16, v8s16, 8},
436 {p0, v2s32, v2s32, 8},
437 {p0, v4s32, v4s32, 8},
438 {p0, v2s64, v2s64, 8},
439 {p0, v2p0, v2p0, 8},
440 {p0, s128, s128, 8},
441 })
442 .unsupported();
443
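  // Indexed loads are only legal when the loaded type is one of the usual
  // scalar/pointer/packed-vector register types (or s128) and the base
  // pointer is p0.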
444 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
445 LLT LdTy = Query.Types[0];
446 LLT PtrTy = Query.Types[1];
447 if (llvm::find(PackedVectorAllTypesVec, LdTy) ==
448 PackedVectorAllTypesVec.end() &&
449 llvm::find(ScalarAndPtrTypesVec, LdTy) == ScalarAndPtrTypesVec.end() &&
450 LdTy != s128)
451 return false;
452 if (PtrTy != p0)
453 return false;
454 return true;
455 };
456 getActionDefinitionsBuilder(G_INDEXED_LOAD)
459 .legalIf(IndexedLoadBasicPred)
460 .unsupported();
461 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
462 .unsupportedIf(
463          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
464      .legalIf(all(typeInSet(0, {s16, s32, s64}),
465 LegalityPredicate([=](const LegalityQuery &Q) {
466 LLT LdTy = Q.Types[0];
467 LLT PtrTy = Q.Types[1];
468 LLT MemTy = Q.MMODescrs[0].MemoryTy;
469 if (PtrTy != p0)
470 return false;
471 if (LdTy == s16)
472 return MemTy == s8;
473 if (LdTy == s32)
474 return MemTy == s8 || MemTy == s16;
475 if (LdTy == s64)
476 return MemTy == s8 || MemTy == s16 || MemTy == s32;
477 return false;
478 })))
479 .unsupported();
480
481 // Constants
482  getActionDefinitionsBuilder(G_CONSTANT)
483      .legalFor({p0, s8, s16, s32, s64})
484 .widenScalarToNextPow2(0)
485 .clampScalar(0, s8, s64);
486 getActionDefinitionsBuilder(G_FCONSTANT)
487 .legalIf([=](const LegalityQuery &Query) {
488 const auto &Ty = Query.Types[0];
489 if (HasFP16 && Ty == s16)
490 return true;
491 return Ty == s32 || Ty == s64 || Ty == s128;
492 })
493 .clampScalar(0, MinFPScalar, s128);
494
495  getActionDefinitionsBuilder(G_ICMP)
496      .legalFor({{s32, s32},
497 {s32, s64},
498 {s32, p0},
499 {v4s32, v4s32},
500 {v2s32, v2s32},
501 {v2s64, v2s64},
502 {v2s64, v2p0},
503 {v4s16, v4s16},
504 {v8s16, v8s16},
505 {v8s8, v8s8},
506 {v16s8, v16s8}})
508 .clampScalar(1, s32, s64)
509 .clampScalar(0, s32, s32)
510 .minScalarEltSameAsIf(
511 [=](const LegalityQuery &Query) {
512 const LLT &Ty = Query.Types[0];
513 const LLT &SrcTy = Query.Types[1];
514 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
515 Ty.getElementType() != SrcTy.getElementType();
516 },
517 0, 1)
518 .minScalarOrEltIf(
519 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
520 1, s32)
521 .minScalarOrEltIf(
522 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
523 s64)
524 .clampNumElements(0, v2s32, v4s32);
525
526  getActionDefinitionsBuilder(G_FCMP)
527      // If we don't have full FP16 support, then scalarize the elements of
528 // vectors containing fp16 types.
530 [=](const LegalityQuery &Query) {
531 const auto &Ty = Query.Types[0];
532 return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
533 },
534 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
535 // If we don't have full FP16 support, then widen s16 to s32 if we
536 // encounter it.
537 .widenScalarIf(
538 [=](const LegalityQuery &Query) {
539 return Query.Types[0] == s16 && !HasFP16;
540 },
541 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
542 .legalFor({{s16, s16},
543 {s32, s32},
544 {s32, s64},
545 {v4s32, v4s32},
546 {v2s32, v2s32},
547 {v2s64, v2s64},
548 {v4s16, v4s16},
549 {v8s16, v8s16}})
551 .clampScalar(1, s32, s64)
552 .clampScalar(0, s32, s32)
553 .minScalarEltSameAsIf(
554 [=](const LegalityQuery &Query) {
555 const LLT &Ty = Query.Types[0];
556 const LLT &SrcTy = Query.Types[1];
557 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
558 Ty.getElementType() != SrcTy.getElementType();
559 },
560 0, 1)
561 .clampNumElements(0, v2s32, v4s32)
562 .clampMaxNumElements(1, s64, 2);
563
564 // Extensions
565 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
566 unsigned DstSize = Query.Types[0].getSizeInBits();
567
568 // Handle legal vectors using legalFor
569 if (Query.Types[0].isVector())
570 return false;
571
572 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
573 return false; // Extending to a scalar s128 needs narrowing.
574
575 const LLT &SrcTy = Query.Types[1];
576
577 // Make sure we fit in a register otherwise. Don't bother checking that
578 // the source type is below 128 bits. We shouldn't be allowing anything
579 // through which is wider than the destination in the first place.
580 unsigned SrcSize = SrcTy.getSizeInBits();
581 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
582 return false;
583
584 return true;
585 };
586 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
587 .legalIf(ExtLegalFunc)
588 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
589 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
591 .clampMaxNumElements(1, s8, 8)
592 .clampMaxNumElements(1, s16, 4)
593 .clampMaxNumElements(1, s32, 2)
594 // Tries to convert a large EXTEND into two smaller EXTENDs
595 .lowerIf([=](const LegalityQuery &Query) {
596 return (Query.Types[0].getScalarSizeInBits() >
597 Query.Types[1].getScalarSizeInBits() * 2) &&
598 Query.Types[0].isVector() &&
599 (Query.Types[1].getScalarSizeInBits() == 8 ||
600 Query.Types[1].getScalarSizeInBits() == 16);
601 });
602
603  getActionDefinitionsBuilder(G_TRUNC)
604      .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
606 .clampMaxNumElements(0, s8, 8)
607 .clampMaxNumElements(0, s16, 4)
608 .clampMaxNumElements(0, s32, 2)
609 .minScalarOrEltIf(
610 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
611 0, s8)
612 .lowerIf([=](const LegalityQuery &Query) {
613 LLT DstTy = Query.Types[0];
614 LLT SrcTy = Query.Types[1];
615 return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
616 (DstTy.getScalarSizeInBits() * 2 <
617 SrcTy.getScalarSizeInBits()));
618 })
619
620 .alwaysLegal();
621
622 getActionDefinitionsBuilder(G_SEXT_INREG)
623 .legalFor({s32, s64})
624 .legalFor(PackedVectorAllTypeList)
625 .maxScalar(0, s64)
626 .lower();
627
628 // FP conversions
629  getActionDefinitionsBuilder(G_FPTRUNC)
630      .legalFor(
631 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
632 .clampNumElements(0, v4s16, v4s16)
633 .clampNumElements(0, v2s32, v2s32)
634 .scalarize(0);
635
636  getActionDefinitionsBuilder(G_FPEXT)
637      .legalFor(
638 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
639 .clampNumElements(0, v4s32, v4s32)
640 .clampNumElements(0, v2s64, v2s64)
641 .scalarize(0);
642
643 // Conversions
644 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
645 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
646 .legalIf([=](const LegalityQuery &Query) {
647 return HasFP16 &&
648 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
649 Query.Types[1] == v8s16) &&
650 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
651 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
652 })
653 .widenScalarToNextPow2(0)
654 .clampScalar(0, s32, s64)
656 .clampScalarOrElt(1, MinFPScalar, s64)
659 [=](const LegalityQuery &Query) {
660 return Query.Types[0].getScalarSizeInBits() >
661 Query.Types[1].getScalarSizeInBits();
662 },
664 .widenScalarIf(
665 [=](const LegalityQuery &Query) {
666 return Query.Types[0].getScalarSizeInBits() <
667 Query.Types[1].getScalarSizeInBits();
668 },
670 .clampNumElements(0, v4s16, v8s16)
671 .clampNumElements(0, v2s32, v4s32)
672 .clampMaxNumElements(0, s64, 2);
673
674 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
675 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
676 .legalIf([=](const LegalityQuery &Query) {
677 return HasFP16 &&
678 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
679 Query.Types[0] == v8s16) &&
680 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
681 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
682 })
683 .widenScalarToNextPow2(1)
684 .clampScalar(1, s32, s64)
686 .clampScalarOrElt(0, MinFPScalar, s64)
689 [=](const LegalityQuery &Query) {
690 return Query.Types[0].getScalarSizeInBits() <
691 Query.Types[1].getScalarSizeInBits();
692 },
694 .widenScalarIf(
695 [=](const LegalityQuery &Query) {
696 return Query.Types[0].getScalarSizeInBits() >
697 Query.Types[1].getScalarSizeInBits();
698 },
700 .clampNumElements(0, v4s16, v8s16)
701 .clampNumElements(0, v2s32, v4s32)
702 .clampMaxNumElements(0, s64, 2);
703
704 // Control-flow
705  getActionDefinitionsBuilder(G_BRCOND)
706      .legalFor({s32})
707 .clampScalar(0, s32, s32);
708 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
709
710  getActionDefinitionsBuilder(G_SELECT)
711      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
712 .widenScalarToNextPow2(0)
713 .clampScalar(0, s32, s64)
714 .clampScalar(1, s32, s32)
716 .lowerIf(isVector(0));
717
718 // Pointer-handling
719 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
720
721 if (TM.getCodeModel() == CodeModel::Small)
722 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
723 else
724 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
725
726  getActionDefinitionsBuilder(G_PTRTOINT)
727      .legalFor({{s64, p0}, {v2s64, v2p0}})
728 .widenScalarToNextPow2(0, 64)
729 .clampScalar(0, s64, s64);
730
731  getActionDefinitionsBuilder(G_INTTOPTR)
732      .unsupportedIf([&](const LegalityQuery &Query) {
733 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
734 })
735 .legalFor({{p0, s64}, {v2p0, v2s64}});
736
737 // Casts for 32 and 64-bit width type are just copies.
738 // Same for 128-bit width type, except they are on the FPR bank.
739  getActionDefinitionsBuilder(G_BITCAST)
740      // FIXME: This is wrong since G_BITCAST is not allowed to change the
741 // number of bits but it's what the previous code described and fixing
742 // it breaks tests.
743 .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
744 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
745 v2p0});
746
747 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
748
749 // va_list must be a pointer, but most sized types are pretty easy to handle
750 // as the destination.
751  getActionDefinitionsBuilder(G_VAARG)
752      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
753 .clampScalar(0, s8, s64)
754 .widenScalarToNextPow2(0, /*Min*/ 8);
755
756 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
757 .lowerIf(
758 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
759
760 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
761 .customIf([](const LegalityQuery &Query) {
762 return Query.Types[0].getSizeInBits() == 128;
763 })
764 .clampScalar(0, s32, s64)
765 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
766
767  getActionDefinitionsBuilder(
768      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
769 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
770 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
771 .clampScalar(0, s32, s64)
772 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
773
774 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
775
776 // Merge/Unmerge
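  // G_MERGE_VALUES/G_UNMERGE_VALUES are legal when the wide type is 32, 64 or
  // 128 bits and the narrow type is a power of two between 8 and 64 bits;
  // other sizes are widened/clamped into that range first.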
777 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
778 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
779 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
780    getActionDefinitionsBuilder(Op)
781        .widenScalarToNextPow2(LitTyIdx, 8)
782 .widenScalarToNextPow2(BigTyIdx, 32)
783 .clampScalar(LitTyIdx, s8, s64)
784 .clampScalar(BigTyIdx, s32, s128)
785 .legalIf([=](const LegalityQuery &Q) {
786 switch (Q.Types[BigTyIdx].getSizeInBits()) {
787 case 32:
788 case 64:
789 case 128:
790 break;
791 default:
792 return false;
793 }
794 switch (Q.Types[LitTyIdx].getSizeInBits()) {
795 case 8:
796 case 16:
797 case 32:
798 case 64:
799 return true;
800 default:
801 return false;
802 }
803 });
804 }
805
806 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
807 .unsupportedIf([=](const LegalityQuery &Query) {
808 const LLT &EltTy = Query.Types[1].getElementType();
809 return Query.Types[0] != EltTy;
810 })
811 .minScalar(2, s64)
812 .customIf([=](const LegalityQuery &Query) {
813 const LLT &VecTy = Query.Types[1];
814 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
815 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
816 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
817 })
818 .minScalarOrEltIf(
819 [=](const LegalityQuery &Query) {
820            // We want to promote <M x s1> to <M x s64> if that wouldn't
821 // cause the total vec size to be > 128b.
822 return Query.Types[1].getNumElements() <= 2;
823 },
824 0, s64)
825 .minScalarOrEltIf(
826 [=](const LegalityQuery &Query) {
827 return Query.Types[1].getNumElements() <= 4;
828 },
829 0, s32)
830 .minScalarOrEltIf(
831 [=](const LegalityQuery &Query) {
832 return Query.Types[1].getNumElements() <= 8;
833 },
834 0, s16)
835 .minScalarOrEltIf(
836 [=](const LegalityQuery &Query) {
837 return Query.Types[1].getNumElements() <= 16;
838 },
839 0, s8)
840 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
841 .clampMaxNumElements(1, s64, 2)
842 .clampMaxNumElements(1, s32, 4)
843 .clampMaxNumElements(1, s16, 8)
844 .clampMaxNumElements(1, p0, 2);
845
846 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
847 .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
849
850 getActionDefinitionsBuilder(G_BUILD_VECTOR)
851 .legalFor({{v8s8, s8},
852 {v16s8, s8},
853 {v4s16, s16},
854 {v8s16, s16},
855 {v2s32, s32},
856 {v4s32, s32},
857 {v2p0, p0},
858 {v2s64, s64}})
859 .clampNumElements(0, v4s32, v4s32)
860 .clampNumElements(0, v2s64, v2s64)
861 .minScalarOrElt(0, s8)
863 .minScalarSameAs(1, 0);
864
865 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
866
867  getActionDefinitionsBuilder(G_CTLZ)
868      .legalForCartesianProduct(
869          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
870 .scalarize(1)
871 .widenScalarToNextPow2(1, /*Min=*/32)
872 .clampScalar(1, s32, s64)
873 .scalarSameSizeAs(0, 1);
874 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
875
876 // TODO: Custom lowering for v2s32, v4s32, v2s64.
877 getActionDefinitionsBuilder(G_BITREVERSE)
878 .legalFor({s32, s64, v8s8, v16s8})
879 .widenScalarToNextPow2(0, /*Min = */ 32)
880 .clampScalar(0, s32, s64);
881
882 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
883
884  getActionDefinitionsBuilder(G_CTTZ)
885      .lowerIf(isVector(0))
886 .widenScalarToNextPow2(1, /*Min=*/32)
887 .clampScalar(1, s32, s64)
888 .scalarSameSizeAs(0, 1)
889 .legalIf([=](const LegalityQuery &Query) {
890 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
891 })
892 .customIf([=](const LegalityQuery &Query) {
893 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
894 });
895
896 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
897 .legalIf([=](const LegalityQuery &Query) {
898 const LLT &DstTy = Query.Types[0];
899 const LLT &SrcTy = Query.Types[1];
900 // For now just support the TBL2 variant which needs the source vectors
901 // to be the same size as the dest.
902 if (DstTy != SrcTy)
903 return false;
904 return llvm::is_contained(
905 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
906 })
907      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
908      // just want those lowered into G_BUILD_VECTOR.
909 .lowerIf([=](const LegalityQuery &Query) {
910 return !Query.Types[1].isVector();
911 })
912 .moreElementsIf(
913 [](const LegalityQuery &Query) {
914 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
915 Query.Types[0].getNumElements() >
916 Query.Types[1].getNumElements();
917 },
918 changeTo(1, 0))
920 .clampNumElements(0, v4s32, v4s32)
921 .clampNumElements(0, v2s64, v2s64)
922 .moreElementsIf(
923 [](const LegalityQuery &Query) {
924 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
925 Query.Types[0].getNumElements() <
926 Query.Types[1].getNumElements();
927 },
928 changeTo(0, 1));
929
930 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
931 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
932
933 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
934
935 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
936
937 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
938
939 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
940
941 if (ST.hasMOPS()) {
942 // G_BZERO is not supported. Currently it is only emitted by
943 // PreLegalizerCombiner for G_MEMSET with zero constant.
944    getActionDefinitionsBuilder(G_BZERO).unsupported();
945
946    getActionDefinitionsBuilder(G_MEMSET)
947        .legalForCartesianProduct({p0}, {s64}, {s64})
948 .customForCartesianProduct({p0}, {s8}, {s64})
949 .immIdx(0); // Inform verifier imm idx 0 is handled.
950
951 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
952 .legalForCartesianProduct({p0}, {p0}, {s64})
953 .immIdx(0); // Inform verifier imm idx 0 is handled.
954
955 // G_MEMCPY_INLINE does not have a tailcall immediate
956 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
957 .legalForCartesianProduct({p0}, {p0}, {s64});
958
959 } else {
960 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
961 .libcall();
962 }
963
964 // FIXME: Legal vector types are only legal with NEON.
965 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
966 if (HasCSSC)
967 ABSActions
968 .legalFor({s32, s64});
969 ABSActions
970 .legalFor(PackedVectorAllTypeList)
971 .lowerIf(isScalar(0));
972
973 // For fadd reductions we have pairwise operations available. We treat the
974 // usual legal types as legal and handle the lowering to pairwise instructions
975 // later.
976 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
977 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
978 .legalIf([=](const LegalityQuery &Query) {
979 const auto &Ty = Query.Types[1];
980 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
981 })
982 .minScalarOrElt(0, MinFPScalar)
983 .clampMaxNumElements(1, s64, 2)
984 .clampMaxNumElements(1, s32, 4)
985 .clampMaxNumElements(1, s16, 8)
986 .lower();
987
988 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
989 .legalFor({{s8, v16s8},
990 {s8, v8s8},
991 {s16, v8s16},
992 {s16, v4s16},
993 {s32, v4s32},
994 {s32, v2s32},
995 {s64, v2s64}})
996 .clampMaxNumElements(1, s64, 2)
997 .clampMaxNumElements(1, s32, 4)
998 .clampMaxNumElements(1, s16, 8)
999 .clampMaxNumElements(1, s8, 16)
1000 .lower();
1001
1002 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1003 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1004 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1005 .legalIf([=](const LegalityQuery &Query) {
1006 const auto &Ty = Query.Types[1];
1007 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1008 })
1009 .minScalarOrElt(0, MinFPScalar)
1010 .clampMaxNumElements(1, s64, 2)
1011 .clampMaxNumElements(1, s32, 4)
1012 .clampMaxNumElements(1, s16, 8)
1013 .lower();
1014
1015 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1016 .clampMaxNumElements(1, s32, 2)
1017 .clampMaxNumElements(1, s16, 4)
1018 .clampMaxNumElements(1, s8, 8)
1019 .scalarize(1)
1020 .lower();
1021
1022  getActionDefinitionsBuilder(
1023      {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1024 .legalFor({{s8, v8s8},
1025 {s8, v16s8},
1026 {s16, v4s16},
1027 {s16, v8s16},
1028 {s32, v2s32},
1029 {s32, v4s32}})
1030 .clampMaxNumElements(1, s64, 2)
1031 .clampMaxNumElements(1, s32, 4)
1032 .clampMaxNumElements(1, s16, 8)
1033 .clampMaxNumElements(1, s8, 16)
1034 .scalarize(1)
1035 .lower();
1036
1037  getActionDefinitionsBuilder(
1038      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1039 // Try to break down into smaller vectors as long as they're at least 64
1040 // bits. This lets us use vector operations for some parts of the
1041 // reduction.
1042 .fewerElementsIf(
1043 [=](const LegalityQuery &Q) {
1044 LLT SrcTy = Q.Types[1];
1045 if (SrcTy.isScalar())
1046 return false;
1047 if (!isPowerOf2_32(SrcTy.getNumElements()))
1048 return false;
1049 // We can usually perform 64b vector operations.
1050 return SrcTy.getSizeInBits() > 64;
1051 },
1052 [=](const LegalityQuery &Q) {
1053 LLT SrcTy = Q.Types[1];
1054 return std::make_pair(1, SrcTy.divide(2));
1055 })
1056 .scalarize(1)
1057 .lower();
1058
1059 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
1060 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
1061
1062 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1063 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1064 .lower();
1065
1066  getActionDefinitionsBuilder(G_ROTR)
1067      .legalFor({{s32, s64}, {s64, s64}})
1068 .customIf([=](const LegalityQuery &Q) {
1069 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1070 })
1071 .lower();
1073
1074 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1075 .customFor({{s32, s32}, {s64, s64}});
1076
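  // G_CTPOP: scalar popcount is legal only with CSSC; everything tagged
  // custom here is lowered via NEON byte-wise CNT in legalizeCTPOP().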
1077 auto always = [=](const LegalityQuery &Q) { return true; };
1078 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1079 if (HasCSSC)
1080 CTPOPActions
1081 .legalFor({{s32, s32},
1082 {s64, s64},
1083 {v8s8, v8s8},
1084 {v16s8, v16s8}})
1085 .customFor({{s128, s128},
1086 {v2s64, v2s64},
1087 {v2s32, v2s32},
1088 {v4s32, v4s32},
1089 {v4s16, v4s16},
1090 {v8s16, v8s16}});
1091 else
1092 CTPOPActions
1093 .legalFor({{v8s8, v8s8},
1094 {v16s8, v16s8}})
1095 .customFor({{s32, s32},
1096 {s64, s64},
1097 {s128, s128},
1098 {v2s64, v2s64},
1099 {v2s32, v2s32},
1100 {v4s32, v4s32},
1101 {v4s16, v4s16},
1102 {v8s16, v8s16}});
1103 CTPOPActions
1104 .clampScalar(0, s32, s128)
1105 .widenScalarToNextPow2(0)
1106 .minScalarEltSameAsIf(always, 1, 0)
1107 .maxScalarEltSameAsIf(always, 1, 0);
1108
1109 // TODO: Vector types.
1110 getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
1111
1112 // TODO: Libcall support for s128.
1113 // TODO: s16 should be legal with full FP16 support.
1114 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1115 .legalFor({{s64, s32}, {s64, s64}});
1116
1117 // TODO: Custom legalization for vector types.
1118 // TODO: Custom legalization for mismatched types.
1119 // TODO: s16 support.
1120 getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
1121
1123
1124 // Access to floating-point environment.
1125 getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1126 .libcall();
1127
1128 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1129
1130  getLegacyLegalizerInfo().computeTables();
1131  verify(*ST.getInstrInfo());
1132}
1133
1134bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
1135                                          MachineInstr &MI) const {
1136 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1137 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1138 GISelChangeObserver &Observer = Helper.Observer;
1139 switch (MI.getOpcode()) {
1140 default:
1141 // No idea what to do.
1142 return false;
1143 case TargetOpcode::G_VAARG:
1144 return legalizeVaArg(MI, MRI, MIRBuilder);
1145 case TargetOpcode::G_LOAD:
1146 case TargetOpcode::G_STORE:
1147 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1148 case TargetOpcode::G_SHL:
1149 case TargetOpcode::G_ASHR:
1150 case TargetOpcode::G_LSHR:
1151 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1152 case TargetOpcode::G_GLOBAL_VALUE:
1153 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1154 case TargetOpcode::G_SBFX:
1155 case TargetOpcode::G_UBFX:
1156 return legalizeBitfieldExtract(MI, MRI, Helper);
1157 case TargetOpcode::G_FSHL:
1158 case TargetOpcode::G_FSHR:
1159 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1160 case TargetOpcode::G_ROTR:
1161 return legalizeRotate(MI, MRI, Helper);
1162 case TargetOpcode::G_CTPOP:
1163 return legalizeCTPOP(MI, MRI, Helper);
1164 case TargetOpcode::G_ATOMIC_CMPXCHG:
1165 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1166 case TargetOpcode::G_CTTZ:
1167 return legalizeCTTZ(MI, Helper);
1168 case TargetOpcode::G_BZERO:
1169 case TargetOpcode::G_MEMCPY:
1170 case TargetOpcode::G_MEMMOVE:
1171 case TargetOpcode::G_MEMSET:
1172 return legalizeMemOps(MI, Helper);
1173 case TargetOpcode::G_FCOPYSIGN:
1174 return legalizeFCopySign(MI, Helper);
1175 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1176 return legalizeExtractVectorElt(MI, MRI, Helper);
1177 case TargetOpcode::G_DYN_STACKALLOC:
1178 return legalizeDynStackAlloc(MI, Helper);
1179 }
1180
1181 llvm_unreachable("expected switch to return");
1182}
1183
1184bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1185                                               MachineRegisterInfo &MRI,
1186                                               MachineIRBuilder &MIRBuilder,
1187 GISelChangeObserver &Observer,
1188 LegalizerHelper &Helper) const {
1189 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1190 MI.getOpcode() == TargetOpcode::G_FSHR);
1191
1192 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1193 // lowering
1194 Register ShiftNo = MI.getOperand(3).getReg();
1195 LLT ShiftTy = MRI.getType(ShiftNo);
1196 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1197
1198 // Adjust shift amount according to Opcode (FSHL/FSHR)
1199 // Convert FSHL to FSHR
1200 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1201 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1202
1203 // Lower non-constant shifts and leave zero shifts to the optimizer.
1204 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1205 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1206            LegalizerHelper::LegalizeResult::Legalized);
1207
1208 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1209
1210 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1211
1212  // If the instruction is G_FSHR and has a 64-bit G_CONSTANT shift amount in
1213  // the range [0, BitWidth), it is legal.
1214 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1215 VRegAndVal->Value.ult(BitWidth))
1216 return true;
1217
1218 // Cast the ShiftNumber to a 64-bit type
1219 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1220
1221 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1222 Observer.changingInstr(MI);
1223 MI.getOperand(3).setReg(Cast64.getReg(0));
1224 Observer.changedInstr(MI);
1225 }
1226 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1227 // instruction
1228 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1229 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1230 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1231 Cast64.getReg(0)});
1232 MI.eraseFromParent();
1233 }
1234 return true;
1235}
1236
1237bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1238                                          MachineRegisterInfo &MRI,
1239                                          LegalizerHelper &Helper) const {
1240 // To allow for imported patterns to match, we ensure that the rotate amount
1241 // is 64b with an extension.
1242 Register AmtReg = MI.getOperand(2).getReg();
1243 LLT AmtTy = MRI.getType(AmtReg);
1244 (void)AmtTy;
1245 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1246 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1247 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1248 Helper.Observer.changingInstr(MI);
1249 MI.getOperand(2).setReg(NewAmt.getReg(0));
1250 Helper.Observer.changedInstr(MI);
1251 return true;
1252}
1253
1254bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1255    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1256    GISelChangeObserver &Observer) const {
1257 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1258 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1259 // G_ADD_LOW instructions.
1260 // By splitting this here, we can optimize accesses in the small code model by
1261 // folding in the G_ADD_LOW into the load/store offset.
1262 auto &GlobalOp = MI.getOperand(1);
1263 const auto* GV = GlobalOp.getGlobal();
1264 if (GV->isThreadLocal())
1265 return true; // Don't want to modify TLS vars.
1266
1267 auto &TM = ST->getTargetLowering()->getTargetMachine();
1268 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1269
1270 if (OpFlags & AArch64II::MO_GOT)
1271 return true;
1272
1273 auto Offset = GlobalOp.getOffset();
1274 Register DstReg = MI.getOperand(0).getReg();
1275 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1276 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1277 // Set the regclass on the dest reg too.
1278 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1279
1280 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1281 // by creating a MOVK that sets bits 48-63 of the register to (global address
1282 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1283 // prevent an incorrect tag being generated during relocation when the
1284 // global appears before the code section. Without the offset, a global at
1285 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1286 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1287 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1288 // instead of `0xf`.
1289 // This assumes that we're in the small code model so we can assume a binary
1290 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1291 // binary must also be loaded into address range [0, 2^48). Both of these
1292 // properties need to be ensured at runtime when using tagged addresses.
1293 if (OpFlags & AArch64II::MO_TAGGED) {
1294 assert(!Offset &&
1295 "Should not have folded in an offset for a tagged global!");
1296 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1297 .addGlobalAddress(GV, 0x100000000,
1299 .addImm(48);
1300 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1301 }
1302
1303 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1304 .addGlobalAddress(GV, Offset,
1306 MI.eraseFromParent();
1307 return true;
1308}
1309
1310bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1311                                             MachineInstr &MI) const {
1312 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1313 switch (IntrinsicID) {
1314 case Intrinsic::vacopy: {
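    // va_list is a single pointer on Darwin and Windows; otherwise it is the
    // AAPCS va_list structure (32 bytes for LP64, 20 bytes for ILP32), so copy
    // the whole object.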
1315 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1316 unsigned VaListSize =
1317 (ST->isTargetDarwin() || ST->isTargetWindows())
1318 ? PtrSize
1319 : ST->isTargetILP32() ? 20 : 32;
1320
1321 MachineFunction &MF = *MI.getMF();
1323 LLT::scalar(VaListSize * 8));
1324 MachineIRBuilder MIB(MI);
1325 MIB.buildLoad(Val, MI.getOperand(2),
1328 VaListSize, Align(PtrSize)));
1329 MIB.buildStore(Val, MI.getOperand(1),
1332 VaListSize, Align(PtrSize)));
1333 MI.eraseFromParent();
1334 return true;
1335 }
1336 case Intrinsic::get_dynamic_area_offset: {
1337 MachineIRBuilder &MIB = Helper.MIRBuilder;
1338 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1339 MI.eraseFromParent();
1340 return true;
1341 }
1342 case Intrinsic::aarch64_mops_memset_tag: {
1343 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1344 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1345 // the instruction).
1346 MachineIRBuilder MIB(MI);
1347 auto &Value = MI.getOperand(3);
1348 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1349 Value.setReg(ExtValueReg);
1350 return true;
1351 }
1352 case Intrinsic::prefetch: {
1353 MachineIRBuilder MIB(MI);
1354 auto &AddrVal = MI.getOperand(1);
1355
1356 int64_t IsWrite = MI.getOperand(2).getImm();
1357 int64_t Locality = MI.getOperand(3).getImm();
1358 int64_t IsData = MI.getOperand(4).getImm();
1359
1360 bool IsStream = Locality == 0;
1361 if (Locality != 0) {
1362 assert(Locality <= 3 && "Prefetch locality out-of-range");
1363 // The locality degree is the opposite of the cache speed.
1364 // Put the number the other way around.
1365 // The encoding starts at 0 for level 1
1366 Locality = 3 - Locality;
1367 }
1368
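    // Assemble the PRFM immediate: bit 4 = write, bit 3 = instruction (vs.
    // data) cache, bits 2:1 = cache level, bit 0 = streaming (non-temporal).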
1369 unsigned PrfOp =
1370 (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
1371
1372 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1373 MI.eraseFromParent();
1374 return true;
1375 }
1376 case Intrinsic::aarch64_prefetch: {
1377 MachineIRBuilder MIB(MI);
1378 auto &AddrVal = MI.getOperand(1);
1379
1380 int64_t IsWrite = MI.getOperand(2).getImm();
1381 int64_t Target = MI.getOperand(3).getImm();
1382 int64_t IsStream = MI.getOperand(4).getImm();
1383 int64_t IsData = MI.getOperand(5).getImm();
1384
1385 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1386 (!IsData << 3) | // IsDataCache bit
1387 (Target << 1) | // Cache level bits
1388 (unsigned)IsStream; // Stream bit
1389
1390 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1391 MI.eraseFromParent();
1392 return true;
1393 }
1394 case Intrinsic::aarch64_neon_uaddv:
1395 case Intrinsic::aarch64_neon_saddv:
1396 case Intrinsic::aarch64_neon_umaxv:
1397 case Intrinsic::aarch64_neon_smaxv:
1398 case Intrinsic::aarch64_neon_uminv:
1399 case Intrinsic::aarch64_neon_sminv: {
1400 MachineIRBuilder MIB(MI);
1401 MachineRegisterInfo &MRI = *MIB.getMRI();
1402 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1403 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1404 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1405
1406 auto OldDst = MI.getOperand(0).getReg();
1407 auto OldDstTy = MRI.getType(OldDst);
1408 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1409 if (OldDstTy == NewDstTy)
1410 return true;
1411
1412 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1413
1414 Helper.Observer.changingInstr(MI);
1415 MI.getOperand(0).setReg(NewDst);
1416 Helper.Observer.changedInstr(MI);
1417
1418 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1419 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1420 OldDst, NewDst);
1421
1422 return true;
1423 }
1424 case Intrinsic::aarch64_neon_smax:
1425 case Intrinsic::aarch64_neon_smin:
1426 case Intrinsic::aarch64_neon_umax:
1427 case Intrinsic::aarch64_neon_umin:
1428 case Intrinsic::aarch64_neon_fmax:
1429 case Intrinsic::aarch64_neon_fmin: {
1430 MachineIRBuilder MIB(MI);
1431 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1432 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1433 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1434 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1435 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1436 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1437 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1438 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1439 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1440 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1441 {MI.getOperand(2), MI.getOperand(3)});
1442 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1443 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1444 {MI.getOperand(2), MI.getOperand(3)});
1445 MI.eraseFromParent();
1446 return true;
1447 }
1448 case Intrinsic::experimental_vector_reverse:
1449 // TODO: Add support for vector_reverse
1450 return false;
1451 }
1452
1453 return true;
1454}
1455
1456bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1457    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1458    GISelChangeObserver &Observer) const {
1459 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1460 MI.getOpcode() == TargetOpcode::G_LSHR ||
1461 MI.getOpcode() == TargetOpcode::G_SHL);
1462 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1463 // imported patterns can select it later. Either way, it will be legal.
1464 Register AmtReg = MI.getOperand(2).getReg();
1465 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1466 if (!VRegAndVal)
1467 return true;
1468 // Check the shift amount is in range for an immediate form.
1469 int64_t Amount = VRegAndVal->Value.getSExtValue();
1470 if (Amount > 31)
1471 return true; // This will have to remain a register variant.
1472 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1473 Observer.changingInstr(MI);
1474 MI.getOperand(2).setReg(ExtCst.getReg(0));
1475 Observer.changedInstr(MI);
1476 return true;
1477}
1478
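// If the address is a G_PTR_ADD of a base and a constant offset that fits the
// signed 7-bit, 8-byte-scaled immediate of LDP/STP, fold it into Base+Offset.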
1479static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1480                                MachineRegisterInfo &MRI) {
1481  Base = Root;
1482 Offset = 0;
1483
1484 Register NewBase;
1485 int64_t NewOffset;
1486 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1487 isShiftedInt<7, 3>(NewOffset)) {
1488 Base = NewBase;
1489 Offset = NewOffset;
1490 }
1491}
1492
1493// FIXME: This should be removed and replaced with the generic bitcast legalize
1494// action.
1495bool AArch64LegalizerInfo::legalizeLoadStore(
1496    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1497    GISelChangeObserver &Observer) const {
1498 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1499 MI.getOpcode() == TargetOpcode::G_LOAD);
1500 // Here we just try to handle vector loads/stores where our value type might
1501 // have pointer elements, which the SelectionDAG importer can't handle. To
1502 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1503 // the value to use s64 types.
1504
1505  // Custom legalization requires that the instruction, if not deleted, be
1506  // fully legalized. To allow further legalization of the instruction, we
1507  // create a new instruction and erase the existing one.
1508
1509 Register ValReg = MI.getOperand(0).getReg();
1510 const LLT ValTy = MRI.getType(ValReg);
1511
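  // 128-bit scalar (atomic) accesses are emitted directly as LDP/STP, or as
  // LDIAPP/STILP when RCPC3 provides the acquire/release ordering.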
1512 if (ValTy == LLT::scalar(128)) {
1513
1514 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1515 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1516 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1517 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1518 bool IsRcpC3 =
1519 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1520
1521 LLT s64 = LLT::scalar(64);
1522
1523 unsigned Opcode;
1524 if (IsRcpC3) {
1525 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1526 } else {
1527 // For LSE2, loads/stores should have been converted to monotonic and had
1528 // a fence inserted after them.
1529 assert(Ordering == AtomicOrdering::Monotonic ||
1530 Ordering == AtomicOrdering::Unordered);
1531 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1532
1533 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1534 }
1535
1536    MachineInstrBuilder NewI;
1537    if (IsLoad) {
1538 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1539 MIRBuilder.buildMergeLikeInstr(
1540 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1541 } else {
1542 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1543 NewI = MIRBuilder.buildInstr(
1544 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1545 }
1546
1547 if (IsRcpC3) {
1548 NewI.addUse(MI.getOperand(1).getReg());
1549 } else {
1550 Register Base;
1551 int Offset;
1552 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1553 NewI.addUse(Base);
1554 NewI.addImm(Offset / 8);
1555 }
1556
1557 NewI.cloneMemRefs(MI);
1558    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1559                                     *MRI.getTargetRegisterInfo(),
1560 *ST->getRegBankInfo());
1561 MI.eraseFromParent();
1562 return true;
1563 }
1564
1565 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
1566 ValTy.getElementType().getAddressSpace() != 0) {
1567 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1568 return false;
1569 }
1570
1571 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1572 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1573 auto &MMO = **MI.memoperands_begin();
1574 MMO.setType(NewTy);
1575
1576 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1577 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1578 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1579 } else {
1580 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1581 MIRBuilder.buildBitcast(ValReg, NewLoad);
1582 }
1583 MI.eraseFromParent();
1584 return true;
1585}
1586
1587bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1588                                         MachineRegisterInfo &MRI,
1589                                         MachineIRBuilder &MIRBuilder) const {
1590 MachineFunction &MF = MIRBuilder.getMF();
1591 Align Alignment(MI.getOperand(2).getImm());
1592 Register Dst = MI.getOperand(0).getReg();
1593 Register ListPtr = MI.getOperand(1).getReg();
1594
1595 LLT PtrTy = MRI.getType(ListPtr);
1596 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1597
1598 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1599 const Align PtrAlign = Align(PtrSize);
1600 auto List = MIRBuilder.buildLoad(
1601 PtrTy, ListPtr,
1603 PtrTy, PtrAlign));
1604
1605 MachineInstrBuilder DstPtr;
1606 if (Alignment > PtrAlign) {
1607 // Realign the list to the actual required alignment.
1608 auto AlignMinus1 =
1609 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1610 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1611 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1612 } else
1613 DstPtr = List;
1614
1615 LLT ValTy = MRI.getType(Dst);
1616 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1617 MIRBuilder.buildLoad(
1618 Dst, DstPtr,
1620 ValTy, std::max(Alignment, PtrAlign)));
1621
1622 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1623
1624 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1625
1626 MIRBuilder.buildStore(NewList, ListPtr,
1629 PtrTy, PtrAlign));
1630
1631 MI.eraseFromParent();
1632 return true;
1633}
1634
1635bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1636    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1637  // Only legal if we can select immediate forms.
1638 // TODO: Lower this otherwise.
1639 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1640 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1641}
1642
1643bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1644                                         MachineRegisterInfo &MRI,
1645                                         LegalizerHelper &Helper) const {
1646 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1647 // it can be more efficiently lowered to the following sequence that uses
1648 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1649 // registers are cheap.
1650 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1651 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1652 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1653 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1654 //
1655 // For 128 bit vector popcounts, we lower to the following sequence:
1656 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1657 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1658 // uaddlp.4s v0, v0 // v4s32, v2s64
1659 // uaddlp.2d v0, v0 // v2s64
1660 //
1661 // For 64 bit vector popcounts, we lower to the following sequence:
1662 // cnt.8b v0, v0 // v4s16, v2s32
1663 // uaddlp.4h v0, v0 // v4s16, v2s32
1664 // uaddlp.2s v0, v0 // v2s32
1665
1666 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1667 Register Dst = MI.getOperand(0).getReg();
1668 Register Val = MI.getOperand(1).getReg();
1669 LLT Ty = MRI.getType(Val);
1670 unsigned Size = Ty.getSizeInBits();
1671
1672 assert(Ty == MRI.getType(Dst) &&
1673 "Expected src and dst to have the same type!");
1674
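  // With CSSC, a 128-bit scalar popcount is split into two 64-bit CTPOPs whose
  // results are added together.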
1675 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1676 LLT s64 = LLT::scalar(64);
1677
1678 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1679 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1680 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1681 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1682
1683 MIRBuilder.buildZExt(Dst, Add);
1684 MI.eraseFromParent();
1685 return true;
1686 }
1687
1688 if (!ST->hasNEON() ||
1689 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1690 // Use generic lowering when custom lowering is not possible.
1691 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1692 Helper.lowerBitCount(MI) ==
1693               LegalizerHelper::LegalizeResult::Legalized;
1694  }
1695
1696 // Pre-conditioning: widen Val up to the nearest vector type.
1697 // s32,s64,v4s16,v2s32 -> v8i8
1698 // v8s16,v4s32,v2s64 -> v16i8
1699 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1700 if (Ty.isScalar()) {
1701 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1702 if (Size == 32) {
1703 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1704 }
1705 }
1706 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1707
1708 // Count bits in each byte-sized lane.
1709 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1710
1711 // Sum across lanes.
1712 Register HSum = CTPOP.getReg(0);
1713 unsigned Opc;
1714 SmallVector<LLT> HAddTys;
1715 if (Ty.isScalar()) {
1716 Opc = Intrinsic::aarch64_neon_uaddlv;
1717 HAddTys.push_back(LLT::scalar(32));
1718 } else if (Ty == LLT::fixed_vector(8, 16)) {
1719 Opc = Intrinsic::aarch64_neon_uaddlp;
1720 HAddTys.push_back(LLT::fixed_vector(8, 16));
1721 } else if (Ty == LLT::fixed_vector(4, 32)) {
1722 Opc = Intrinsic::aarch64_neon_uaddlp;
1723 HAddTys.push_back(LLT::fixed_vector(8, 16));
1724 HAddTys.push_back(LLT::fixed_vector(4, 32));
1725 } else if (Ty == LLT::fixed_vector(2, 64)) {
1726 Opc = Intrinsic::aarch64_neon_uaddlp;
1727 HAddTys.push_back(LLT::fixed_vector(8, 16));
1728 HAddTys.push_back(LLT::fixed_vector(4, 32));
1729 HAddTys.push_back(LLT::fixed_vector(2, 64));
1730 } else if (Ty == LLT::fixed_vector(4, 16)) {
1731 Opc = Intrinsic::aarch64_neon_uaddlp;
1732 HAddTys.push_back(LLT::fixed_vector(4, 16));
1733 } else if (Ty == LLT::fixed_vector(2, 32)) {
1734 Opc = Intrinsic::aarch64_neon_uaddlp;
1735 HAddTys.push_back(LLT::fixed_vector(4, 16));
1736 HAddTys.push_back(LLT::fixed_vector(2, 32));
1737 } else
1738 llvm_unreachable("unexpected vector shape");
1739 MachineInstrBuilder UADD;
1740 for (LLT HTy : HAddTys) {
1741 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1742 HSum = UADD.getReg(0);
1743 }
1744
1745 // Post-conditioning.
1746 if (Ty.isScalar() && (Size == 64 || Size == 128))
1747 MIRBuilder.buildZExt(Dst, UADD);
1748 else
1749 UADD->getOperand(0).setReg(Dst);
1750 MI.eraseFromParent();
1751 return true;
1752 }
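// Illustrative sketch of the NEON path for a 64-bit scalar input
// (hypothetical vregs): byte-wise pop-counts, a uaddlv reduction across the
// lanes, then a zero-extension back to the scalar result:
//   %vec:_(v8s8) = G_BITCAST %val(s64)
//   %cnt:_(v8s8) = G_CTPOP %vec
//   %sum:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %cnt
//   %dst:_(s64) = G_ZEXT %sum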
1753
1754 bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1755 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1756 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1757 LLT s64 = LLT::scalar(64);
1758 auto Addr = MI.getOperand(1).getReg();
1759 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1760 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1761 auto DstLo = MRI.createGenericVirtualRegister(s64);
1762 auto DstHi = MRI.createGenericVirtualRegister(s64);
1763
1764 MachineInstrBuilder CAS;
1765 if (ST->hasLSE()) {
1766 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1767 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1768 // the rest of the MIR so we must reassemble the extracted registers into a
1769 // 128-bit known-regclass one with code like this:
1770 //
1771 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1772 // %out = CASP %in1, ...
1773 // %OldLo = G_EXTRACT %out, 0
1774 // %OldHi = G_EXTRACT %out, 64
1775 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1776 unsigned Opcode;
1777 switch (Ordering) {
1778 case AtomicOrdering::Acquire:
1779 Opcode = AArch64::CASPAX;
1780 break;
1781 case AtomicOrdering::Release:
1782 Opcode = AArch64::CASPLX;
1783 break;
1784 case AtomicOrdering::AcquireRelease:
1785 case AtomicOrdering::SequentiallyConsistent:
1786 Opcode = AArch64::CASPALX;
1787 break;
1788 default:
1789 Opcode = AArch64::CASPX;
1790 break;
1791 }
1792
1793 LLT s128 = LLT::scalar(128);
1794 auto CASDst = MRI.createGenericVirtualRegister(s128);
1795 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1796 auto CASNew = MRI.createGenericVirtualRegister(s128);
1797 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1798 .addUse(DesiredI->getOperand(0).getReg())
1799 .addImm(AArch64::sube64)
1800 .addUse(DesiredI->getOperand(1).getReg())
1801 .addImm(AArch64::subo64);
1802 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1803 .addUse(NewI->getOperand(0).getReg())
1804 .addImm(AArch64::sube64)
1805 .addUse(NewI->getOperand(1).getReg())
1806 .addImm(AArch64::subo64);
1807
1808 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1809
1810 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1811 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1812 } else {
1813 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1814 // can take arbitrary registers so it just has the normal GPR64 operands the
1815 // rest of AArch64 is expecting.
1816 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1817 unsigned Opcode;
1818 switch (Ordering) {
1819 case AtomicOrdering::Acquire:
1820 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1821 break;
1822 case AtomicOrdering::Release:
1823 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1824 break;
1825 case AtomicOrdering::AcquireRelease:
1826 case AtomicOrdering::SequentiallyConsistent:
1827 Opcode = AArch64::CMP_SWAP_128;
1828 break;
1829 default:
1830 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1831 break;
1832 }
1833
1834 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1835 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1836 {Addr, DesiredI->getOperand(0),
1837 DesiredI->getOperand(1), NewI->getOperand(0),
1838 NewI->getOperand(1)});
1839 }
1840
1841 CAS.cloneMemRefs(MI);
1842 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1843 *MRI.getTargetRegisterInfo(),
1844 *ST->getRegBankInfo());
1845
1846 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1847 MI.eraseFromParent();
1848 return true;
1849 }
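// Overall shape of the expansion above (illustrative; vregs are
// hypothetical): the s128 values are split into halves, the CASP or
// CMP_SWAP_128_* instruction produces the old value as two s64 halves, and
// they are merged back so the surrounding generic MIR keeps seeing s128:
//   %dlo:_(s64), %dhi:_(s64) = G_UNMERGE_VALUES %desired(s128)
//   %nlo:_(s64), %nhi:_(s64) = G_UNMERGE_VALUES %new(s128)
//   ... CASPx / CMP_SWAP_128_* over %addr yielding %olo, %ohi ...
//   %old:_(s128) = G_MERGE_VALUES %olo, %ohi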
1850
1851 bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1852 LegalizerHelper &Helper) const {
1853 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1854 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1855 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1856 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1857 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1858 MI.eraseFromParent();
1859 return true;
1860 }
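// The lowering above relies on the identity cttz(x) == ctlz(bitreverse(x)),
// which maps naturally onto AArch64's RBIT + CLZ instructions. Illustrative
// MIR (hypothetical vregs):
//   %rev:_(s32) = G_BITREVERSE %x(s32)
//   %dst:_(s32) = G_CTLZ %rev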
1861
1862 bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1863 LegalizerHelper &Helper) const {
1864 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1865
1866 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
1867 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1868 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1869 // the instruction).
1870 auto &Value = MI.getOperand(1);
1871 Register ExtValueReg =
1872 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1873 Value.setReg(ExtValueReg);
1874 return true;
1875 }
1876
1877 return false;
1878 }
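// Illustrative effect of the G_MEMSET case above (hypothetical vregs): the
// value operand is widened so the later MOPS-based expansion sees a 64-bit
// register, of which only the bottom 8 bits are meaningful:
//   %v64:_(s64) = G_ANYEXT %value(s8)
//   G_MEMSET %dst(p0), %v64(s64), %size(s64), ...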
1879
1880 bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1881 LegalizerHelper &Helper) const {
1882 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1883 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1884 Register Dst = MI.getOperand(0).getReg();
1885 LLT DstTy = MRI.getType(Dst);
1886 assert(DstTy.isScalar() && "Only expected scalars right now!");
1887 const unsigned DstSize = DstTy.getSizeInBits();
1888 assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1889 assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1890 "Expected homogeneous types!");
1891
1892 // We want to materialize a mask with the high bit set.
1893 uint64_t EltMask;
1894 LLT VecTy;
1895
1896 // TODO: s16 support.
1897 switch (DstSize) {
1898 default:
1899 llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1900 case 64: {
1901 // AdvSIMD immediate moves cannot materialize our mask in a single
1902 // instruction for 64-bit elements. Instead, materialize zero and then
1903 // negate it.
1904 EltMask = 0;
1905 VecTy = LLT::fixed_vector(2, DstTy);
1906 break;
1907 }
1908 case 32:
1909 EltMask = 0x80000000ULL;
1910 VecTy = LLT::fixed_vector(4, DstTy);
1911 break;
1912 }
1913
1914 // Widen In1 and In2 to 128 bits. We want these to eventually become
1915 // INSERT_SUBREGs.
1916 auto Undef = MIRBuilder.buildUndef(VecTy);
1917 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1918 auto Ins1 = MIRBuilder.buildInsertVectorElement(
1919 VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1920 auto Ins2 = MIRBuilder.buildInsertVectorElement(
1921 VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1922
1923 // Construct the mask.
1924 auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1925 if (DstSize == 64)
1926 Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1927
1928 auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
1929
1930 // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1931 // want this to eventually become an EXTRACT_SUBREG.
1932 SmallVector<Register, 2> DstRegs(1, Dst);
1933 for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1934 DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1935 MIRBuilder.buildUnmerge(DstRegs, Sel);
1936 MI.eraseFromParent();
1937 return true;
1938 }
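// Net effect of the sequence above (illustrative; assumes BSL-like bit-select
// semantics for G_BSP): with a mask holding only the per-element sign bit,
//   dst = (mask & in2) | (~mask & in1)
// i.e. the sign bit comes from operand 2 and all remaining bits from operand
// 1, which is exactly G_FCOPYSIGN. Working on 128-bit vectors lets the
// insert/unmerge steps later become INSERT_SUBREG/EXTRACT_SUBREG copies.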
1939
1940 bool AArch64LegalizerInfo::legalizeExtractVectorElt(
1941 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1942 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1943 auto VRegAndVal =
1944 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1945 if (VRegAndVal)
1946 return true;
1947 return Helper.lowerExtractInsertVectorElt(MI) !=
1948 LegalizerHelper::LegalizeResult::UnableToLegalize;
1949 }
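// A constant-index extract is reported as already legal; only a variable
// index falls back to lowerExtractInsertVectorElt, which goes through a stack
// temporary. Illustrative MIR for the case kept legal (hypothetical vregs):
//   %idx:_(s64) = G_CONSTANT i64 1
//   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %vec(v4s32), %idx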
1950
1951 bool AArch64LegalizerInfo::legalizeDynStackAlloc(
1952 MachineInstr &MI, LegalizerHelper &Helper) const {
1953 MachineFunction &MF = *MI.getParent()->getParent();
1954 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1955 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1956
1957 // If stack probing is not enabled for this function, use the default
1958 // lowering.
1959 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
1960 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
1961 "inline-asm") {
1962 Helper.lowerDynStackAlloc(MI);
1963 return true;
1964 }
1965
1966 Register Dst = MI.getOperand(0).getReg();
1967 Register AllocSize = MI.getOperand(1).getReg();
1968 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
1969
1970 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
1971 "Unexpected type for dynamic alloca");
1972 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
1973 "Unexpected type for dynamic alloca");
1974
1975 LLT PtrTy = MRI.getType(Dst);
1976 Register SPReg =
1977 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
1978 Register SPTmp =
1979 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
1980 auto NewMI =
1981 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
1982 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
1983 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
1984 MIRBuilder.buildCopy(Dst, SPTmp);
1985
1986 MI.eraseFromParent();
1987 return true;
1988 }
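// The probed path above is only taken when the function asks for inline stack
// probing; a rough IR sketch (function name hypothetical):
//   define void @f(i64 %n) "probe-stack"="inline-asm" {
//     %p = alloca i8, i64 %n   ; becomes G_DYN_STACKALLOC
//     ...
//   }
// In that case the allocation is emitted as a PROBED_STACKALLOC_DYN pseudo so
// the newly allocated stack region can be probed page by page, instead of the
// plain SP adjustment performed by the default lowering.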