AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64RegisterBankInfo.h"
16#include "AArch64Subtarget.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42    : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v8s16 = LLT::fixed_vector(8, 16);
54 const LLT v4s16 = LLT::fixed_vector(4, 16);
55 const LLT v2s16 = LLT::fixed_vector(2, 16);
56 const LLT v2s32 = LLT::fixed_vector(2, 32);
57 const LLT v4s32 = LLT::fixed_vector(4, 32);
58 const LLT v2s64 = LLT::fixed_vector(2, 64);
59 const LLT v2p0 = LLT::fixed_vector(2, p0);
60
61 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
62 v16s8, v8s16, v4s32,
63 v2s64, v2p0,
64 /* End 128bit types */
65 /* Begin 64bit types */
66 v8s8, v4s16, v2s32};
67
68 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
69
70 // FIXME: support subtargets which have neon/fp-armv8 disabled.
71 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
72    getLegacyLegalizerInfo().computeTables();
73    return;
74 }
75
76 // Some instructions only support s16 if the subtarget has full 16-bit FP
77 // support.
78 const bool HasFP16 = ST.hasFullFP16();
79 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
80
81 const bool HasCSSC = ST.hasCSSC();
82 const bool HasRCPC3 = ST.hasRCPC3();
83
84 getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
85 .legalFor({p0, s8, s16, s32, s64})
86 .legalFor(PackedVectorAllTypeList)
88 .clampScalar(0, s8, s64)
90 [=](const LegalityQuery &Query) {
91 return Query.Types[0].isVector() &&
92 (Query.Types[0].getElementType() != s64 ||
93 Query.Types[0].getNumElements() != 2);
94 },
95 [=](const LegalityQuery &Query) {
96 LLT EltTy = Query.Types[0].getElementType();
97 if (EltTy == s64)
98 return std::make_pair(0, LLT::fixed_vector(2, 64));
99 return std::make_pair(0, EltTy);
100 });
101
102  getActionDefinitionsBuilder(G_PHI)
103      .legalFor({p0, s16, s32, s64})
104 .legalFor(PackedVectorAllTypeList)
106 .clampScalar(0, s16, s64)
107 // Maximum: sN * k = 128
108 .clampMaxNumElements(0, s8, 16)
109 .clampMaxNumElements(0, s16, 8)
110 .clampMaxNumElements(0, s32, 4)
111 .clampMaxNumElements(0, s64, 2)
112 .clampMaxNumElements(0, p0, 2);
113
114  getActionDefinitionsBuilder(G_BSWAP)
115      .legalFor({s32, s64, v4s32, v2s32, v2s64})
116 .widenScalarToNextPow2(0)
117 .clampScalar(0, s32, s64);
118
119 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
120 .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
121 .scalarizeIf(
122 [=](const LegalityQuery &Query) {
123 return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
124 },
125 0)
126 .legalFor({v2s64})
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
129 .clampMaxNumElements(0, s8, 16)
130 .clampMaxNumElements(0, s16, 8)
131 .clampNumElements(0, v2s32, v4s32)
132 .clampNumElements(0, v2s64, v2s64)
133      .minScalarOrEltIf(
134          [=](const LegalityQuery &Query) {
135 return Query.Types[0].getNumElements() <= 2;
136 },
137 0, s32)
138 .minScalarOrEltIf(
139 [=](const LegalityQuery &Query) {
140 return Query.Types[0].getNumElements() <= 4;
141 },
142 0, s16)
143 .minScalarOrEltIf(
144 [=](const LegalityQuery &Query) {
145 return Query.Types[0].getNumElements() <= 16;
146 },
147 0, s8)
148      .moreElementsToNextPow2(0);
149
150 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
151 .customIf([=](const LegalityQuery &Query) {
152 const auto &SrcTy = Query.Types[0];
153 const auto &AmtTy = Query.Types[1];
154 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
155 AmtTy.getSizeInBits() == 32;
156 })
157 .legalFor({
158 {s32, s32},
159 {s32, s64},
160 {s64, s64},
161 {v8s8, v8s8},
162 {v16s8, v16s8},
163 {v4s16, v4s16},
164 {v8s16, v8s16},
165 {v2s32, v2s32},
166 {v4s32, v4s32},
167 {v2s64, v2s64},
168 })
169 .widenScalarToNextPow2(0)
170 .clampScalar(1, s32, s64)
171 .clampScalar(0, s32, s64)
172 .clampNumElements(0, v2s32, v4s32)
173 .clampNumElements(0, v2s64, v2s64)
175 .minScalarSameAs(1, 0);
176
177  getActionDefinitionsBuilder(G_PTR_ADD)
178      .legalFor({{p0, s64}, {v2p0, v2s64}})
179 .clampScalar(1, s64, s64);
180
181 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
182
183 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
184 .legalFor({s32, s64})
185 .libcallFor({s128})
186 .clampScalar(0, s32, s64)
188 .scalarize(0);
189
190 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
191 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
193 .clampScalarOrElt(0, s32, s64)
194 .clampNumElements(0, v2s32, v4s32)
195 .clampNumElements(0, v2s64, v2s64)
196 .moreElementsToNextPow2(0);
197
198
199 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
200 .widenScalarToNextPow2(0, /*Min = */ 32)
201 .clampScalar(0, s32, s64)
202 .lower();
203
204 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
205 .legalFor({s64, v8s16, v16s8, v4s32})
206 .lower();
207
208 auto &MinMaxActions = getActionDefinitionsBuilder(
209 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
210 if (HasCSSC)
211 MinMaxActions
212 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
213      // Make clamping conditional on the CSSC extension: without legal types
214      // we lower to CMP, which can fold one of the two sxtb's we'd otherwise
215      // need if we detect a type smaller than 32-bit.
216 .minScalar(0, s32);
217 else
218 MinMaxActions
219 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
220 MinMaxActions
221 .clampNumElements(0, v8s8, v16s8)
222 .clampNumElements(0, v4s16, v8s16)
223 .clampNumElements(0, v2s32, v4s32)
224      // FIXME: This shouldn't be needed as v2s64 types are going to
225 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
226 .clampNumElements(0, v2s64, v2s64)
227 .lower();
228
229  getActionDefinitionsBuilder(
230      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
231 .legalFor({{s32, s32}, {s64, s32}})
232 .clampScalar(0, s32, s64)
233 .clampScalar(1, s32, s64)
234      .widenScalarToNextPow2(0);
235
236 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
237 .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
238 .clampScalar(0, MinFPScalar, s64)
239 .clampNumElements(0, v2s32, v4s32)
240 .clampNumElements(0, v2s64, v2s64);
241
242 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
243
244 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
245 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
246 G_FNEARBYINT, G_INTRINSIC_LRINT})
247 // If we don't have full FP16 support, then scalarize the elements of
248 // vectors containing fp16 types.
249 .fewerElementsIf(
250 [=, &ST](const LegalityQuery &Query) {
251 const auto &Ty = Query.Types[0];
252 return Ty.isVector() && Ty.getElementType() == s16 &&
253 !ST.hasFullFP16();
254 },
255 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
256 // If we don't have full FP16 support, then widen s16 to s32 if we
257 // encounter it.
258 .widenScalarIf(
259 [=, &ST](const LegalityQuery &Query) {
260 return Query.Types[0] == s16 && !ST.hasFullFP16();
261 },
262 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
263 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
264
265  getActionDefinitionsBuilder(
266      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
267 // We need a call for these, so we always need to scalarize.
268 .scalarize(0)
269 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
270 .minScalar(0, s32)
271 .libcallFor({s32, s64, v2s32, v4s32, v2s64});
272
273  getActionDefinitionsBuilder(G_INSERT)
274      .legalIf(all(typeInSet(0, {s32, s64, p0}),
275 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
277 .clampScalar(0, s32, s64)
279 .minScalar(1, s8)
280 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
281 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
282
283  getActionDefinitionsBuilder(G_EXTRACT)
284      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
285 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
287 .clampScalar(1, s32, s128)
289 .minScalar(0, s16)
290 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
291 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
292 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
293
294
295 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
296 auto &Actions = getActionDefinitionsBuilder(Op);
297
298 if (Op == G_SEXTLOAD)
299      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
300
301 // Atomics have zero extending behavior.
302 Actions
303 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
304 {s32, p0, s16, 8},
305 {s32, p0, s32, 8},
306 {s64, p0, s8, 2},
307 {s64, p0, s16, 2},
308 {s64, p0, s32, 4},
309 {s64, p0, s64, 8},
310 {p0, p0, s64, 8},
311 {v2s32, p0, s64, 8}})
312 .widenScalarToNextPow2(0)
313 .clampScalar(0, s32, s64)
314 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
315 // how to do that yet.
316 .unsupportedIfMemSizeNotPow2()
317 // Lower anything left over into G_*EXT and G_LOAD
318 .lower();
319 }
320
321 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
322 const LLT &ValTy = Query.Types[0];
323 if (!ValTy.isVector())
324 return false;
325 const LLT EltTy = ValTy.getElementType();
326 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
327 };
328
329  getActionDefinitionsBuilder(G_LOAD)
330      .customIf([=](const LegalityQuery &Query) {
331 return HasRCPC3 && Query.Types[0] == s128 &&
332 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
333 })
334 .customIf([=](const LegalityQuery &Query) {
335 return Query.Types[0] == s128 &&
336 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
337 })
338 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
339 {s16, p0, s16, 8},
340 {s32, p0, s32, 8},
341 {s64, p0, s64, 8},
342 {p0, p0, s64, 8},
343 {s128, p0, s128, 8},
344 {v8s8, p0, s64, 8},
345 {v16s8, p0, s128, 8},
346 {v4s16, p0, s64, 8},
347 {v8s16, p0, s128, 8},
348 {v2s32, p0, s64, 8},
349 {v4s32, p0, s128, 8},
350 {v2s64, p0, s128, 8}})
351 // These extends are also legal
352 .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
353 .widenScalarToNextPow2(0, /* MinSize = */ 8)
354      .lowerIfMemSizeNotByteSizePow2()
355      .clampScalar(0, s8, s64)
356      .narrowScalarIf(
357          [=](const LegalityQuery &Query) {
358 // Clamp extending load results to 32-bits.
359 return Query.Types[0].isScalar() &&
360 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
361 Query.Types[0].getSizeInBits() > 32;
362 },
363 changeTo(0, s32))
364 .clampMaxNumElements(0, s8, 16)
365 .clampMaxNumElements(0, s16, 8)
366 .clampMaxNumElements(0, s32, 4)
367 .clampMaxNumElements(0, s64, 2)
368 .clampMaxNumElements(0, p0, 2)
369 .customIf(IsPtrVecPred)
370 .scalarizeIf(typeIs(0, v2s16), 0);
371
372  getActionDefinitionsBuilder(G_STORE)
373      .customIf([=](const LegalityQuery &Query) {
374 return HasRCPC3 && Query.Types[0] == s128 &&
375 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
376 })
377 .customIf([=](const LegalityQuery &Query) {
378 return Query.Types[0] == s128 &&
379 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
380 })
381 .legalForTypesWithMemDesc(
382 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
383 {s32, p0, s8, 8}, // truncstorei8 from s32
384 {s64, p0, s8, 8}, // truncstorei8 from s64
385 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
386 {s64, p0, s16, 8}, // truncstorei16 from s64
387 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
388 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
389 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
390 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
391 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
392 .clampScalar(0, s8, s64)
393 .lowerIf([=](const LegalityQuery &Query) {
394 return Query.Types[0].isScalar() &&
395 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
396 })
397 // Maximum: sN * k = 128
398 .clampMaxNumElements(0, s8, 16)
399 .clampMaxNumElements(0, s16, 8)
400 .clampMaxNumElements(0, s32, 4)
401 .clampMaxNumElements(0, s64, 2)
402 .clampMaxNumElements(0, p0, 2)
403      .lowerIfMemSizeNotPow2()
404      .customIf(IsPtrVecPred)
405 .scalarizeIf(typeIs(0, v2s16), 0);
406
407 // Constants
408  getActionDefinitionsBuilder(G_CONSTANT)
409      .legalFor({p0, s8, s16, s32, s64})
410 .widenScalarToNextPow2(0)
411 .clampScalar(0, s8, s64);
412 getActionDefinitionsBuilder(G_FCONSTANT)
413 .legalIf([=](const LegalityQuery &Query) {
414 const auto &Ty = Query.Types[0];
415 if (HasFP16 && Ty == s16)
416 return true;
417 return Ty == s32 || Ty == s64 || Ty == s128;
418 })
419 .clampScalar(0, MinFPScalar, s128);
420
421 getActionDefinitionsBuilder({G_ICMP, G_FCMP})
422 .legalFor({{s32, s32},
423 {s32, s64},
424 {s32, p0},
425 {v4s32, v4s32},
426 {v2s32, v2s32},
427 {v2s64, v2s64},
428 {v2s64, v2p0},
429 {v4s16, v4s16},
430 {v8s16, v8s16},
431 {v8s8, v8s8},
432 {v16s8, v16s8}})
434 .clampScalar(1, s32, s64)
435 .clampScalar(0, s32, s32)
436 .minScalarEltSameAsIf(
437 [=](const LegalityQuery &Query) {
438 const LLT &Ty = Query.Types[0];
439 const LLT &SrcTy = Query.Types[1];
440 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
441 Ty.getElementType() != SrcTy.getElementType();
442 },
443 0, 1)
444 .minScalarOrEltIf(
445 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
446 1, s32)
447 .minScalarOrEltIf(
448 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
449 s64)
450 .clampNumElements(0, v2s32, v4s32);
451
452 // Extensions
453 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
454 unsigned DstSize = Query.Types[0].getSizeInBits();
455
456 if (DstSize == 128 && !Query.Types[0].isVector())
457 return false; // Extending to a scalar s128 needs narrowing.
458
459 // Make sure that we have something that will fit in a register, and
460 // make sure it's a power of 2.
461 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
462 return false;
463
464 const LLT &SrcTy = Query.Types[1];
465
466 // Make sure we fit in a register otherwise. Don't bother checking that
467 // the source type is below 128 bits. We shouldn't be allowing anything
468 // through which is wider than the destination in the first place.
469 unsigned SrcSize = SrcTy.getSizeInBits();
470 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
471 return false;
472
473 return true;
474 };
475 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
476 .legalIf(ExtLegalFunc)
477 .clampScalar(0, s64, s64); // Just for s128, others are handled above.
478
479  getActionDefinitionsBuilder(G_TRUNC)
480      .minScalarOrEltIf(
481          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
482 0, s8)
483 .customIf([=](const LegalityQuery &Query) {
484 LLT DstTy = Query.Types[0];
485 LLT SrcTy = Query.Types[1];
486 return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
487 })
488 .alwaysLegal();
489
490 getActionDefinitionsBuilder(G_SEXT_INREG)
491 .legalFor({s32, s64})
492 .legalFor(PackedVectorAllTypeList)
493 .lower();
494
495 // FP conversions
496  getActionDefinitionsBuilder(G_FPTRUNC)
497      .legalFor(
498 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
499 .clampMaxNumElements(0, s32, 2);
500  getActionDefinitionsBuilder(G_FPEXT)
501      .legalFor(
502 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
503 .clampMaxNumElements(0, s64, 2);
504
505 // Conversions
506 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
507 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
508 .widenScalarToNextPow2(0)
509 .clampScalar(0, s32, s64)
511 .clampScalar(1, s32, s64);
512
513 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
514 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
515 .clampScalar(1, s32, s64)
516 .minScalarSameAs(1, 0)
517 .clampScalar(0, s32, s64)
519
520 // Control-flow
521  getActionDefinitionsBuilder(G_BRCOND)
522      .legalFor({s32})
523 .clampScalar(0, s32, s32);
524 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
525
526  getActionDefinitionsBuilder(G_SELECT)
527      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
528 .widenScalarToNextPow2(0)
529 .clampScalar(0, s32, s64)
530 .clampScalar(1, s32, s32)
532 .lowerIf(isVector(0));
533
534 // Pointer-handling
535 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
536
537 if (TM.getCodeModel() == CodeModel::Small)
538 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
539 else
540 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
541
542  getActionDefinitionsBuilder(G_PTRTOINT)
543      .legalFor({{s64, p0}, {v2s64, v2p0}})
544 .widenScalarToNextPow2(0, 64)
545 .clampScalar(0, s64, s64);
546
547  getActionDefinitionsBuilder(G_INTTOPTR)
548      .unsupportedIf([&](const LegalityQuery &Query) {
549 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
550 })
551 .legalFor({{p0, s64}, {v2p0, v2s64}});
552
553 // Casts for 32 and 64-bit width type are just copies.
554 // Same for 128-bit width type, except they are on the FPR bank.
555  getActionDefinitionsBuilder(G_BITCAST)
556      // FIXME: This is wrong since G_BITCAST is not allowed to change the
557 // number of bits but it's what the previous code described and fixing
558 // it breaks tests.
559 .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
560 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
561 v2p0});
562
563 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
564
565 // va_list must be a pointer, but most sized types are pretty easy to handle
566 // as the destination.
567  getActionDefinitionsBuilder(G_VAARG)
568      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
569 .clampScalar(0, s8, s64)
570 .widenScalarToNextPow2(0, /*Min*/ 8);
571
572 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
573 .lowerIf(
574 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
575
576 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
577 .customIf([](const LegalityQuery &Query) {
578 return Query.Types[0].getSizeInBits() == 128;
579 })
580 .clampScalar(0, s32, s64)
581 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
582
583  getActionDefinitionsBuilder(
584      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
585 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
586 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
587 .clampScalar(0, s32, s64)
588 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
589
590 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
591
592 // Merge/Unmerge
593 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
594 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
595 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
596    getActionDefinitionsBuilder(Op)
597        .widenScalarToNextPow2(LitTyIdx, 8)
598 .widenScalarToNextPow2(BigTyIdx, 32)
599 .clampScalar(LitTyIdx, s8, s64)
600 .clampScalar(BigTyIdx, s32, s128)
601 .legalIf([=](const LegalityQuery &Q) {
602 switch (Q.Types[BigTyIdx].getSizeInBits()) {
603 case 32:
604 case 64:
605 case 128:
606 break;
607 default:
608 return false;
609 }
610 switch (Q.Types[LitTyIdx].getSizeInBits()) {
611 case 8:
612 case 16:
613 case 32:
614 case 64:
615 return true;
616 default:
617 return false;
618 }
619 });
620 }
621
622 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
623 .unsupportedIf([=](const LegalityQuery &Query) {
624 const LLT &EltTy = Query.Types[1].getElementType();
625 return Query.Types[0] != EltTy;
626 })
627 .minScalar(2, s64)
628 .legalIf([=](const LegalityQuery &Query) {
629 const LLT &VecTy = Query.Types[1];
630 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
631 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
632 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
633 VecTy == v2p0;
634 })
635 .minScalarOrEltIf(
636 [=](const LegalityQuery &Query) {
637 // We want to promote to <M x s1> to <M x s64> if that wouldn't
638 // cause the total vec size to be > 128b.
639 return Query.Types[1].getNumElements() <= 2;
640 },
641 0, s64)
642 .minScalarOrEltIf(
643 [=](const LegalityQuery &Query) {
644 return Query.Types[1].getNumElements() <= 4;
645 },
646 0, s32)
647 .minScalarOrEltIf(
648 [=](const LegalityQuery &Query) {
649 return Query.Types[1].getNumElements() <= 8;
650 },
651 0, s16)
652 .minScalarOrEltIf(
653 [=](const LegalityQuery &Query) {
654 return Query.Types[1].getNumElements() <= 16;
655 },
656 0, s8)
657 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
658 .clampMaxNumElements(1, s64, 2)
659 .clampMaxNumElements(1, s32, 4)
660 .clampMaxNumElements(1, s16, 8)
661 .clampMaxNumElements(1, p0, 2);
662
663 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
664 .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
665
666 getActionDefinitionsBuilder(G_BUILD_VECTOR)
667 .legalFor({{v8s8, s8},
668 {v16s8, s8},
669 {v2s16, s16},
670 {v4s16, s16},
671 {v8s16, s16},
672 {v2s32, s32},
673 {v4s32, s32},
674 {v2p0, p0},
675 {v2s64, s64}})
676 .clampNumElements(0, v4s32, v4s32)
677 .clampNumElements(0, v2s64, v2s64)
678 .minScalarOrElt(0, s8)
679 .minScalarSameAs(1, 0);
680
681 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
682
683  getActionDefinitionsBuilder(G_CTLZ)
684      .legalForCartesianProduct(
685          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
686 .scalarize(1);
687 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
688
689 // TODO: Custom lowering for v2s32, v4s32, v2s64.
690 getActionDefinitionsBuilder(G_BITREVERSE)
691 .legalFor({s32, s64, v8s8, v16s8})
692 .widenScalarToNextPow2(0, /*Min = */ 32)
693 .clampScalar(0, s32, s64);
694
695 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
696
697  getActionDefinitionsBuilder(G_CTTZ)
698      .lowerIf(isVector(0))
699 .clampScalar(0, s32, s64)
700 .scalarSameSizeAs(1, 0)
701 .legalIf([=](const LegalityQuery &Query) {
702 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
703 })
704 .customIf([=](const LegalityQuery &Query) {
705 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
706 });
707
708 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
709 .legalIf([=](const LegalityQuery &Query) {
710 const LLT &DstTy = Query.Types[0];
711 const LLT &SrcTy = Query.Types[1];
712 // For now just support the TBL2 variant which needs the source vectors
713 // to be the same size as the dest.
714 if (DstTy != SrcTy)
715 return false;
716 return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
717 DstTy);
718 })
719 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
720 // just want those lowered into G_BUILD_VECTOR
721 .lowerIf([=](const LegalityQuery &Query) {
722 return !Query.Types[1].isVector();
723 })
724 .moreElementsIf(
725 [](const LegalityQuery &Query) {
726 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
727 Query.Types[0].getNumElements() >
728 Query.Types[1].getNumElements();
729 },
730 changeTo(1, 0))
732 .clampNumElements(0, v4s32, v4s32)
733 .clampNumElements(0, v2s64, v2s64)
734 .moreElementsIf(
735 [](const LegalityQuery &Query) {
736 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
737 Query.Types[0].getNumElements() <
738 Query.Types[1].getNumElements();
739 },
740 changeTo(0, 1));
741
742 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
743 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
744
745 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
746
747 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
748 return Query.Types[0] == p0 && Query.Types[1] == s64;
749 });
750
751 getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
752
753 if (ST.hasMOPS()) {
754 // G_BZERO is not supported. Currently it is only emitted by
755 // PreLegalizerCombiner for G_MEMSET with zero constant.
756    getActionDefinitionsBuilder(G_BZERO).unsupported();
757
758    getActionDefinitionsBuilder(G_MEMSET)
759        .legalForCartesianProduct({p0}, {s64}, {s64})
760 .customForCartesianProduct({p0}, {s8}, {s64})
761 .immIdx(0); // Inform verifier imm idx 0 is handled.
762
763 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
764 .legalForCartesianProduct({p0}, {p0}, {s64})
765 .immIdx(0); // Inform verifier imm idx 0 is handled.
766
767 // G_MEMCPY_INLINE does not have a tailcall immediate
768 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
769 .legalForCartesianProduct({p0}, {p0}, {s64});
770
771 } else {
772 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
773 .libcall();
774 }
775
776 // FIXME: Legal vector types are only legal with NEON.
777 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
778 if (HasCSSC)
779 ABSActions
780 .legalFor({s32, s64});
781 ABSActions
782 .legalFor(PackedVectorAllTypeList)
783 .lowerIf(isScalar(0));
784
785 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
786 // We only have FADDP to do reduction-like operations. Lower the rest.
787 .legalFor({{s32, v2s32}, {s64, v2s64}})
788 .clampMaxNumElements(1, s64, 2)
789 .clampMaxNumElements(1, s32, 2)
790 .lower();
791
792 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
793 .legalFor(
794 {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
795 .clampMaxNumElements(1, s64, 2)
796 .clampMaxNumElements(1, s32, 4)
797 .lower();
798
799  getActionDefinitionsBuilder(
800      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
801 // Try to break down into smaller vectors as long as they're at least 64
802 // bits. This lets us use vector operations for some parts of the
803 // reduction.
804 .fewerElementsIf(
805 [=](const LegalityQuery &Q) {
806 LLT SrcTy = Q.Types[1];
807 if (SrcTy.isScalar())
808 return false;
809            if (!isPowerOf2_32(SrcTy.getNumElements()))
810              return false;
811 // We can usually perform 64b vector operations.
812 return SrcTy.getSizeInBits() > 64;
813 },
814 [=](const LegalityQuery &Q) {
815 LLT SrcTy = Q.Types[1];
816 return std::make_pair(1, SrcTy.divide(2));
817 })
818 .scalarize(1)
819 .lower();
820
821 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
822 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
823
824 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
825
826  getActionDefinitionsBuilder(G_ROTR)
827      .legalFor({{s32, s64}, {s64, s64}})
828 .customIf([=](const LegalityQuery &Q) {
829 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
830 })
831 .lower();
832  getActionDefinitionsBuilder(G_ROTL).lower();
833
834 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
835 .customFor({{s32, s32}, {s64, s64}});
836
837 auto always = [=](const LegalityQuery &Q) { return true; };
838 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
839 if (HasCSSC)
840 CTPOPActions
841 .legalFor({{s32, s32},
842 {s64, s64},
843 {v8s8, v8s8},
844 {v16s8, v16s8}})
845 .customFor({{s128, s128},
846 {v2s64, v2s64},
847 {v2s32, v2s32},
848 {v4s32, v4s32},
849 {v4s16, v4s16},
850 {v8s16, v8s16}});
851 else
852 CTPOPActions
853 .legalFor({{v8s8, v8s8},
854 {v16s8, v16s8}})
855 .customFor({{s32, s32},
856 {s64, s64},
857 {s128, s128},
858 {v2s64, v2s64},
859 {v2s32, v2s32},
860 {v4s32, v4s32},
861 {v4s16, v4s16},
862 {v8s16, v8s16}});
863 CTPOPActions
864 .clampScalar(0, s32, s128)
865 .widenScalarToNextPow2(0)
866 .minScalarEltSameAsIf(always, 1, 0)
867 .maxScalarEltSameAsIf(always, 1, 0);
868
869 // TODO: Vector types.
870 getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
871
872 // TODO: Vector types.
873 getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
874 .legalFor({MinFPScalar, s32, s64})
875 .libcallFor({s128})
876 .minScalar(0, MinFPScalar);
877
878 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
879 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
880 .legalIf([=](const LegalityQuery &Query) {
881 const auto &Ty = Query.Types[0];
882 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
883 })
884 .minScalar(0, MinFPScalar)
885 .clampNumElements(0, v4s16, v8s16)
886 .clampNumElements(0, v2s32, v4s32)
887 .clampNumElements(0, v2s64, v2s64);
888
889 // TODO: Libcall support for s128.
890 // TODO: s16 should be legal with full FP16 support.
891 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
892 .legalFor({{s64, s32}, {s64, s64}});
893
894 // TODO: Custom legalization for vector types.
895 // TODO: Custom legalization for mismatched types.
896 // TODO: s16 support.
897 getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
898
900
901  getLegacyLegalizerInfo().computeTables();
902  verify(*ST.getInstrInfo());
903}
904
905bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
906                                          MachineInstr &MI) const {
907 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
908 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
909 GISelChangeObserver &Observer = Helper.Observer;
910 switch (MI.getOpcode()) {
911 default:
912 // No idea what to do.
913 return false;
914 case TargetOpcode::G_VAARG:
915 return legalizeVaArg(MI, MRI, MIRBuilder);
916 case TargetOpcode::G_LOAD:
917 case TargetOpcode::G_STORE:
918 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
919 case TargetOpcode::G_SHL:
920 case TargetOpcode::G_ASHR:
921 case TargetOpcode::G_LSHR:
922 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
923 case TargetOpcode::G_GLOBAL_VALUE:
924 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
925 case TargetOpcode::G_TRUNC:
926 return legalizeVectorTrunc(MI, Helper);
927 case TargetOpcode::G_SBFX:
928 case TargetOpcode::G_UBFX:
929 return legalizeBitfieldExtract(MI, MRI, Helper);
930 case TargetOpcode::G_ROTR:
931 return legalizeRotate(MI, MRI, Helper);
932 case TargetOpcode::G_CTPOP:
933 return legalizeCTPOP(MI, MRI, Helper);
934 case TargetOpcode::G_ATOMIC_CMPXCHG:
935 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
936 case TargetOpcode::G_CTTZ:
937 return legalizeCTTZ(MI, Helper);
938 case TargetOpcode::G_BZERO:
939 case TargetOpcode::G_MEMCPY:
940 case TargetOpcode::G_MEMMOVE:
941 case TargetOpcode::G_MEMSET:
942 return legalizeMemOps(MI, Helper);
943 case TargetOpcode::G_FCOPYSIGN:
944 return legalizeFCopySign(MI, Helper);
945 }
946
947 llvm_unreachable("expected switch to return");
948}
949
950bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
951                                          MachineRegisterInfo &MRI,
952                                          LegalizerHelper &Helper) const {
953 // To allow for imported patterns to match, we ensure that the rotate amount
954 // is 64b with an extension.
955 Register AmtReg = MI.getOperand(2).getReg();
956 LLT AmtTy = MRI.getType(AmtReg);
957 (void)AmtTy;
958 assert(AmtTy.isScalar() && "Expected a scalar rotate");
959 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
960 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
961 Helper.Observer.changingInstr(MI);
962 MI.getOperand(2).setReg(NewAmt.getReg(0));
963 Helper.Observer.changedInstr(MI);
964 return true;
965}
966
967static void extractParts(Register Reg, MachineRegisterInfo &MRI,
968                         MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
969                         SmallVectorImpl<Register> &VRegs) {
970 for (int I = 0; I < NumParts; ++I)
971 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
972 MIRBuilder.buildUnmerge(VRegs, Reg);
973}
974
975bool AArch64LegalizerInfo::legalizeVectorTrunc(
976 MachineInstr &MI, LegalizerHelper &Helper) const {
977 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
978 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
979  // Similar to how operand splitting is done in SelectionDAG, we can handle
980 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
981 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
982 // %lo16(<4 x s16>) = G_TRUNC %inlo
983 // %hi16(<4 x s16>) = G_TRUNC %inhi
984 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
985 // %res(<8 x s8>) = G_TRUNC %in16
986
987 Register DstReg = MI.getOperand(0).getReg();
988 Register SrcReg = MI.getOperand(1).getReg();
989 LLT DstTy = MRI.getType(DstReg);
990 LLT SrcTy = MRI.getType(SrcReg);
991 assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
992 llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
993
994 // Split input type.
995  LLT SplitSrcTy =
996      SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
997 // First, split the source into two smaller vectors.
998 SmallVector<Register, 2> SplitSrcs;
999 extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
1000
1001 // Truncate the splits into intermediate narrower elements.
1002 LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
1003 for (unsigned I = 0; I < SplitSrcs.size(); ++I)
1004 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
1005
1006 auto Concat = MIRBuilder.buildConcatVectors(
1007 DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
1008
1009 Helper.Observer.changingInstr(MI);
1010 MI.getOperand(1).setReg(Concat.getReg(0));
1011 Helper.Observer.changedInstr(MI);
1012 return true;
1013}
1014
1015bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1016    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1017    GISelChangeObserver &Observer) const {
1018 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1019 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1020 // G_ADD_LOW instructions.
1021 // By splitting this here, we can optimize accesses in the small code model by
1022 // folding in the G_ADD_LOW into the load/store offset.
1023 auto &GlobalOp = MI.getOperand(1);
1024 const auto* GV = GlobalOp.getGlobal();
1025 if (GV->isThreadLocal())
1026 return true; // Don't want to modify TLS vars.
1027
1028 auto &TM = ST->getTargetLowering()->getTargetMachine();
1029 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1030
1031 if (OpFlags & AArch64II::MO_GOT)
1032 return true;
1033
1034 auto Offset = GlobalOp.getOffset();
1035 Register DstReg = MI.getOperand(0).getReg();
1036 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1037 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1038 // Set the regclass on the dest reg too.
1039 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1040
1041 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1042 // by creating a MOVK that sets bits 48-63 of the register to (global address
1043 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1044  // prevent an incorrect tag being generated during relocation when the
1045 // global appears before the code section. Without the offset, a global at
1046 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1047 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1048 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1049 // instead of `0xf`.
1050 // This assumes that we're in the small code model so we can assume a binary
1051 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1052 // binary must also be loaded into address range [0, 2^48). Both of these
1053 // properties need to be ensured at runtime when using tagged addresses.
1054 if (OpFlags & AArch64II::MO_TAGGED) {
1055 assert(!Offset &&
1056 "Should not have folded in an offset for a tagged global!");
1057 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1058 .addGlobalAddress(GV, 0x100000000,
1059                                 AArch64II::MO_PREL | AArch64II::MO_G3)
1060               .addImm(48);
1061 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1062 }
1063
1064 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1065 .addGlobalAddress(GV, Offset,
1066                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1067  MI.eraseFromParent();
1068 return true;
1069}
1070
1071bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1072                                             MachineInstr &MI) const {
1073 switch (MI.getIntrinsicID()) {
1074 case Intrinsic::vacopy: {
1075 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
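    // On Darwin and Windows, va_list is just a char *, so a pointer-sized copy
    // suffices. The AAPCS64 Linux va_list is a 32-byte struct (three pointers
    // plus two ints: __stack, __gr_top, __vr_top, __gr_offs, __vr_offs), which
    // shrinks to 20 bytes under ILP32's 4-byte pointers.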
1076 unsigned VaListSize =
1077 (ST->isTargetDarwin() || ST->isTargetWindows())
1078 ? PtrSize
1079 : ST->isTargetILP32() ? 20 : 32;
1080
1081 MachineFunction &MF = *MI.getMF();
1082    auto Val = MF.getRegInfo().createGenericVirtualRegister(
1083        LLT::scalar(VaListSize * 8));
1084 MachineIRBuilder MIB(MI);
1085 MIB.buildLoad(Val, MI.getOperand(2),
1086                  *MF.getMachineMemOperand(MachinePointerInfo(),
1087                                            MachineMemOperand::MOLoad,
1088                                            VaListSize, Align(PtrSize)));
1089 MIB.buildStore(Val, MI.getOperand(1),
1090                   *MF.getMachineMemOperand(MachinePointerInfo(),
1091                                             MachineMemOperand::MOStore,
1092                                             VaListSize, Align(PtrSize)));
1093 MI.eraseFromParent();
1094 return true;
1095 }
1096 case Intrinsic::get_dynamic_area_offset: {
1097 MachineIRBuilder &MIB = Helper.MIRBuilder;
1098 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1099 MI.eraseFromParent();
1100 return true;
1101 }
1102 case Intrinsic::aarch64_mops_memset_tag: {
1103 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1104 // Zext the value to 64 bit
1105 MachineIRBuilder MIB(MI);
1106 auto &Value = MI.getOperand(3);
1107 Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1108 Value.setReg(ZExtValueReg);
1109 return true;
1110 }
1111 case Intrinsic::prefetch: {
1112 MachineIRBuilder MIB(MI);
1113 auto &AddrVal = MI.getOperand(1);
1114
1115 int64_t IsWrite = MI.getOperand(2).getImm();
1116 int64_t Locality = MI.getOperand(3).getImm();
1117 int64_t IsData = MI.getOperand(4).getImm();
1118
1119 bool IsStream = Locality == 0;
1120 if (Locality != 0) {
1121 assert(Locality <= 3 && "Prefetch locality out-of-range");
1122 // The locality degree is the opposite of the cache speed.
1123 // Put the number the other way around.
1124 // The encoding starts at 0 for level 1
1125 Locality = 3 - Locality;
1126 }
1127
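    // PRFM prfop encoding: bit 4 = store(1)/load(0), bit 3 = instruction(1)/
    // data(0), bits 2:1 = target cache level, bit 0 = streaming policy. For
    // example, llvm.prefetch(p, /*rw=*/0, /*locality=*/3, /*data=*/1) yields
    // PrfOp 0b00000, i.e. PLDL1KEEP.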
1128 unsigned PrfOp =
1129 (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
1130
1131 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1132 MI.eraseFromParent();
1133 return true;
1134 }
1135 case Intrinsic::aarch64_prefetch: {
1136 MachineIRBuilder MIB(MI);
1137 auto &AddrVal = MI.getOperand(1);
1138
1139 int64_t IsWrite = MI.getOperand(2).getImm();
1140 int64_t Target = MI.getOperand(3).getImm();
1141 int64_t IsStream = MI.getOperand(4).getImm();
1142 int64_t IsData = MI.getOperand(5).getImm();
1143
1144 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1145 (!IsData << 3) | // IsDataCache bit
1146 (Target << 1) | // Cache level bits
1147 (unsigned)IsStream; // Stream bit
1148
1149 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1150 MI.eraseFromParent();
1151 return true;
1152 }
1153 }
1154
1155 return true;
1156}
1157
1158bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1159    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1160    GISelChangeObserver &Observer) const {
1161 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1162 MI.getOpcode() == TargetOpcode::G_LSHR ||
1163 MI.getOpcode() == TargetOpcode::G_SHL);
1164 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1165 // imported patterns can select it later. Either way, it will be legal.
1166 Register AmtReg = MI.getOperand(2).getReg();
1167 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1168 if (!VRegAndVal)
1169 return true;
1170 // Check the shift amount is in range for an immediate form.
1171 int64_t Amount = VRegAndVal->Value.getSExtValue();
1172 if (Amount > 31)
1173 return true; // This will have to remain a register variant.
1174 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1175 Observer.changingInstr(MI);
1176 MI.getOperand(2).setReg(ExtCst.getReg(0));
1177 Observer.changedInstr(MI);
1178 return true;
1179}
1180
1181static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1182                                MachineRegisterInfo &MRI) {
1183  Base = Root;
1184 Offset = 0;
1185
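  // LDP/STP of 64-bit registers take a signed 7-bit immediate scaled by 8,
  // i.e. a byte offset in [-512, 504] that is a multiple of 8, which is what
  // isShiftedInt<7, 3> checks for below.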
1186 Register NewBase;
1187 int64_t NewOffset;
1188 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1189 isShiftedInt<7, 3>(NewOffset)) {
1190 Base = NewBase;
1191 Offset = NewOffset;
1192 }
1193}
1194
1195// FIXME: This should be removed and replaced with the generic bitcast legalize
1196// action.
1197bool AArch64LegalizerInfo::legalizeLoadStore(
1198    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1199    GISelChangeObserver &Observer) const {
1200 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1201 MI.getOpcode() == TargetOpcode::G_LOAD);
1202 // Here we just try to handle vector loads/stores where our value type might
1203 // have pointer elements, which the SelectionDAG importer can't handle. To
1204 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1205 // the value to use s64 types.
1206
1207 // Custom legalization requires the instruction, if not deleted, must be fully
1208 // legalized. In order to allow further legalization of the inst, we create
1209 // a new instruction and erase the existing one.
1210
1211 Register ValReg = MI.getOperand(0).getReg();
1212 const LLT ValTy = MRI.getType(ValReg);
1213
1214 if (ValTy == LLT::scalar(128)) {
1215
1216 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1217 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1218 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1219 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1220 bool IsRcpC3 =
1221 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1222
1223 LLT s64 = LLT::scalar(64);
1224
1225 unsigned Opcode;
1226 if (IsRcpC3) {
1227 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1228 } else {
1229 // For LSE2, loads/stores should have been converted to monotonic and had
1230 // a fence inserted after them.
1231 assert(Ordering == AtomicOrdering::Monotonic ||
1232 Ordering == AtomicOrdering::Unordered);
1233 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1234
1235 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1236 }
1237
1238    MachineInstrBuilder NewI;
1239    if (IsLoad) {
1240 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1241 MIRBuilder.buildMergeLikeInstr(
1242 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1243 } else {
1244 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1245 NewI = MIRBuilder.buildInstr(
1246 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1247 }
1248
1249 if (IsRcpC3) {
1250 NewI.addUse(MI.getOperand(1).getReg());
1251 } else {
1252 Register Base;
1253 int Offset;
1254 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1255 NewI.addUse(Base);
1256 NewI.addImm(Offset / 8);
1257 }
1258
1259 NewI.cloneMemRefs(MI);
1260    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1261                                     *MRI.getTargetRegisterInfo(),
1262 *ST->getRegBankInfo());
1263 MI.eraseFromParent();
1264 return true;
1265 }
1266
1267 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
1268 ValTy.getElementType().getAddressSpace() != 0) {
1269 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1270 return false;
1271 }
1272
1273 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1274 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1275 auto &MMO = **MI.memoperands_begin();
1276 MMO.setType(NewTy);
1277
1278 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1279 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1280 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1281 } else {
1282 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1283 MIRBuilder.buildBitcast(ValReg, NewLoad);
1284 }
1285 MI.eraseFromParent();
1286 return true;
1287}
1288
1289bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1290                                         MachineRegisterInfo &MRI,
1291                                         MachineIRBuilder &MIRBuilder) const {
1292 MachineFunction &MF = MIRBuilder.getMF();
1293 Align Alignment(MI.getOperand(2).getImm());
1294 Register Dst = MI.getOperand(0).getReg();
1295 Register ListPtr = MI.getOperand(1).getReg();
1296
1297 LLT PtrTy = MRI.getType(ListPtr);
1298 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1299
1300 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1301 const Align PtrAlign = Align(PtrSize);
1302 auto List = MIRBuilder.buildLoad(
1303 PtrTy, ListPtr,
1304      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1305                               PtrTy, PtrAlign));
1306
1307 MachineInstrBuilder DstPtr;
1308 if (Alignment > PtrAlign) {
1309 // Realign the list to the actual required alignment.
1310 auto AlignMinus1 =
1311 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1312 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1313 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1314 } else
1315 DstPtr = List;
1316
1317 LLT ValTy = MRI.getType(Dst);
1318 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1319 MIRBuilder.buildLoad(
1320 Dst, DstPtr,
1321      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1322                               ValTy, std::max(Alignment, PtrAlign)));
1323
1324 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1325
1326 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1327
1328 MIRBuilder.buildStore(NewList, ListPtr,
1329                        *MF.getMachineMemOperand(MachinePointerInfo(),
1330                                                  MachineMemOperand::MOStore,
1331                                                  PtrTy, PtrAlign));
1332
1333 MI.eraseFromParent();
1334 return true;
1335}
1336
1337bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1338    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1339  // Only legal if we can select immediate forms.
1340 // TODO: Lower this otherwise.
1341 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1342 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1343}
1344
1345bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1346                                         MachineRegisterInfo &MRI,
1347                                         LegalizerHelper &Helper) const {
1348 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1349 // it can be more efficiently lowered to the following sequence that uses
1350 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1351 // registers are cheap.
1352 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1353 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1354 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1355 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1356 //
1357 // For 128 bit vector popcounts, we lower to the following sequence:
1358 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1359 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1360 // uaddlp.4s v0, v0 // v4s32, v2s64
1361 // uaddlp.2d v0, v0 // v2s64
1362 //
1363 // For 64 bit vector popcounts, we lower to the following sequence:
1364 // cnt.8b v0, v0 // v4s16, v2s32
1365 // uaddlp.4h v0, v0 // v4s16, v2s32
1366 // uaddlp.2s v0, v0 // v2s32
1367
1368 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1369 Register Dst = MI.getOperand(0).getReg();
1370 Register Val = MI.getOperand(1).getReg();
1371 LLT Ty = MRI.getType(Val);
1372 unsigned Size = Ty.getSizeInBits();
1373
1374 assert(Ty == MRI.getType(Dst) &&
1375 "Expected src and dst to have the same type!");
1376
1377 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1378 LLT s64 = LLT::scalar(64);
1379
1380 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1381 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1382 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1383 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1384
1385 MIRBuilder.buildZExt(Dst, Add);
1386 MI.eraseFromParent();
1387 return true;
1388 }
1389
1390 if (!ST->hasNEON() ||
1391 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1392 // Use generic lowering when custom lowering is not possible.
1393 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1394 Helper.lowerBitCount(MI) ==
1395               LegalizerHelper::LegalizeResult::Legalized;
1396  }
1397
1398 // Pre-conditioning: widen Val up to the nearest vector type.
1399 // s32,s64,v4s16,v2s32 -> v8i8
1400 // v8s16,v4s32,v2s64 -> v16i8
1401 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1402 if (Ty.isScalar()) {
1403 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1404 if (Size == 32) {
1405 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1406 }
1407 }
1408 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1409
1410 // Count bits in each byte-sized lane.
1411 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1412
1413 // Sum across lanes.
1414 Register HSum = CTPOP.getReg(0);
1415 unsigned Opc;
1416 SmallVector<LLT> HAddTys;
1417 if (Ty.isScalar()) {
1418 Opc = Intrinsic::aarch64_neon_uaddlv;
1419 HAddTys.push_back(LLT::scalar(32));
1420 } else if (Ty == LLT::fixed_vector(8, 16)) {
1421 Opc = Intrinsic::aarch64_neon_uaddlp;
1422 HAddTys.push_back(LLT::fixed_vector(8, 16));
1423 } else if (Ty == LLT::fixed_vector(4, 32)) {
1424 Opc = Intrinsic::aarch64_neon_uaddlp;
1425 HAddTys.push_back(LLT::fixed_vector(8, 16));
1426 HAddTys.push_back(LLT::fixed_vector(4, 32));
1427 } else if (Ty == LLT::fixed_vector(2, 64)) {
1428 Opc = Intrinsic::aarch64_neon_uaddlp;
1429 HAddTys.push_back(LLT::fixed_vector(8, 16));
1430 HAddTys.push_back(LLT::fixed_vector(4, 32));
1431 HAddTys.push_back(LLT::fixed_vector(2, 64));
1432 } else if (Ty == LLT::fixed_vector(4, 16)) {
1433 Opc = Intrinsic::aarch64_neon_uaddlp;
1434 HAddTys.push_back(LLT::fixed_vector(4, 16));
1435 } else if (Ty == LLT::fixed_vector(2, 32)) {
1436 Opc = Intrinsic::aarch64_neon_uaddlp;
1437 HAddTys.push_back(LLT::fixed_vector(4, 16));
1438 HAddTys.push_back(LLT::fixed_vector(2, 32));
1439 } else
1440 llvm_unreachable("unexpected vector shape");
1441  MachineInstrBuilder UADD;
1442  for (LLT HTy : HAddTys) {
1443 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
1444 .addUse(HSum);
1445 HSum = UADD.getReg(0);
1446 }
1447
1448 // Post-conditioning.
1449 if (Ty.isScalar() && (Size == 64 || Size == 128))
1450 MIRBuilder.buildZExt(Dst, UADD);
1451 else
1452 UADD->getOperand(0).setReg(Dst);
1453 MI.eraseFromParent();
1454 return true;
1455}
1456
1457bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1458    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1459  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1460 LLT s64 = LLT::scalar(64);
1461 auto Addr = MI.getOperand(1).getReg();
1462 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1463 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1464 auto DstLo = MRI.createGenericVirtualRegister(s64);
1465 auto DstHi = MRI.createGenericVirtualRegister(s64);
1466
1467  MachineInstrBuilder CAS;
1468  if (ST->hasLSE()) {
1469 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1470 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1471 // the rest of the MIR so we must reassemble the extracted registers into a
1472 // 128-bit known-regclass one with code like this:
1473 //
1474 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1475 // %out = CASP %in1, ...
1476 // %OldLo = G_EXTRACT %out, 0
1477 // %OldHi = G_EXTRACT %out, 64
1478 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1479 unsigned Opcode;
1480 switch (Ordering) {
1481    case AtomicOrdering::Acquire:
1482      Opcode = AArch64::CASPAX;
1483      break;
1484    case AtomicOrdering::Release:
1485      Opcode = AArch64::CASPLX;
1486      break;
1487    case AtomicOrdering::AcquireRelease:
1488    case AtomicOrdering::SequentiallyConsistent:
1489      Opcode = AArch64::CASPALX;
1490      break;
1491 default:
1492 Opcode = AArch64::CASPX;
1493 break;
1494 }
1495
1496 LLT s128 = LLT::scalar(128);
1497 auto CASDst = MRI.createGenericVirtualRegister(s128);
1498 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1499 auto CASNew = MRI.createGenericVirtualRegister(s128);
1500 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1501 .addUse(DesiredI->getOperand(0).getReg())
1502 .addImm(AArch64::sube64)
1503 .addUse(DesiredI->getOperand(1).getReg())
1504 .addImm(AArch64::subo64);
1505 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1506 .addUse(NewI->getOperand(0).getReg())
1507 .addImm(AArch64::sube64)
1508 .addUse(NewI->getOperand(1).getReg())
1509 .addImm(AArch64::subo64);
1510
1511 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1512
1513 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1514 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1515 } else {
1516 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1517 // can take arbitrary registers so it just has the normal GPR64 operands the
1518 // rest of AArch64 is expecting.
1519 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1520 unsigned Opcode;
1521 switch (Ordering) {
1522    case AtomicOrdering::Acquire:
1523      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1524      break;
1525    case AtomicOrdering::Release:
1526      Opcode = AArch64::CMP_SWAP_128_RELEASE;
1527      break;
1528    case AtomicOrdering::AcquireRelease:
1529    case AtomicOrdering::SequentiallyConsistent:
1530      Opcode = AArch64::CMP_SWAP_128;
1531      break;
1532 default:
1533 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1534 break;
1535 }
1536
1537 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1538 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1539 {Addr, DesiredI->getOperand(0),
1540 DesiredI->getOperand(1), NewI->getOperand(0),
1541 NewI->getOperand(1)});
1542 }
1543
1544 CAS.cloneMemRefs(MI);
1545  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1546                                   *MRI.getTargetRegisterInfo(),
1547 *ST->getRegBankInfo());
1548
1549 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1550 MI.eraseFromParent();
1551 return true;
1552}
1553
1554bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1555 LegalizerHelper &Helper) const {
1556 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1557 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1558 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
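  // Without CSSC there is no scalar count-trailing-zeros instruction, so
  // compute CTTZ as CTLZ(BITREVERSE(x)), which selects to the RBIT + CLZ pair.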
1559 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1560 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1561 MI.eraseFromParent();
1562 return true;
1563}
1564
1565bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1566 LegalizerHelper &Helper) const {
1567 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1568
1569 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
1570 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1571 // Zext the value operand to 64 bit
1572 auto &Value = MI.getOperand(1);
1573 Register ZExtValueReg =
1574 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1575 Value.setReg(ZExtValueReg);
1576 return true;
1577 }
1578
1579 return false;
1580}
1581
1582bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1583 LegalizerHelper &Helper) const {
1584 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1585 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1586 Register Dst = MI.getOperand(0).getReg();
1587 LLT DstTy = MRI.getType(Dst);
1588 assert(DstTy.isScalar() && "Only expected scalars right now!");
1589 const unsigned DstSize = DstTy.getSizeInBits();
1590 assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1591 assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1592 "Expected homogeneous types!");
1593
1594 // We want to materialize a mask with the high bit set.
1595 uint64_t EltMask;
1596 LLT VecTy;
1597
1598 // TODO: s16 support.
1599 switch (DstSize) {
1600 default:
1601 llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1602 case 64: {
1603    // AdvSIMD immediate moves cannot materialize our mask in a single
1604 // instruction for 64-bit elements. Instead, materialize zero and then
1605 // negate it.
1606 EltMask = 0;
1607 VecTy = LLT::fixed_vector(2, DstTy);
1608 break;
1609 }
1610 case 32:
1611 EltMask = 0x80000000ULL;
1612 VecTy = LLT::fixed_vector(4, DstTy);
1613 break;
1614 }
1615
1616 // Widen In1 and In2 to 128 bits. We want these to eventually become
1617 // INSERT_SUBREGs.
1618 auto Undef = MIRBuilder.buildUndef(VecTy);
1619 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1620 auto Ins1 = MIRBuilder.buildInsertVectorElement(
1621 VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1622 auto Ins2 = MIRBuilder.buildInsertVectorElement(
1623 VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1624
1625 // Construct the mask.
1626 auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1627 if (DstSize == 64)
1628 Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1629
1630 auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
1631
1632 // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1633 // want this to eventually become an EXTRACT_SUBREG.
1634 SmallVector<Register, 2> DstRegs(1, Dst);
1635 for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1636 DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1637 MIRBuilder.buildUnmerge(DstRegs, Sel);
1638 MI.eraseFromParent();
1639 return true;
1640}
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LegalizeResult lowerBitCount(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or G_INTRINSIC_W_SIDE_EFFECTS inst...
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
const TargetMachine & getTargetMachine() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition: Value.h:74
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:234
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
LegalityPredicate isScalar(unsigned TypeIdx)
True iff the specified type index is a scalar.
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has at an atomic ordering of at Ordering or stronger.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:73
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:152
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:409
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...