//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Threading.h"
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
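
// Mapping ID used, instead of DefaultMappingID, to flag instructions whose
// operand mapping additionally needs the fix-ups applied in
// applyMappingImpl() (e.g. widening sub-32-bit scalars on GPR).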
static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)
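
// For instance, CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR) asserts that the
// PMI_GPR32 partial mapping starts at bit 0, spans 32 bits, and lives in the
// GPR bank.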

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
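
// For instance, CHECK_VALUEMAP(GPR, 32) asserts that the ValueMapping for a
// 32-bit value on GPR points at the single PMI_GPR32 partial mapping.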

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operand instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)
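
// For instance, CHECK_VALUEMAP_3OPS(FPR, 64) covers an operation such as a
// double-precision G_FADD, where the definition and both uses all map to
// FPR64.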

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           "FPR" #DstSize " Dst is incorrectly initialized");                  \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           "FPR" #SrcSize " Src is incorrectly initialized");                  \
  } while (false)
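
// For instance, CHECK_VALUEMAP_FPEXT(64, 32) checks the mapping used for a
// G_FPEXT from s32 to s64: operand 0 (the def) on FPR64 and operand 1 (the
// use) on FPR32.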

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes?
  // Copies are the same size.
  // Will introduce other hooks for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT Ty) const {
  switch (RC.getID()) {
  case AArch64::GPR64sponlyRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  default:
    return AArch64GenRegisterBankInfo::getRegBankFromRegClass(RC, Ty);
  }
}

const RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32 or 64-bit OR can be mapped to either FPR or GPR
    // for the same cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

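// Note: RegBankSelect's greedy mode asks for these alternative mappings and
// picks, per instruction, the one whose total cost (including any cross-bank
// copies it implies) is lowest; the ID of the chosen mapping is what
// applyMappingImpl() sees below.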
void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_STORE: {
    Register Dst = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
        Ty.getSizeInBits() < 32) {
      Builder.setInsertPt(*MI.getParent(), MI.getIterator());
      auto Ext = Builder.buildAnyExt(LLT::scalar(32), Dst);
      MI.getOperand(0).setReg(Ext.getReg(0));
      MRI.setRegBank(Ext.getReg(0), AArch64::GPRRegBank);
    }
    return applyDefaultMapping(OpdMapper);
  }
  case TargetOpcode::G_LOAD: {
    Register Dst = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
        Ty.getSizeInBits() < 32) {
      Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
      Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
      Builder.buildTrunc(Dst, ExtReg);
      MI.getOperand(0).setReg(ExtReg);
      MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
    }
    [[fallthrough]];
  }
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
    // These IDs must match getInstrAlternativeMappings().
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller GPR operands to 32 bits.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  case AArch64::G_DUP: {
    // Extend smaller GPR operands to 32 bits.
    assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
           "Expected sources smaller than 32-bits");
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());

    Register ConstReg;
    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
      ConstReg =
          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
    } else {
      ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
                     .getReg(0);
    }
    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(1).setReg(ConstReg);
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
  case Intrinsic::aarch64_neon_fmulx:
  case Intrinsic::aarch64_neon_frecpe:
  case Intrinsic::aarch64_neon_frecps:
  case Intrinsic::aarch64_neon_frecpx:
  case Intrinsic::aarch64_neon_frsqrte:
  case Intrinsic::aarch64_neon_frsqrts:
  case Intrinsic::aarch64_neon_facge:
  case Intrinsic::aarch64_neon_facgt:
  case Intrinsic::aarch64_neon_fabd:
  case Intrinsic::aarch64_neon_sqrdmlah:
  case Intrinsic::aarch64_neon_sqrdmlsh:
  case Intrinsic::aarch64_neon_sqrdmulh:
  case Intrinsic::aarch64_neon_sqadd:
  case Intrinsic::aarch64_neon_sqsub:
  case Intrinsic::aarch64_neon_srshl:
  case Intrinsic::aarch64_neon_urshl:
  case Intrinsic::aarch64_neon_sqshl:
  case Intrinsic::aarch64_neon_uqshl:
  case Intrinsic::aarch64_neon_sqrshl:
  case Intrinsic::aarch64_neon_uqrshl:
  case Intrinsic::aarch64_neon_ushl:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_crypto_sha1h:
  case Intrinsic::aarch64_crypto_sha1c:
  case Intrinsic::aarch64_crypto_sha1p:
  case Intrinsic::aarch64_crypto_sha1m:
  case Intrinsic::aarch64_sisd_fcvtxn:
  case Intrinsic::aarch64_sisd_fabd:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

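// Returns true if the given G_PHI's result feeds, possibly through further
// phis, at least one instruction that only takes FP values.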
bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const AArch64RegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPConstraints(UseMI, MRI, TRI, Depth + 1);
                });
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const AArch64RegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const AArch64RegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case AArch64::G_PMULL:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const AArch64RegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case AArch64::G_SADDLP:
  case AArch64::G_UADDLP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

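// A G_SITOFP/G_UITOFP whose source and destination sizes match can be
// selected as the FPR-to-FPR form of SCVTF/UCVTF, so a value that only feeds
// such conversions is better off staying on FPR.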
bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
                                           const MachineRegisterInfo &MRI,
                                           const AArch64RegisterInfo &TRI,
                                           unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
           MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  }
  return onlyDefinesFP(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
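    // For instance, a load of `@g = global { { float, i32 } }` walks down to
    // the leading `float`, so the access is treated as a floating-point load.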
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else if (!isa<Constant>(LdVal)) {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
  // G_{F|S|U}REM are not listed because they are not legal.
  // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
  // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RB are null that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBanks were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors including both scalable and non-scalable
    // ones go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             (MO.isDef() && onlyDefinesFP(MI, MRI, TRI)) ||
             (MO.isUse() && onlyUsesFP(MI, MRI, TRI)) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on GPR.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else {
      if (ScalarTy.getSizeInBits() < 32 &&
          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
        MRI.getType(SrcReg).getSizeInBits() ==
            MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    LLT DstType = MRI.getType(MI.getOperand(0).getReg());
    if (DstType.isVector())
      break;
    if (DstType == LLT::scalar(16)) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
      break;
    }
    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
        all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross-register-bank copy, which is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() < 32)
        MappingID = CustomMappingID;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // prefersFPUse().
                 if (isPHIWithFPConstraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;

    // On GPR, extend any load smaller than 32 bits to 32 bits.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (Ty.isScalar() && Ty.getSizeInBits() < 32)
      MappingID = CustomMappingID;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (VReg) {
        MachineInstr *DefMI = MRI.getVRegDef(VReg);
        if (onlyDefinesFP(*DefMI, MRI, TRI)) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
      }

      // On GPR, extend any store smaller than 32 bits to 32 bits.
      LLT Ty = MRI.getType(MI.getOperand(0).getReg());
      if (Ty.isScalar() && Ty.getSizeInBits() < 32)
        MappingID = CustomMappingID;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR,
                      /* Condition */ PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR,
                      /* Condition */ PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case AArch64::G_SQSHLU_I:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The shift amount needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;

  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the register bank will be GPR, then we
      // change the type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // GPR without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu: {
      OpRegBankIdx[2] = PMI_FirstFPR;
      if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
        OpRegBankIdx[0] = PMI_FirstFPR;
        break;
      }
      TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
      TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
          all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                 [&](const MachineInstr &UseMI) {
                   return onlyUsesFP(UseMI, MRI, TRI) ||
                          prefersFPUse(UseMI, MRI, TRI);
                 }))
        OpRegBankIdx[0] = PMI_FirstFPR;
      else
        OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }
    case Intrinsic::aarch64_neon_vcvtfxs2fp:
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
    case Intrinsic::aarch64_neon_vcvtfp2fxs:
    case Intrinsic::aarch64_neon_vcvtfp2fxu:
      // Override these intrinsics, because they would have a partial
      // mapping. This is needed for 'half' types, which otherwise don't
      // get legalised correctly.
      OpRegBankIdx[0] = PMI_FirstFPR;
      OpRegBankIdx[2] = PMI_FirstFPR;
      // OpRegBankIdx[1] is the intrinsic ID.
      // OpRegBankIdx[3] is an integer immediate.
      break;
    default: {
      // Check if we know that the intrinsic has any constraints on its
      // register banks. If it does, then update the mapping accordingly.
      unsigned Idx = 0;
      if (onlyDefinesFP(MI, MRI, TRI))
        for (const auto &Op : MI.defs()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      else
        Idx += MI.getNumExplicitDefs();

      if (onlyUsesFP(MI, MRI, TRI))
        for (const auto &Op : MI.explicit_uses()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      break;
    }
    }
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}