AMDGPURegBankLegalizeHelper.cpp
1//===-- AMDGPURegBankLegalizeHelper.cpp -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Implements actual lowering algorithms for each ID that can be used in
10/// Rule.OperandMapping. Similar to legalizer helper but with register banks.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPURegBankLegalizeHelper.h"
15#include "AMDGPUGlobalISelUtils.h"
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPURegBankLegalizeRules.h"
18#include "AMDGPURegisterBankInfo.h"
19#include "GCNSubtarget.h"
20#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
21#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
22#include "llvm/CodeGen/GlobalISel/Utils.h"
23#include "llvm/CodeGen/MachineUniformityAnalysis.h"
24#include "llvm/CodeGen/TargetOpcodes.h"
25#include "llvm/IR/IntrinsicsAMDGPU.h"
26
27#define DEBUG_TYPE "amdgpu-regbanklegalize"
28
29using namespace llvm;
30using namespace AMDGPU;
31
32RegBankLegalizeHelper::RegBankLegalizeHelper(
33 MachineIRBuilder &B, const MachineUniformityInfo &MUI,
34 const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
35 : MF(B.getMF()), ST(MF.getSubtarget<GCNSubtarget>()), B(B),
36 MRI(*B.getMRI()), MUI(MUI), RBI(RBI), MORE(MF, nullptr),
37 RBLRules(RBLRules), IsWave32(ST.isWave32()),
38 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
39 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
40 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
41
42bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
43 const SetOfRulesForOpcode *RuleSet = RBLRules.getRulesForOpc(MI);
44 if (!RuleSet) {
45 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
46 "No AMDGPU RegBankLegalize rules defined for opcode",
47 MI);
48 return false;
49 }
50
51 const RegBankLLTMapping *Mapping = RuleSet->findMappingForMI(MI, MRI, MUI);
52 if (!Mapping) {
53 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
54 "AMDGPU RegBankLegalize: none of the rules defined with "
55 "'Any' for MI's opcode matched MI",
56 MI);
57 return false;
58 }
59
60 SmallSet<Register, 4> WaterfallSgprs;
61 unsigned OpIdx = 0;
62 if (Mapping->DstOpMapping.size() > 0) {
63 B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
64 if (!applyMappingDst(MI, OpIdx, Mapping->DstOpMapping))
65 return false;
66 }
67 if (Mapping->SrcOpMapping.size() > 0) {
68 B.setInstr(MI);
69 if (!applyMappingSrc(MI, OpIdx, Mapping->SrcOpMapping, WaterfallSgprs))
70 return false;
71 }
72
73 if (!lower(MI, *Mapping, WaterfallSgprs))
74 return false;
75
76 return true;
77}
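// Illustrative note (editorial sketch, not upstream code): for e.g. a uniform
// s32 G_ADD, a typical matched rule maps all three operands to Sgpr32 with
// LoweringMethod DoNotLower, so applyMappingDst/applyMappingSrc only verify
// banks or insert copies, and lower() falls through without rewriting MI.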
78
79bool RegBankLegalizeHelper::executeInWaterfallLoop(
80 MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
81 SmallSet<Register, 4> &SGPROperandRegs) {
82 // Track use registers which have already been expanded with a readfirstlane
83 // sequence. A register may be used multiple times when moving a sequence.
84 DenseMap<Register, Register> WaterfalledRegMap;
85
86 MachineBasicBlock &MBB = B.getMBB();
87 MachineFunction &MF = B.getMF();
88
89 const SIRegisterInfo *TRI = ST.getRegisterInfo();
90 const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
91 unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
92 if (IsWave32) {
93 MovExecOpc = AMDGPU::S_MOV_B32;
94 MovExecTermOpc = AMDGPU::S_MOV_B32_term;
95 XorTermOpc = AMDGPU::S_XOR_B32_term;
96 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
97 ExecReg = AMDGPU::EXEC_LO;
98 } else {
99 MovExecOpc = AMDGPU::S_MOV_B64;
100 MovExecTermOpc = AMDGPU::S_MOV_B64_term;
101 XorTermOpc = AMDGPU::S_XOR_B64_term;
102 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
103 ExecReg = AMDGPU::EXEC;
104 }
105
106#ifndef NDEBUG
107 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
108#endif
109
110 MachineRegisterInfo &MRI = *B.getMRI();
111 Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
112 Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
113
114 // Don't bother using generic instructions/registers for the exec mask.
115 B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
116
117 Register SavedExec = MRI.createVirtualRegister(WaveRC);
118
119 // To insert the loop we need to split the block. Move everything before
120 // this point to a new block, and insert a new empty block before this
121 // instruction.
122 MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
123 MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock();
124 MachineBasicBlock *RestoreExecBB = MF.CreateMachineBasicBlock();
125 MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
126 MachineFunction::iterator MBBI(MBB);
127 ++MBBI;
128 MF.insert(MBBI, LoopBB);
129 MF.insert(MBBI, BodyBB);
130 MF.insert(MBBI, RestoreExecBB);
131 MF.insert(MBBI, RemainderBB);
132
133 LoopBB->addSuccessor(BodyBB);
134 BodyBB->addSuccessor(RestoreExecBB);
135 BodyBB->addSuccessor(LoopBB);
136
137 // Move the rest of the block into a new block.
138 RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
139 RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());
140
141 MBB.addSuccessor(LoopBB);
142 RestoreExecBB->addSuccessor(RemainderBB);
143
144 B.setInsertPt(*LoopBB, LoopBB->end());
145
146 // +-MBB:------------+
147 // | ... |
148 // | %0 = G_INST_1 |
149 // | %Dst = MI %Vgpr |
150 // | %1 = G_INST_2 |
151 // | ... |
152 // +-----------------+
153 // ->
154 // +-MBB-------------------------------+
155 // | ... |
156 // | %0 = G_INST_1 |
157 // | %SaveExecReg = S_MOV_B32 $exec_lo |
158 // +----------------|------------------+
159 // | /------------------------------|
160 // V V |
161 // +-LoopBB---------------------------------------------------------------+ |
162 // | %CurrentLaneReg:sgpr(s32) = READFIRSTLANE %Vgpr | |
163 // | instead of executing for each lane, see if other lanes had | |
164 // | same value for %Vgpr and execute for them also. | |
165 // | %CondReg:vcc(s1) = G_ICMP eq %CurrentLaneReg, %Vgpr | |
166 // | %CondRegLM:sreg_32 = ballot %CondReg // copy vcc to sreg32 lane mask | |
167 // | %SavedExec = S_AND_SAVEEXEC_B32 %CondRegLM | |
168 // | exec is active for lanes with the same "CurrentLane value" in Vgpr | |
169 // +----------------|-----------------------------------------------------+ |
170 // V |
171 // +-BodyBB------------------------------------------------------------+ |
172 // | %Dst = MI %CurrentLaneReg:sgpr(s32) | |
173 // | executed only for active lanes and written to Dst | |
174 // | $exec = S_XOR_B32 $exec, %SavedExec | |
175 // | set active lanes to 0 in SavedExec, lanes that did not write to | |
176 // | Dst yet, and set this as new exec (for READFIRSTLANE and ICMP) | |
177 // | SI_WATERFALL_LOOP LoopBB |-----|
178 // +----------------|--------------------------------------------------+
179 // V
180 // +-RestoreExecBB--------------------------+
181 // | $exec_lo = S_MOV_B32_term %SaveExecReg |
182 // +----------------|-----------------------+
183 // V
184 // +-RemainderBB:----------------------+
185 // | %1 = G_INST_2 |
186 // | ... |
187 // +-----------------------------------+
188
189 // Move the instruction into the loop body. Note we moved everything after
190 // Range.end() already into a new block, so Range.end() is no longer valid.
191 BodyBB->splice(BodyBB->end(), &MBB, Range.begin(), MBB.end());
192
193 // Figure out the iterator range after splicing the instructions.
194 MachineBasicBlock::iterator NewBegin = Range.begin()->getIterator();
195 auto NewEnd = BodyBB->end();
196 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
197
198 B.setMBB(*LoopBB);
199 Register CondReg;
200
201 for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
202 for (MachineOperand &Op : MI.all_uses()) {
203 Register OldReg = Op.getReg();
204 if (!SGPROperandRegs.count(OldReg))
205 continue;
206
207 // See if we already processed this register in another instruction in
208 // the sequence.
209 auto OldVal = WaterfalledRegMap.find(OldReg);
210 if (OldVal != WaterfalledRegMap.end()) {
211 Op.setReg(OldVal->second);
212 continue;
213 }
214
215 Register OpReg = Op.getReg();
216 LLT OpTy = MRI.getType(OpReg);
217
218 // TODO: support for agpr
219 assert(MRI.getRegBank(OpReg) == VgprRB);
220 Register CurrentLaneReg = MRI.createVirtualRegister({SgprRB, OpTy});
221 buildReadFirstLane(B, CurrentLaneReg, OpReg, RBI);
222
223 // Build the comparison(s), CurrentLaneReg == OpReg.
224 unsigned OpSize = OpTy.getSizeInBits();
225 unsigned PartSize = (OpSize % 64 == 0) ? 64 : 32;
226 LLT PartTy = LLT::scalar(PartSize);
227 unsigned NumParts = OpSize / PartSize;
228 SmallVector<Register, 8> OpParts;
229 SmallVector<Register, 8> CurrentLaneParts;
230
231 if (NumParts == 1) {
232 OpParts.push_back(OpReg);
233 CurrentLaneParts.push_back(CurrentLaneReg);
234 } else {
235 auto UnmergeOp = B.buildUnmerge({VgprRB, PartTy}, OpReg);
236 auto UnmergeCurrLane = B.buildUnmerge({SgprRB, PartTy}, CurrentLaneReg);
237 for (unsigned i = 0; i < NumParts; ++i) {
238 OpParts.push_back(UnmergeOp.getReg(i));
239 CurrentLaneParts.push_back(UnmergeCurrLane.getReg(i));
240 }
241 }
242
243 for (unsigned i = 0; i < NumParts; ++i) {
244 Register CmpReg = MRI.createVirtualRegister(VccRB_S1);
245 B.buildICmp(CmpInst::ICMP_EQ, CmpReg, CurrentLaneParts[i], OpParts[i]);
246
247 if (!CondReg)
248 CondReg = CmpReg;
249 else
250 CondReg = B.buildAnd(VccRB_S1, CondReg, CmpReg).getReg(0);
251 }
252
253 Op.setReg(CurrentLaneReg);
254
255 // Make sure we don't re-process this register again.
256 WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
257 }
258 }
259
260 // Copy vcc to sgpr32/64, ballot becomes a no-op during instruction selection.
261 Register CondRegLM =
262 MRI.createVirtualRegister({WaveRC, LLT::scalar(IsWave32 ? 32 : 64)});
263 B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
264
265 // Update EXEC, save the original EXEC value to SavedExec.
266 B.buildInstr(AndSaveExecOpc)
267 .addDef(SavedExec)
268 .addReg(CondRegLM, RegState::Kill);
269 MRI.setSimpleHint(SavedExec, CondRegLM);
270
271 B.setInsertPt(*BodyBB, BodyBB->end());
272
273 // Update EXEC, switch all done bits to 0 and all todo bits to 1.
274 B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
275
276 // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
277 // s_cbranch_scc0?
278
279 // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
280 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
281
282 // Save the EXEC mask before the loop.
283 B.setInsertPt(MBB, MBB.end());
284 B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
285
286 // Restore the EXEC mask after the loop.
287 B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
288 B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
289
290 // Set the insert point after the original instruction, so any new
291 // instructions will be in the remainder.
292 B.setInsertPt(*RemainderBB, RemainderBB->begin());
293
294 return true;
295}
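// Note: in this pass, Range typically covers a single instruction; lower()
// below wraps MI as make_range(I, std::next(I)) before calling this helper.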
296
297bool RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
298 ArrayRef<LLT> LLTBreakdown, LLT MergeTy) {
299 MachineFunction &MF = B.getMF();
300 assert(MI.getNumMemOperands() == 1);
301 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
302 Register Dst = MI.getOperand(0).getReg();
303 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
304 Register Base = MI.getOperand(1).getReg();
305 LLT PtrTy = MRI.getType(Base);
306 const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
307 LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
308 SmallVector<Register, 4> LoadPartRegs;
309
310 unsigned ByteOffset = 0;
311 for (LLT PartTy : LLTBreakdown) {
312 Register BasePlusOffset;
313 if (ByteOffset == 0) {
314 BasePlusOffset = Base;
315 } else {
316 auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
317 BasePlusOffset =
318 B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
319 }
320 auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
321 auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
322 LoadPartRegs.push_back(LoadPart.getReg(0));
323 ByteOffset += PartTy.getSizeInBytes();
324 }
325
326 if (!MergeTy.isValid()) {
327 // Loads are of same size, concat or merge them together.
328 B.buildMergeLikeInstr(Dst, LoadPartRegs);
329 } else {
330 // Loads are not all of same size, need to unmerge them to smaller pieces
331 // of MergeTy type, then merge pieces to Dst.
332 SmallVector<Register, 4> MergeTyParts;
333 for (Register Reg : LoadPartRegs) {
334 if (MRI.getType(Reg) == MergeTy) {
335 MergeTyParts.push_back(Reg);
336 } else {
337 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
338 for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
339 MergeTyParts.push_back(Unmerge.getReg(i));
340 }
341 }
342 B.buildMergeLikeInstr(Dst, MergeTyParts);
343 }
344 MI.eraseFromParent();
345 return true;
346}
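// Illustrative example (editorial sketch; register names are hypothetical):
// splitting a 256-bit load with LLTBreakdown {B128, B128} produces roughly
//   %lo(<4 x s32>)  = G_LOAD %base :: (load (<4 x s32>))
//   %c(s64)         = G_CONSTANT i64 16
//   %ptr(p1)        = G_PTR_ADD %base, %c     ; via buildObjectPtrOffset
//   %hi(<4 x s32>)  = G_LOAD %ptr  :: (load (<4 x s32>) from base + 16)
//   %dst(<8 x s32>) = G_CONCAT_VECTORS %lo, %hi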
347
348bool RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
349 LLT MergeTy) {
350 MachineFunction &MF = B.getMF();
351 assert(MI.getNumMemOperands() == 1);
352 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
353 Register Dst = MI.getOperand(0).getReg();
354 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
355 Register Base = MI.getOperand(1).getReg();
356
357 MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
358 auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);
359
360 if (WideTy.isScalar()) {
361 B.buildTrunc(Dst, WideLoad);
362 } else {
363 SmallVector<Register, 4> MergeTyParts;
364 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
365
366 LLT DstTy = MRI.getType(Dst);
367 unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
368 for (unsigned i = 0; i < NumElts; ++i) {
369 MergeTyParts.push_back(Unmerge.getReg(i));
370 }
371 B.buildMergeLikeInstr(Dst, MergeTyParts);
372 }
373 MI.eraseFromParent();
374 return true;
375}
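// Illustrative example: widenLoad(MI, V4S32, S32) on a V3S32 load emits one
// <4 x s32> load, unmerges it into four s32 pieces, and merges the first
// three (NumElts = 96 / 32) back into Dst; the fourth piece is unused.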
376
377bool RegBankLegalizeHelper::widenMMOToS32(GAnyLoad &MI) const {
378 Register Dst = MI.getDstReg();
379 Register Ptr = MI.getPointerReg();
380 MachineMemOperand &MMO = MI.getMMO();
381 unsigned MemSize = 8 * MMO.getSize().getValue();
382
383 MachineMemOperand *WideMMO = B.getMF().getMachineMemOperand(&MMO, 0, S32);
384
385 if (MI.getOpcode() == G_LOAD) {
386 B.buildLoad(Dst, Ptr, *WideMMO);
387 } else {
388 auto Load = B.buildLoad(SgprRB_S32, Ptr, *WideMMO);
389
390 if (MI.getOpcode() == G_ZEXTLOAD) {
391 APInt Mask = APInt::getLowBitsSet(S32.getSizeInBits(), MemSize);
392 auto MaskCst = B.buildConstant(SgprRB_S32, Mask);
393 B.buildAnd(Dst, Load, MaskCst);
394 } else {
395 assert(MI.getOpcode() == G_SEXTLOAD);
396 B.buildSExtInReg(Dst, Load, MemSize);
397 }
398 }
399
400 MI.eraseFromParent();
401 return true;
402}
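// Illustrative example (sketch): a uniform G_ZEXTLOAD with an (s8) MMO has
// MemSize = 8, so the rewrite is approximately
//   %w(s32)   = G_LOAD %ptr :: (load (s32))
//   %m(s32)   = G_CONSTANT i32 255            ; getLowBitsSet(32, 8)
//   %dst(s32) = G_AND %w, %m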
403
404bool RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
405 Register Dst = MI.getOperand(0).getReg();
406 LLT Ty = MRI.getType(Dst);
407 Register Src = MI.getOperand(1).getReg();
408 unsigned Opc = MI.getOpcode();
409 int TrueExtCst = Opc == G_SEXT ? -1 : 1;
410 if (Ty == S32 || Ty == S16) {
411 auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
412 auto False = B.buildConstant({VgprRB, Ty}, 0);
413 B.buildSelect(Dst, Src, True, False);
414 } else if (Ty == S64) {
415 auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
416 auto False = B.buildConstant({VgprRB_S32}, 0);
417 auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
418 MachineInstrBuilder Hi;
419 switch (Opc) {
420 case G_SEXT:
421 Hi = Lo;
422 break;
423 case G_ZEXT:
424 Hi = False;
425 break;
426 case G_ANYEXT:
427 Hi = B.buildUndef({VgprRB_S32});
428 break;
429 default:
430 reportGISelFailure(
431 MF, MORE, "amdgpu-regbanklegalize",
432 "AMDGPU RegBankLegalize: lowerVccExtToSel, Opcode not supported", MI);
433 return false;
434 }
435
436 B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
437 } else {
438 reportGISelFailure(
439 MF, MORE, "amdgpu-regbanklegalize",
440 "AMDGPU RegBankLegalize: lowerVccExtToSel, Type not supported", MI);
441 return false;
442 }
443
444 MI.eraseFromParent();
445 return true;
446}
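// Illustrative example: G_SEXT of a vcc s1 into s32 becomes a select between
// all-ones and zero,
//   %t(s32)   = G_CONSTANT i32 -1
//   %f(s32)   = G_CONSTANT i32 0
//   %dst(s32) = G_SELECT %src(s1), %t, %f
// while the s64 case selects the low half and merges in Lo/0/undef as Hi.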
447
448std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
449 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
450 auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
451 auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
452 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
453 return {Lo.getReg(0), Hi.getReg(0)};
454}
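// Worked example: for a packed value 0xBEEFF00D, unpackZExt returns
// Lo = 0x0000F00D (masked) and Hi = 0x0000BEEF (logical shift right by 16).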
455
456std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
457 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
458 auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
459 auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
460 return {Lo.getReg(0), Hi.getReg(0)};
461}
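// Worked example: for 0xBEEFF00D, unpackSExt returns Lo = 0xFFFFF00D
// (sext_inreg 16 of 0xF00D) and Hi = 0xFFFFBEEF (arithmetic shift right 16).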
462
463std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
464 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
465 auto Lo = PackedS32;
466 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
467 return {Lo.getReg(0), Hi.getReg(0)};
468}
469
470std::pair<Register, Register>
471RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) {
472 auto [Lo32, Hi32] = unpackAExt(Reg);
473 return {B.buildTrunc(SgprRB_S16, Lo32).getReg(0),
474 B.buildTrunc(SgprRB_S16, Hi32).getReg(0)};
475}
476
477bool RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
478 Register Lo, Hi;
479 switch (MI.getOpcode()) {
480 case AMDGPU::G_SHL: {
481 auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
482 auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
483 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
484 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
485 break;
486 }
487 case AMDGPU::G_LSHR: {
488 auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
489 auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
490 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
491 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
492 break;
493 }
494 case AMDGPU::G_ASHR: {
495 auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
496 auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
497 Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
498 Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
499 break;
500 }
501 default:
502 reportGISelFailure(
503 MF, MORE, "amdgpu-regbanklegalize",
504 "AMDGPU RegBankLegalize: lowerUnpackBitShift, case not implemented",
505 MI);
506 return false;
507 }
508 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
509 MI.eraseFromParent();
510 return true;
511}
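// Note on the extension choice: G_LSHR unpacks with zext so the shifted-in
// high bits are zero, G_ASHR unpacks with sext so they replicate the sign,
// and G_SHL only needs anyext since its high result bits are discarded by
// the final G_BUILD_VECTOR_TRUNC.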
512
513bool RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
514 Register Lo, Hi;
515 switch (MI.getOpcode()) {
516 case AMDGPU::G_SMIN:
517 case AMDGPU::G_SMAX: {
518 // For signed operations, use sign extension
519 auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
520 auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
521 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
522 .getReg(0);
523 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
524 .getReg(0);
525 break;
526 }
527 case AMDGPU::G_UMIN:
528 case AMDGPU::G_UMAX: {
529 // For unsigned operations, use zero extension
530 auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
531 auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
532 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
533 .getReg(0);
534 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
535 .getReg(0);
536 break;
537 }
538 default:
539 reportGISelFailure(
540 MF, MORE, "amdgpu-regbanklegalize",
541 "AMDGPU RegBankLegalize: lowerUnpackMinMax, case not implemented", MI);
542 return false;
543 }
544 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
545 MI.eraseFromParent();
546 return true;
547}
548
549bool RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
550 auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
551 auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
552 auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
553 auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
554 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
555 {ResLo.getReg(0), ResHi.getReg(0)});
556 MI.eraseFromParent();
557 return true;
558}
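// Note: anyext unpacking suffices here because for ops of this kind the low
// 16 bits of a 32-bit result depend only on the low 16 bits of the inputs;
// the junk high bits are discarded by G_BUILD_VECTOR_TRUNC.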
559
560static bool isSignedBFE(MachineInstr &MI) {
561 if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
562 return (GI->is(Intrinsic::amdgcn_sbfe));
563
564 return MI.getOpcode() == AMDGPU::G_SBFX;
565}
566
567bool RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
568 Register Dst = MI.getOperand(0).getReg();
569 assert(MRI.getType(Dst) == LLT::scalar(64));
570 bool Signed = isSignedBFE(MI);
571 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
572 // Extract bitfield from Src, LSBit is the least-significant bit for the
573 // extraction (field offset) and Width is size of bitfield.
574 Register Src = MI.getOperand(FirstOpnd).getReg();
575 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
576 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
577 // Comments show the signed bitfield extract (unsigned is similar): x is the
578 // sign bit of Src, s the bitfield's sign bit, l its LSB, y its other bits.
579
580 // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
581 unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
582 auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
583
584 auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
585
586 // Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
587 // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
588 // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
589 if (!ConstWidth) {
590 auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
591 auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
592 B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
593 MI.eraseFromParent();
594 return true;
595 }
596
597 uint64_t WidthImm = ConstWidth->Value.getZExtValue();
598 auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
599 Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
600 Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
601 auto Zero = B.buildConstant({VgprRB, S32}, 0);
602 unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
603
604 if (WidthImm <= 32) {
605 // SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
606 auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
607 MachineInstrBuilder Hi;
608 if (Signed) {
609 // SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
610 Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
611 } else {
612 // SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
613 Hi = Zero;
614 }
615 B.buildMergeLikeInstr(Dst, {Lo, Hi});
616 } else {
617 auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
618 // SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
619 auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
620 B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
621 }
622
623 MI.eraseFromParent();
624 return true;
625}
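// Worked example (signed, constant Width = 8, LSBit = 4): SHRSrc = Src >> 4,
// then Lo = G_SBFX(SHRSrcLo, 0, 8) sign-extends bits [11:4] of Src to 32 bits
// and Hi = Lo >> 31 replicates that sign across the high word.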
626
627bool RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
628 Register DstReg = MI.getOperand(0).getReg();
629 LLT Ty = MRI.getType(DstReg);
630 bool Signed = isSignedBFE(MI);
631 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
632 Register Src = MI.getOperand(FirstOpnd).getReg();
633 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
634 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
635 // For uniform bit field extract there are 4 available instructions, but
636 // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
637 // field offset in low and size in high 16 bits.
638
639 // Src1 Hi16|Lo16 = Size|FieldOffset
640 auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
641 auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
642 auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
643 auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
644 unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
645 unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
646 unsigned Opc = Ty == S32 ? Opc32 : Opc64;
647
648 // Select machine instruction, because of reg class constraining, insert
649 // copies from reg class to reg bank.
650 auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
651 {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
652 if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
653 *ST.getRegisterInfo(), RBI)) {
654 reportGISelFailure(
655 MF, MORE, "amdgpu-regbanklegalize",
656 "AMDGPU RegBankLegalize: lowerS_BFE, failed to constrain BFE", MI);
657 return false;
658 }
659
660 B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
661 MI.eraseFromParent();
662 return true;
663}
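// Worked example: for field offset 5 and width 3, Src1 = (3 << 16) | 5 =
// 0x30005, matching the S_BFE packing (offset in bits [5:0], width in
// bits [22:16]).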
664
665bool RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
666 Register Dst = MI.getOperand(0).getReg();
667 LLT DstTy = MRI.getType(Dst);
668 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
669 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
670 auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
671 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
672 unsigned Opc = MI.getOpcode();
673 auto Flags = MI.getFlags();
674 auto Lo =
675 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
676 auto Hi =
677 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
678 B.buildMergeLikeInstr(Dst, {Lo, Hi});
679 MI.eraseFromParent();
680 return true;
681}
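// Illustrative example: a divergent s64 G_AND is unmerged into two s32
// halves, each half is ANDed in vgpr, and the results are remerged; V4S16
// splits the same way into two V2S16 halves.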
682
683bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
684 Register Dst = MI.getOperand(0).getReg();
685 assert(MRI.getType(Dst) == V2S16);
686 unsigned Opc = MI.getOpcode();
687 auto Flags = MI.getFlags();
688
689 if (MI.getNumOperands() == 2) {
690 auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
691 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
692 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
693 B.buildMergeLikeInstr(Dst, {Lo, Hi});
694 MI.eraseFromParent();
695 return true;
696 }
697
698 assert(MI.getNumOperands() == 3);
699 auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
700 auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
701 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
702 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
703 B.buildMergeLikeInstr(Dst, {Lo, Hi});
704 MI.eraseFromParent();
705 return true;
706}
707
708bool RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
709 Register Dst = MI.getOperand(0).getReg();
710 LLT DstTy = MRI.getType(Dst);
711 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
712 (DstTy.isPointer() && DstTy.getSizeInBits() == 64));
713 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
714 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
715 auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
716 Register Cond = MI.getOperand(1).getReg();
717 auto Flags = MI.getFlags();
718 auto Lo =
719 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
720 auto Hi =
721 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
722
723 B.buildMergeLikeInstr(Dst, {Lo, Hi});
724 MI.eraseFromParent();
725 return true;
726}
727
728bool RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
729 auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
730 int Amt = MI.getOperand(2).getImm();
731 Register Lo, Hi;
732 // Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend
733 if (Amt <= 32) {
734 auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
735 if (Amt == 32) {
736 // Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
737 Lo = Freeze.getReg(0);
738 } else {
739 // Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
740 Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
741 }
742
743 auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
744 Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
745 } else {
746 // Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
747 Lo = Op1.getReg(0);
748 Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
749 }
750
751 B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
752 MI.eraseFromParent();
753 return true;
754}
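// Worked example (Amt = 8): Lo = sext_inreg(lo32, 8), Hi = Lo >> 31, so an
// input low half of 0x00000080 yields Lo = 0xFFFFFF80 and Hi = 0xFFFFFFFF.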
755
756bool RegBankLegalizeHelper::lower(MachineInstr &MI,
757 const RegBankLLTMapping &Mapping,
758 SmallSet<Register, 4> &WaterfallSgprs) {
759
760 switch (Mapping.LoweringMethod) {
761 case DoNotLower:
762 break;
763 case VccExtToSel:
764 return lowerVccExtToSel(MI);
765 case UniExtToSel: {
766 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
767 auto True = B.buildConstant({SgprRB, Ty},
768 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
769 auto False = B.buildConstant({SgprRB, Ty}, 0);
770 // The input to G_{Z|S}EXT is 'Legalizer legal' S1, most commonly a compare.
771 // We build a select here; the S1 condition was already any-extended to S32
772 // and ANDed with 1 to clear the high bits by Sgpr32AExtBoolInReg.
773 B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
774 False);
775 MI.eraseFromParent();
776 return true;
777 }
778 case UnpackBitShift:
779 return lowerUnpackBitShift(MI);
780 case UnpackMinMax:
781 return lowerUnpackMinMax(MI);
782 case ScalarizeToS16:
783 return lowerSplitTo16(MI);
784 case Ext32To64: {
785 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
786 MachineInstrBuilder Hi;
787 switch (MI.getOpcode()) {
788 case AMDGPU::G_ZEXT: {
789 Hi = B.buildConstant({RB, S32}, 0);
790 break;
791 }
792 case AMDGPU::G_SEXT: {
793 // Replicate sign bit from 32-bit extended part.
794 auto ShiftAmt = B.buildConstant({RB, S32}, 31);
795 Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
796 break;
797 }
798 case AMDGPU::G_ANYEXT: {
799 Hi = B.buildUndef({RB, S32});
800 break;
801 }
802 default:
803 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
804 "AMDGPU RegBankLegalize: Ext32To64, unsuported opcode",
805 MI);
806 return false;
807 }
808
809 B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
810 {MI.getOperand(1).getReg(), Hi});
811 MI.eraseFromParent();
812 return true;
813 }
814 case UniCstExt: {
815 uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
816 B.buildConstant(MI.getOperand(0).getReg(), ConstVal);
817
818 MI.eraseFromParent();
819 return true;
820 }
821 case VgprToVccCopy: {
822 Register Src = MI.getOperand(1).getReg();
823 LLT Ty = MRI.getType(Src);
824 // Take lowest bit from each lane and put it in lane mask.
825 // Lower via a compare, but clean the high bits first since the compare
826 // examines all bits of the register.
827 Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
828 if (Ty == S64) {
829 auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
830 auto One = B.buildConstant(VgprRB_S32, 1);
831 auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
832 auto Zero = B.buildConstant(VgprRB_S32, 0);
833 auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
834 B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
835 } else {
836 assert(Ty == S32 || Ty == S16);
837 auto One = B.buildConstant({VgprRB, Ty}, 1);
838 B.buildAnd(BoolSrc, Src, One);
839 }
840 auto Zero = B.buildConstant({VgprRB, Ty}, 0);
841 B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
842 MI.eraseFromParent();
843 return true;
844 }
845 case V_BFE:
846 return lowerV_BFE(MI);
847 case S_BFE:
848 return lowerS_BFE(MI);
849 case SplitTo32:
850 return lowerSplitTo32(MI);
851 case SplitTo32Select:
852 return lowerSplitTo32Select(MI);
853 case SplitTo32SExtInReg:
854 return lowerSplitTo32SExtInReg(MI);
855 case SplitLoad: {
856 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
857 unsigned Size = DstTy.getSizeInBits();
858 // Split evenly into 128-bit loads.
859 if (Size > 128) {
860 LLT B128;
861 if (DstTy.isVector()) {
862 LLT EltTy = DstTy.getElementType();
863 B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
864 } else {
865 B128 = LLT::scalar(128);
866 }
867 if (Size / 128 == 2)
868 splitLoad(MI, {B128, B128});
869 else if (Size / 128 == 4)
870 splitLoad(MI, {B128, B128, B128, B128});
871 else {
872 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
873 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
874 MI);
875 return false;
876 }
877 }
878 // Split into 64- and 32-bit loads.
879 else if (DstTy == S96)
880 splitLoad(MI, {S64, S32}, S32);
881 else if (DstTy == V3S32)
882 splitLoad(MI, {V2S32, S32}, S32);
883 else if (DstTy == V6S16)
884 splitLoad(MI, {V4S16, V2S16}, V2S16);
885 else {
886 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
887 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
888 MI);
889 return false;
890 }
891 return true;
892 }
893 case WidenLoad: {
894 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
895 if (DstTy == S96)
896 widenLoad(MI, S128);
897 else if (DstTy == V3S32)
898 widenLoad(MI, V4S32, S32);
899 else if (DstTy == V6S16)
900 widenLoad(MI, V8S16, V2S16);
901 else {
902 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
903 "AMDGPU RegBankLegalize: WidenLoad, unsuported type",
904 MI);
905 return false;
906 }
907 return true;
908 }
909 case UnpackAExt:
910 return lowerUnpackAExt(MI);
911 case WidenMMOToS32:
912 return widenMMOToS32(cast<GAnyLoad>(MI));
913 }
914
915 if (!WaterfallSgprs.empty()) {
916 MachineBasicBlock::iterator I = MI.getIterator();
917 if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
918 return false;
919 }
920 return true;
921}
922
923LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
924 switch (ID) {
925 case Vcc:
926 case UniInVcc:
927 return LLT::scalar(1);
928 case Sgpr16:
929 case Vgpr16:
930 case UniInVgprS16:
931 return LLT::scalar(16);
932 case Sgpr32:
933 case Sgpr32_WF:
934 case Sgpr32Trunc:
935 case Sgpr32AExt:
936 case Sgpr32AExtBoolInReg:
937 case Sgpr32SExt:
938 case Sgpr32ZExt:
939 case UniInVgprS32:
940 case Vgpr32:
941 case Vgpr32SExt:
942 case Vgpr32ZExt:
943 return LLT::scalar(32);
944 case Sgpr64:
945 case Vgpr64:
946 case UniInVgprS64:
947 return LLT::scalar(64);
948 case Sgpr128:
949 case Vgpr128:
950 return LLT::scalar(128);
951 case SgprP0:
952 case VgprP0:
953 return LLT::pointer(0, 64);
954 case SgprP1:
955 case VgprP1:
956 return LLT::pointer(1, 64);
957 case SgprP3:
958 case VgprP3:
959 return LLT::pointer(3, 32);
960 case SgprP4:
961 case VgprP4:
962 return LLT::pointer(4, 64);
963 case SgprP5:
964 case VgprP5:
965 return LLT::pointer(5, 32);
966 case SgprP8:
967 return LLT::pointer(8, 128);
968 case SgprV2S16:
969 case VgprV2S16:
970 case UniInVgprV2S16:
971 return LLT::fixed_vector(2, 16);
972 case SgprV2S32:
973 case VgprV2S32:
974 return LLT::fixed_vector(2, 32);
975 case SgprV4S32:
976 case SgprV4S32_WF:
977 case VgprV4S32:
978 case UniInVgprV4S32:
979 return LLT::fixed_vector(4, 32);
980 default:
981 return LLT();
982 }
983}
984
985LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
986 switch (ID) {
987 case SgprB32:
988 case VgprB32:
989 case UniInVgprB32:
990 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
991 isAnyPtr(Ty, 32))
992 return Ty;
993 return LLT();
994 case SgprPtr32:
995 case VgprPtr32:
996 return isAnyPtr(Ty, 32) ? Ty : LLT();
997 case SgprPtr64:
998 case VgprPtr64:
999 return isAnyPtr(Ty, 64) ? Ty : LLT();
1000 case SgprPtr128:
1001 case VgprPtr128:
1002 return isAnyPtr(Ty, 128) ? Ty : LLT();
1003 case SgprB64:
1004 case VgprB64:
1005 case UniInVgprB64:
1006 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
1007 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
1008 return Ty;
1009 return LLT();
1010 case SgprB96:
1011 case VgprB96:
1012 case UniInVgprB96:
1013 if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) ||
1014 Ty == LLT::fixed_vector(6, 16))
1015 return Ty;
1016 return LLT();
1017 case SgprB128:
1018 case VgprB128:
1019 case UniInVgprB128:
1020 if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) ||
1021 Ty == LLT::fixed_vector(2, 64) || isAnyPtr(Ty, 128))
1022 return Ty;
1023 return LLT();
1024 case SgprB256:
1025 case VgprB256:
1026 case UniInVgprB256:
1027 if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) ||
1028 Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16))
1029 return Ty;
1030 return LLT();
1031 case SgprB512:
1032 case VgprB512:
1033 case UniInVgprB512:
1034 if (Ty == LLT::scalar(512) || Ty == LLT::fixed_vector(16, 32) ||
1035 Ty == LLT::fixed_vector(8, 64))
1036 return Ty;
1037 return LLT();
1038 default:
1039 return LLT();
1040 }
1041}
1042
1043const RegisterBank *
1044RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
1045 switch (ID) {
1046 case Vcc:
1047 return VccRB;
1048 case Sgpr16:
1049 case Sgpr32:
1050 case Sgpr32_WF:
1051 case Sgpr64:
1052 case Sgpr128:
1053 case SgprP0:
1054 case SgprP1:
1055 case SgprP3:
1056 case SgprP4:
1057 case SgprP5:
1058 case SgprP8:
1059 case SgprPtr32:
1060 case SgprPtr64:
1061 case SgprPtr128:
1062 case SgprV2S16:
1063 case SgprV2S32:
1064 case SgprV4S32:
1065 case SgprV4S32_WF:
1066 case SgprB32:
1067 case SgprB64:
1068 case SgprB96:
1069 case SgprB128:
1070 case SgprB256:
1071 case SgprB512:
1072 case UniInVcc:
1073 case UniInVgprS16:
1074 case UniInVgprS32:
1075 case UniInVgprS64:
1076 case UniInVgprV2S16:
1077 case UniInVgprV4S32:
1078 case UniInVgprB32:
1079 case UniInVgprB64:
1080 case UniInVgprB96:
1081 case UniInVgprB128:
1082 case UniInVgprB256:
1083 case UniInVgprB512:
1084 case Sgpr32Trunc:
1085 case Sgpr32AExt:
1086 case Sgpr32AExtBoolInReg:
1087 case Sgpr32SExt:
1088 case Sgpr32ZExt:
1089 return SgprRB;
1090 case Vgpr16:
1091 case Vgpr32:
1092 case Vgpr64:
1093 case Vgpr128:
1094 case VgprP0:
1095 case VgprP1:
1096 case VgprP3:
1097 case VgprP4:
1098 case VgprP5:
1099 case VgprPtr32:
1100 case VgprPtr64:
1101 case VgprPtr128:
1102 case VgprV2S16:
1103 case VgprV2S32:
1104 case VgprV4S32:
1105 case VgprB32:
1106 case VgprB64:
1107 case VgprB96:
1108 case VgprB128:
1109 case VgprB256:
1110 case VgprB512:
1111 case Vgpr32SExt:
1112 case Vgpr32ZExt:
1113 return VgprRB;
1114 default:
1115 return nullptr;
1116 }
1117}
1118
1119bool RegBankLegalizeHelper::applyMappingDst(
1120 MachineInstr &MI, unsigned &OpIdx,
1121 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
1122 // Defs start from operand 0
1123 for (; OpIdx < MethodIDs.size(); ++OpIdx) {
1124 if (MethodIDs[OpIdx] == None)
1125 continue;
1126 MachineOperand &Op = MI.getOperand(OpIdx);
1127 Register Reg = Op.getReg();
1128 LLT Ty = MRI.getType(Reg);
1129 [[maybe_unused]] const RegisterBank *RB = MRI.getRegBank(Reg);
1130
1131 switch (MethodIDs[OpIdx]) {
1132 // vcc, sgpr and vgpr scalars, pointers and vectors
1133 case Vcc:
1134 case Sgpr16:
1135 case Sgpr32:
1136 case Sgpr64:
1137 case Sgpr128:
1138 case SgprP0:
1139 case SgprP1:
1140 case SgprP3:
1141 case SgprP4:
1142 case SgprP5:
1143 case SgprP8:
1144 case SgprV2S16:
1145 case SgprV2S32:
1146 case SgprV4S32:
1147 case Vgpr16:
1148 case Vgpr32:
1149 case Vgpr64:
1150 case Vgpr128:
1151 case VgprP0:
1152 case VgprP1:
1153 case VgprP3:
1154 case VgprP4:
1155 case VgprP5:
1156 case VgprV2S16:
1157 case VgprV2S32:
1158 case VgprV4S32: {
1159 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1160 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1161 break;
1162 }
1163 // sgpr and vgpr B-types
1164 case SgprB32:
1165 case SgprB64:
1166 case SgprB96:
1167 case SgprB128:
1168 case SgprB256:
1169 case SgprB512:
1170 case SgprPtr32:
1171 case SgprPtr64:
1172 case SgprPtr128:
1173 case VgprB32:
1174 case VgprB64:
1175 case VgprB96:
1176 case VgprB128:
1177 case VgprB256:
1178 case VgprB512:
1179 case VgprPtr32:
1180 case VgprPtr64:
1181 case VgprPtr128: {
1182 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1183 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1184 break;
1185 }
1186 // uniform in vcc/vgpr: scalars, vectors and B-types
1187 case UniInVcc: {
1188 assert(Ty == S1);
1189 assert(RB == SgprRB);
1190 Register NewDst = MRI.createVirtualRegister(VccRB_S1);
1191 Op.setReg(NewDst);
1192 auto CopyS32_Vcc =
1193 B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
1194 B.buildTrunc(Reg, CopyS32_Vcc);
1195 break;
1196 }
1197 case UniInVgprS16: {
1198 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1199 assert(RB == SgprRB);
1200 Register NewVgprDstS16 = MRI.createVirtualRegister({VgprRB, S16});
1201 Register NewVgprDstS32 = MRI.createVirtualRegister({VgprRB, S32});
1202 Register NewSgprDstS32 = MRI.createVirtualRegister({SgprRB, S32});
1203 Op.setReg(NewVgprDstS16);
1204 B.buildAnyExt(NewVgprDstS32, NewVgprDstS16);
1205 buildReadAnyLane(B, NewSgprDstS32, NewVgprDstS32, RBI);
1206 B.buildTrunc(Reg, NewSgprDstS32);
1207 break;
1208 }
1209 case UniInVgprS32:
1210 case UniInVgprS64:
1211 case UniInVgprV2S16:
1212 case UniInVgprV4S32: {
1213 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1214 assert(RB == SgprRB);
1215 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1216 Op.setReg(NewVgprDst);
1217 buildReadAnyLane(B, Reg, NewVgprDst, RBI);
1218 break;
1219 }
1220 case UniInVgprB32:
1221 case UniInVgprB64:
1222 case UniInVgprB96:
1223 case UniInVgprB128:
1224 case UniInVgprB256:
1225 case UniInVgprB512: {
1226 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1227 assert(RB == SgprRB);
1228 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1229 Op.setReg(NewVgprDst);
1230 AMDGPU::buildReadAnyLane(B, Reg, NewVgprDst, RBI);
1231 break;
1232 }
1233 // sgpr trunc
1234 case Sgpr32Trunc: {
1235 assert(Ty.getSizeInBits() < 32);
1236 assert(RB == SgprRB);
1237 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1238 Op.setReg(NewDst);
1239 if (!MRI.use_empty(Reg))
1240 B.buildTrunc(Reg, NewDst);
1241 break;
1242 }
1243 case InvalidMapping: {
1244 reportGISelFailure(
1245 MF, MORE, "amdgpu-regbanklegalize",
1246 "AMDGPU RegBankLegalize: missing fast rule ('Div' or 'Uni') for", MI);
1247 return false;
1248 }
1249 default:
1250 reportGISelFailure(
1251 MF, MORE, "amdgpu-regbanklegalize",
1252 "AMDGPU RegBankLegalize: applyMappingDst, ID not supported", MI);
1253 return false;
1254 }
1255 }
1256
1257 return true;
1258}
1259
1260bool RegBankLegalizeHelper::applyMappingSrc(
1261 MachineInstr &MI, unsigned &OpIdx,
1262 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
1263 SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
1264 for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
1265 if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
1266 continue;
1267
1268 MachineOperand &Op = MI.getOperand(OpIdx);
1269 Register Reg = Op.getReg();
1270 LLT Ty = MRI.getType(Reg);
1271 const RegisterBank *RB = MRI.getRegBank(Reg);
1272
1273 switch (MethodIDs[i]) {
1274 case Vcc: {
1275 assert(Ty == S1);
1276 assert(RB == VccRB || RB == SgprRB);
1277 if (RB == SgprRB) {
1278 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1279 auto CopyVcc_Scc =
1280 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
1281 Op.setReg(CopyVcc_Scc.getReg(0));
1282 }
1283 break;
1284 }
1285 // sgpr scalars, pointers and vectors
1286 case Sgpr16:
1287 case Sgpr32:
1288 case Sgpr64:
1289 case Sgpr128:
1290 case SgprP0:
1291 case SgprP1:
1292 case SgprP3:
1293 case SgprP4:
1294 case SgprP5:
1295 case SgprP8:
1296 case SgprV2S16:
1297 case SgprV2S32:
1298 case SgprV4S32: {
1299 assert(Ty == getTyFromID(MethodIDs[i]));
1300 assert(RB == getRegBankFromID(MethodIDs[i]));
1301 break;
1302 }
1303 // sgpr B-types
1304 case SgprB32:
1305 case SgprB64:
1306 case SgprB96:
1307 case SgprB128:
1308 case SgprB256:
1309 case SgprB512:
1310 case SgprPtr32:
1311 case SgprPtr64:
1312 case SgprPtr128: {
1313 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1314 assert(RB == getRegBankFromID(MethodIDs[i]));
1315 break;
1316 }
1317 // vgpr scalars, pointers and vectors
1318 case Vgpr16:
1319 case Vgpr32:
1320 case Vgpr64:
1321 case Vgpr128:
1322 case VgprP0:
1323 case VgprP1:
1324 case VgprP3:
1325 case VgprP4:
1326 case VgprP5:
1327 case VgprV2S16:
1328 case VgprV2S32:
1329 case VgprV4S32: {
1330 assert(Ty == getTyFromID(MethodIDs[i]));
1331 if (RB != VgprRB) {
1332 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1333 Op.setReg(CopyToVgpr.getReg(0));
1334 }
1335 break;
1336 }
1337 // vgpr B-types
1338 case VgprB32:
1339 case VgprB64:
1340 case VgprB96:
1341 case VgprB128:
1342 case VgprB256:
1343 case VgprB512:
1344 case VgprPtr32:
1345 case VgprPtr64:
1346 case VgprPtr128: {
1347 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1348 if (RB != VgprRB) {
1349 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1350 Op.setReg(CopyToVgpr.getReg(0));
1351 }
1352 break;
1353 }
1354 // sgpr waterfall, scalars and vectors
1355 case Sgpr32_WF:
1356 case SgprV4S32_WF: {
1357 assert(Ty == getTyFromID(MethodIDs[i]));
1358 if (RB != SgprRB)
1359 SgprWaterfallOperandRegs.insert(Reg);
1360 break;
1361 }
1362 // sgpr and vgpr scalars with extend
1363 case Sgpr32AExt: {
1364 // Note: this ext allows S1, and it is meant to be combined away.
1365 assert(Ty.getSizeInBits() < 32);
1366 assert(RB == SgprRB);
1367 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1368 Op.setReg(Aext.getReg(0));
1369 break;
1370 }
1371 case Sgpr32AExtBoolInReg: {
1372 // Note: this ext allows S1, and it is meant to be combined away.
1373 assert(Ty.getSizeInBits() == 1);
1374 assert(RB == SgprRB);
1375 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1376 // Zext of SgprS1 is not legal; build an AND with 1 instead. This
1377 // instruction is most of the time combined away in AMDGPURegBankCombiner.
1378 auto Cst1 = B.buildConstant(SgprRB_S32, 1);
1379 auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
1380 Op.setReg(BoolInReg.getReg(0));
1381 break;
1382 }
1383 case Sgpr32SExt: {
1384 assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
1385 assert(RB == SgprRB);
1386 auto Sext = B.buildSExt(SgprRB_S32, Reg);
1387 Op.setReg(Sext.getReg(0));
1388 break;
1389 }
1390 case Sgpr32ZExt: {
1391 assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
1392 assert(RB == SgprRB);
1393 auto Zext = B.buildZExt({SgprRB, S32}, Reg);
1394 Op.setReg(Zext.getReg(0));
1395 break;
1396 }
1397 case Vgpr32SExt: {
1398 // Note this ext allows S1, and it is meant to be combined away.
1399 assert(Ty.getSizeInBits() < 32);
1400 assert(RB == VgprRB);
1401 auto Sext = B.buildSExt({VgprRB, S32}, Reg);
1402 Op.setReg(Sext.getReg(0));
1403 break;
1404 }
1405 case Vgpr32ZExt: {
1406 // Note this ext allows S1, and it is meant to be combined away.
1407 assert(Ty.getSizeInBits() < 32);
1408 assert(RB == VgprRB);
1409 auto Zext = B.buildZExt({VgprRB, S32}, Reg);
1410 Op.setReg(Zext.getReg(0));
1411 break;
1412 }
1413 default:
1414 reportGISelFailure(
1415 MF, MORE, "amdgpu-regbanklegalize",
1416 "AMDGPU RegBankLegalize: applyMappingSrc, ID not supported", MI);
1417 return false;
1418 }
1419 }
1420 return true;
1421}
1422
1423bool RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
1424 Register Dst = MI.getOperand(0).getReg();
1425 LLT Ty = MRI.getType(Dst);
1426
1427 if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
1428 B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());
1429
1430 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1431 MI.getOperand(0).setReg(NewDst);
1432 B.buildTrunc(Dst, NewDst);
1433
1434 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1435 Register UseReg = MI.getOperand(i).getReg();
1436
1437 auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
1438 MachineBasicBlock *DefMBB = DefMI->getParent();
1439
1440 B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
1441
1442 auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
1443 MI.getOperand(i).setReg(NewUse.getReg(0));
1444 }
1445
1446 return true;
1447 }
1448
1449 // ALL divergent i1 phis should have been lowered and inst-selected into PHI
1450 // with sgpr reg class and S1 LLT in AMDGPUGlobalISelDivergenceLowering pass.
1451 // Note: this includes divergent phis that don't require lowering.
1452 if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
1453 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
1454 "AMDGPU RegBankLegalize: Can't lower divergent S1 G_PHI",
1455 MI);
1456 return false;
1457 }
1458
1459 // We accept all types that can fit in some register class.
1460 // Uniform G_PHIs have all sgpr registers.
1461 // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
1462 if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
1463 Ty == LLT::pointer(4, 64)) {
1464 return true;
1465 }
1466
1467 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
1468 "AMDGPU RegBankLegalize: type not supported for G_PHI",
1469 MI);
1470 return false;
1471}
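// Illustrative example: a uniform s1 G_PHI %d = G_PHI %a(%bb1), %b(%bb2) is
// rewritten as an s32 phi of any-extended inputs placed right after each
// input's def, with the original %d recovered through a G_TRUNC.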
1472
1473[[maybe_unused]] static bool verifyRegBankOnOperands(MachineInstr &MI,
1474 const RegisterBank *RB,
1475 MachineRegisterInfo &MRI,
1476 unsigned StartOpIdx,
1477 unsigned EndOpIdx) {
1478 for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
1479 if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
1480 return false;
1481 }
1482 return true;
1483}
1484
1485void RegBankLegalizeHelper::applyMappingTrivial(MachineInstr &MI) {
1486 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
1487 // Put RB on all registers
1488 unsigned NumDefs = MI.getNumDefs();
1489 unsigned NumOperands = MI.getNumOperands();
1490
1491 assert(verifyRegBankOnOperands(MI, RB, MRI, 0, NumDefs - 1));
1492 if (RB == SgprRB)
1493 assert(verifyRegBankOnOperands(MI, RB, MRI, NumDefs, NumOperands - 1));
1494
1495 if (RB == VgprRB) {
1496 B.setInstr(MI);
1497 for (unsigned i = NumDefs; i < NumOperands; ++i) {
1498 Register Reg = MI.getOperand(i).getReg();
1499 if (MRI.getRegBank(Reg) != RB) {
1500 auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
1501 MI.getOperand(i).setReg(Copy.getReg(0));
1502 }
1503 }
1504 }
1505}