1//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// The pass tries to use the 32-bit encoding for instructions when possible.
8//===----------------------------------------------------------------------===//
9//
10
12#include "AMDGPU.h"
13#include "GCNSubtarget.h"
16#include "llvm/ADT/Statistic.h"
18
19#define DEBUG_TYPE "si-shrink-instructions"
20
21STATISTIC(NumInstructionsShrunk,
22 "Number of 64-bit instruction reduced to 32-bit.");
23STATISTIC(NumLiteralConstantsFolded,
24 "Number of literal constants folded into 32-bit instructions.");
25
26using namespace llvm;
27
28namespace {
29
30class SIShrinkInstructions {
31 MachineFunction *MF;
32 MachineRegisterInfo *MRI;
33 const GCNSubtarget *ST;
34 const SIInstrInfo *TII;
35 const SIRegisterInfo *TRI;
36
37 bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
38 bool shouldShrinkTrue16(MachineInstr &MI) const;
39 bool isKImmOperand(const MachineOperand &Src) const;
40 bool isKUImmOperand(const MachineOperand &Src) const;
41 bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
42 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
43 void shrinkScalarCompare(MachineInstr &MI) const;
44 void shrinkMIMG(MachineInstr &MI) const;
45 void shrinkMadFma(MachineInstr &MI) const;
46 bool shrinkScalarLogicOp(MachineInstr &MI) const;
47 bool tryReplaceDeadSDST(MachineInstr &MI) const;
48 bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
49 Register Reg, unsigned SubReg) const;
50 bool instReadsReg(const MachineInstr *MI, unsigned Reg,
51 unsigned SubReg) const;
52 bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
53 unsigned SubReg) const;
54 TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
55 unsigned I) const;
56 void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
57 MachineInstr *matchSwap(MachineInstr &MovT) const;
58
59public:
60 SIShrinkInstructions() = default;
61 bool run(MachineFunction &MF);
62};
63
64class SIShrinkInstructionsLegacy : public MachineFunctionPass {
65
66public:
67 static char ID;
68
69 SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72
73 StringRef getPassName() const override { return "SI Shrink Instructions"; }
74
75 void getAnalysisUsage(AnalysisUsage &AU) const override {
76 AU.setPreservesCFG();
77 MachineFunctionPass::getAnalysisUsage(AU);
78 }
79};
80
81} // End anonymous namespace.
82
83INITIALIZE_PASS(SIShrinkInstructionsLegacy, DEBUG_TYPE,
84 "SI Shrink Instructions", false, false)
85
86char SIShrinkInstructionsLegacy::ID = 0;
87
88FunctionPass *llvm::createSIShrinkInstructionsLegacyPass() {
89 return new SIShrinkInstructionsLegacy();
90}
91
92/// This function checks \p MI for operands defined by a move immediate
93/// instruction and then folds the literal constant into the instruction if it
94/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
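///
/// Schematically (illustrative MIR, not taken from a real test):
///   %1 = V_MOV_B32_e32 42, implicit $exec
///   %2 = V_ADD_U32_e32 %1, %0, implicit $exec
/// becomes
///   %2 = V_ADD_U32_e32 42, %0, implicit $exec
/// and the V_MOV_B32_e32 is erased once it has no remaining non-debug uses.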
95bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
96 bool TryToCommute) const {
97 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
98
99 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
100
101 // Try to fold Src0
102 MachineOperand &Src0 = MI.getOperand(Src0Idx);
103 if (Src0.isReg()) {
104 Register Reg = Src0.getReg();
105 if (Reg.isVirtual()) {
106 MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
107 if (Def && Def->isMoveImmediate()) {
108 MachineOperand &MovSrc = Def->getOperand(1);
109 bool ConstantFolded = false;
110
111 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
112 if (MovSrc.isImm()) {
113 Src0.ChangeToImmediate(MovSrc.getImm());
114 ConstantFolded = true;
115 } else if (MovSrc.isFI()) {
116 Src0.ChangeToFrameIndex(MovSrc.getIndex());
117 ConstantFolded = true;
118 } else if (MovSrc.isGlobal()) {
119 Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
120 MovSrc.getTargetFlags());
121 ConstantFolded = true;
122 }
123 }
124
125 if (ConstantFolded) {
126 if (MRI->use_nodbg_empty(Reg))
127 Def->eraseFromParent();
128 ++NumLiteralConstantsFolded;
129 return true;
130 }
131 }
132 }
133 }
134
135 // We have failed to fold src0, so commute the instruction and try again.
136 if (TryToCommute && MI.isCommutable()) {
137 if (TII->commuteInstruction(MI)) {
138 if (foldImmediates(MI, false))
139 return true;
140
141 // Commute back.
142 TII->commuteInstruction(MI);
143 }
144 }
145
146 return false;
147}
148
149/// Do not shrink the instruction if its registers are not expressible in the
150/// shrunk encoding.
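///
/// The 16-bit True16 encodings can only address the low 128 VGPRs (the
/// *_Lo128 register classes), so an operand allocated to a higher VGPR makes
/// the shrunk form unencodable.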
151bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
152 for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
153 const MachineOperand &MO = MI.getOperand(I);
154 if (MO.isReg()) {
155 Register Reg = MO.getReg();
156 assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
157 "True16 Instructions post-RA");
158 if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
159 !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
160 return false;
161
162 if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
163 !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
164 return false;
165 }
166 }
167 return true;
168}
169
170bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
171 return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
172 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
173}
174
175bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
176 return isUInt<16>(Src.getImm()) &&
177 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
178}
179
180bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
181 bool &IsUnsigned) const {
182 if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
183 IsUnsigned = false;
184 return !TII->isInlineConstant(Src);
185 }
186
187 if (isUInt<16>(Src.getImm())) {
188 IsUnsigned = true;
189 return !TII->isInlineConstant(Src);
190 }
191
192 return false;
193}
194
195/// \returns the opcode of an instruction that a move immediate of the constant
196/// \p Src can be replaced with, if the constant is replaced with \p ModifiedImm.
197/// i.e.
198///
199/// If the bitreverse of a constant is an inline immediate, reverse the
200/// immediate and return the bitreverse opcode.
201///
202/// If the bitwise negation of a constant is an inline immediate, reverse the
203/// immediate and return the bitwise not opcode.
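///
/// For example:
///   v_mov_b32 v0, 0x80000000  ->  v_bfrev_b32 v0, 1
///   v_mov_b32 v0, 0xffffffc0  ->  v_not_b32 v0, 63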
204static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
205 const MachineOperand &Src,
206 int32_t &ModifiedImm, bool Scalar) {
207 if (TII->isInlineConstant(Src))
208 return 0;
209 int32_t SrcImm = static_cast<int32_t>(Src.getImm());
210
211 if (!Scalar) {
212 // We could handle the scalar case here, but we would need to check
213 // that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
214 // it, as the reasonable values are already covered by s_movk_i32.
215 ModifiedImm = ~SrcImm;
216 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
217 return AMDGPU::V_NOT_B32_e32;
218 }
219
220 ModifiedImm = reverseBits<int32_t>(SrcImm);
221 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
222 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
223
224 return 0;
225}
226
227/// Copy implicit register operands from the specified instruction to this
228/// instruction that are not part of the instruction definition.
229void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
230 MachineInstr &MI) const {
231 MachineFunction &MF = *MI.getMF();
232 for (unsigned i = MI.getDesc().getNumOperands() +
233 MI.getDesc().implicit_uses().size() +
234 MI.getDesc().implicit_defs().size(),
235 e = MI.getNumOperands();
236 i != e; ++i) {
237 const MachineOperand &MO = MI.getOperand(i);
238 if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
239 NewMI.addOperand(MF, MO);
240 }
241}
242
243void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
244 if (!ST->hasSCmpK())
245 return;
246
247 // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
248 // get constants on the RHS.
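  // For example, a compare against a non-inline 16-bit literal such as
  //   s_cmp_eq_u32 s0, 0x1234
  // becomes s_cmpk_eq_u32 s0, 0x1234, dropping the 32-bit literal dword.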
249 if (!MI.getOperand(0).isReg())
250 TII->commuteInstruction(MI, false, 0, 1);
251
252 // cmpk requires src0 to be a register
253 const MachineOperand &Src0 = MI.getOperand(0);
254 if (!Src0.isReg())
255 return;
256
257 MachineOperand &Src1 = MI.getOperand(1);
258 if (!Src1.isImm())
259 return;
260
261 int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
262 if (SOPKOpc == -1)
263 return;
264
265 // eq/ne is special because the imm16 can be treated as signed or unsigned,
266 // and initially selected to the unsigned versions.
267 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
268 bool HasUImm;
269 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
270 if (!HasUImm) {
271 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
272 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
273 Src1.setImm(SignExtend32(Src1.getImm(), 32));
274 }
275
276 MI.setDesc(TII->get(SOPKOpc));
277 }
278
279 return;
280 }
281
282 const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
283
284 if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
285 (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
286 if (!SIInstrInfo::sopkIsZext(SOPKOpc))
287 Src1.setImm(SignExtend64(Src1.getImm(), 32));
288 MI.setDesc(NewDesc);
289 }
290}
291
292// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
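// For example, an NSA image_sample whose address registers happen to be
// allocated contiguously as v0, v1, v2 can use the sequential form taking
// v[0:2] instead, which needs fewer encoding dwords.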
293void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
294 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
295 if (!Info)
296 return;
297
298 uint8_t NewEncoding;
299 switch (Info->MIMGEncoding) {
300 case AMDGPU::MIMGEncGfx10NSA:
301 NewEncoding = AMDGPU::MIMGEncGfx10Default;
302 break;
303 case AMDGPU::MIMGEncGfx11NSA:
304 NewEncoding = AMDGPU::MIMGEncGfx11Default;
305 break;
306 default:
307 return;
308 }
309
310 int VAddr0Idx =
311 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
312 unsigned NewAddrDwords = Info->VAddrDwords;
313 const TargetRegisterClass *RC;
314
315 if (Info->VAddrDwords == 2) {
316 RC = &AMDGPU::VReg_64RegClass;
317 } else if (Info->VAddrDwords == 3) {
318 RC = &AMDGPU::VReg_96RegClass;
319 } else if (Info->VAddrDwords == 4) {
320 RC = &AMDGPU::VReg_128RegClass;
321 } else if (Info->VAddrDwords == 5) {
322 RC = &AMDGPU::VReg_160RegClass;
323 } else if (Info->VAddrDwords == 6) {
324 RC = &AMDGPU::VReg_192RegClass;
325 } else if (Info->VAddrDwords == 7) {
326 RC = &AMDGPU::VReg_224RegClass;
327 } else if (Info->VAddrDwords == 8) {
328 RC = &AMDGPU::VReg_256RegClass;
329 } else if (Info->VAddrDwords == 9) {
330 RC = &AMDGPU::VReg_288RegClass;
331 } else if (Info->VAddrDwords == 10) {
332 RC = &AMDGPU::VReg_320RegClass;
333 } else if (Info->VAddrDwords == 11) {
334 RC = &AMDGPU::VReg_352RegClass;
335 } else if (Info->VAddrDwords == 12) {
336 RC = &AMDGPU::VReg_384RegClass;
337 } else {
338 RC = &AMDGPU::VReg_512RegClass;
339 NewAddrDwords = 16;
340 }
341
342 unsigned VgprBase = 0;
343 unsigned NextVgpr = 0;
344 bool IsUndef = true;
345 bool IsKill = NewAddrDwords == Info->VAddrDwords;
346 const unsigned NSAMaxSize = ST->getNSAMaxSize();
347 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
348 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
349 for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
350 const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
351 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
352 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
353 assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
354
355 if (Idx == 0) {
356 VgprBase = Vgpr;
357 NextVgpr = Vgpr + Dwords;
358 } else if (Vgpr == NextVgpr) {
359 NextVgpr = Vgpr + Dwords;
360 } else {
361 return;
362 }
363
364 if (!Op.isUndef())
365 IsUndef = false;
366 if (!Op.isKill())
367 IsKill = false;
368 }
369
370 if (VgprBase + NewAddrDwords > 256)
371 return;
372
373 // Further check for implicit tied operands - this may be present if TFE is
374 // enabled
375 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
376 int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
377 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
378 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
379 int ToUntie = -1;
380 if (TFEVal || LWEVal) {
381 // TFE/LWE is enabled so we need to deal with an implicit tied operand
382 for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
383 if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
384 MI.getOperand(i).isImplicit()) {
385 // This is the tied operand
386 assert(
387 ToUntie == -1 &&
388 "found more than one tied implicit operand when expecting only 1");
389 ToUntie = i;
390 MI.untieRegOperand(ToUntie);
391 }
392 }
393 }
394
395 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
396 Info->VDataDwords, NewAddrDwords);
397 MI.setDesc(TII->get(NewOpcode));
398 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
399 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
400 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
401
402 for (unsigned i = 1; i < EndVAddr; ++i)
403 MI.removeOperand(VAddr0Idx + 1);
404
405 if (ToUntie >= 0) {
406 MI.tieOperands(
407 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
408 ToUntie - (EndVAddr - 1));
409 }
410}
411
412// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
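// Illustrative examples (0x42f60000 is a non-inline literal, 123.0f):
//   v_fma_f32 v0, v1, v2, 0x42f60000  ->  v_fmaak_f32 v0, v1, v2, 0x42f60000
//   v_fma_f32 v0, v1, 0x42f60000, v2  ->  v_fmamk_f32 v0, v1, 0x42f60000, v2
// The VOP3 form is replaced by the VOP2 form that embeds the literal as the
// "K" operand.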
413void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
414 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
415 // there is no reason to try to shrink them.
416 if (!ST->hasVOP3Literal())
417 return;
418
419 // There is no advantage to doing this pre-RA.
420 if (!MF->getProperties().hasProperty(
421 MachineFunctionProperties::Property::NoVRegs))
422 return;
423
424 if (TII->hasAnyModifiersSet(MI))
425 return;
426
427 const unsigned Opcode = MI.getOpcode();
428 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
429 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
430 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
431 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
432
433 bool Swap;
434
435 // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
436 if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
437 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
438 Swap = false;
439 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
440 Swap = true;
441 else
442 return;
443
444 switch (Opcode) {
445 default:
446 llvm_unreachable("Unexpected mad/fma opcode!");
447 case AMDGPU::V_MAD_F32_e64:
448 NewOpcode = AMDGPU::V_MADAK_F32;
449 break;
450 case AMDGPU::V_FMA_F32_e64:
451 NewOpcode = AMDGPU::V_FMAAK_F32;
452 break;
453 case AMDGPU::V_MAD_F16_e64:
454 NewOpcode = AMDGPU::V_MADAK_F16;
455 break;
456 case AMDGPU::V_FMA_F16_e64:
457 case AMDGPU::V_FMA_F16_gfx9_e64:
458 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459 : AMDGPU::V_FMAAK_F16;
460 break;
461 }
462 }
463
464 // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
465 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
466 if (Src1.isImm() && !TII->isInlineConstant(Src1))
467 Swap = false;
468 else if (Src0.isImm() && !TII->isInlineConstant(Src0))
469 Swap = true;
470 else
471 return;
472
473 switch (Opcode) {
474 default:
475 llvm_unreachable("Unexpected mad/fma opcode!");
476 case AMDGPU::V_MAD_F32_e64:
477 NewOpcode = AMDGPU::V_MADMK_F32;
478 break;
479 case AMDGPU::V_FMA_F32_e64:
480 NewOpcode = AMDGPU::V_FMAMK_F32;
481 break;
482 case AMDGPU::V_MAD_F16_e64:
483 NewOpcode = AMDGPU::V_MADMK_F16;
484 break;
485 case AMDGPU::V_FMA_F16_e64:
486 case AMDGPU::V_FMA_F16_gfx9_e64:
487 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488 : AMDGPU::V_FMAMK_F16;
489 break;
490 }
491 }
492
493 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
494 return;
495
496 if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
497 return;
498
499 if (Swap) {
500 // Swap Src0 and Src1 by building a new instruction.
501 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
502 MI.getOperand(0).getReg())
503 .add(Src1)
504 .add(Src0)
505 .add(Src2)
506 .setMIFlags(MI.getFlags());
507 MI.eraseFromParent();
508 } else {
509 TII->removeModOperands(MI);
510 MI.setDesc(TII->get(NewOpcode));
511 }
512}
513
514/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
515/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
516/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
517/// XNOR (as a ^ b == ~(a ^ ~b)).
518/// \returns true if the caller should continue the machine function iterator
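///
/// For example:
///   s_and_b32 s0, s0, 0xffffdfff  ->  s_bitset0_b32 s0, 13
///   s_or_b32  s0, s0, 0xffffffc0  ->  s_orn2_b32 s0, s0, 63
///   s_xor_b32 s0, s0, 0xffffffc0  ->  s_xnor_b32 s0, s0, 63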
519bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
520 unsigned Opc = MI.getOpcode();
521 const MachineOperand *Dest = &MI.getOperand(0);
522 MachineOperand *Src0 = &MI.getOperand(1);
523 MachineOperand *Src1 = &MI.getOperand(2);
524 MachineOperand *SrcReg = Src0;
525 MachineOperand *SrcImm = Src1;
526
527 if (!SrcImm->isImm() ||
528 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
529 return false;
530
531 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
532 uint32_t NewImm = 0;
533
534 if (Opc == AMDGPU::S_AND_B32) {
535 if (isPowerOf2_32(~Imm)) {
536 NewImm = llvm::countr_one(Imm);
537 Opc = AMDGPU::S_BITSET0_B32;
538 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
539 NewImm = ~Imm;
540 Opc = AMDGPU::S_ANDN2_B32;
541 }
542 } else if (Opc == AMDGPU::S_OR_B32) {
543 if (isPowerOf2_32(Imm)) {
544 NewImm = llvm::countr_zero(Imm);
545 Opc = AMDGPU::S_BITSET1_B32;
546 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
547 NewImm = ~Imm;
548 Opc = AMDGPU::S_ORN2_B32;
549 }
550 } else if (Opc == AMDGPU::S_XOR_B32) {
551 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
552 NewImm = ~Imm;
553 Opc = AMDGPU::S_XNOR_B32;
554 }
555 } else {
556 llvm_unreachable("unexpected opcode");
557 }
558
559 if (NewImm != 0) {
560 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
561 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
562 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
563 return true;
564 }
565
566 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
567 const bool IsUndef = SrcReg->isUndef();
568 const bool IsKill = SrcReg->isKill();
569 MI.setDesc(TII->get(Opc));
570 if (Opc == AMDGPU::S_BITSET0_B32 ||
571 Opc == AMDGPU::S_BITSET1_B32) {
572 Src0->ChangeToImmediate(NewImm);
573 // Remove the immediate and add the tied input.
574 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
575 /*isImp*/ false, IsKill,
576 /*isDead*/ false, IsUndef);
577 MI.tieOperands(0, 2);
578 } else {
579 SrcImm->setImm(NewImm);
580 }
581 }
582 }
583
584 return false;
585}
586
587// This is the same as MachineInstr::readsRegister/modifiesRegister except
588// it takes subregs into account.
589bool SIShrinkInstructions::instAccessReg(
590 iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
591 unsigned SubReg) const {
592 for (const MachineOperand &MO : R) {
593 if (!MO.isReg())
594 continue;
595
596 if (Reg.isPhysical() && MO.getReg().isPhysical()) {
597 if (TRI->regsOverlap(Reg, MO.getReg()))
598 return true;
599 } else if (MO.getReg() == Reg && Reg.isVirtual()) {
600 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
601 TRI->getSubRegIndexLaneMask(MO.getSubReg());
602 if (Overlap.any())
603 return true;
604 }
605 }
606 return false;
607}
608
609bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
610 unsigned SubReg) const {
611 return instAccessReg(MI->uses(), Reg, SubReg);
612}
613
614bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
615 unsigned SubReg) const {
616 return instAccessReg(MI->defs(), Reg, SubReg);
617}
618
619TargetInstrInfo::RegSubRegPair
620SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
621 unsigned I) const {
622 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
623 if (Reg.isPhysical()) {
624 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
625 } else {
626 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
627 }
628 }
629 return TargetInstrInfo::RegSubRegPair(Reg, Sub);
630}
631
632void SIShrinkInstructions::dropInstructionKeepingImpDefs(
633 MachineInstr &MI) const {
634 for (unsigned i = MI.getDesc().getNumOperands() +
635 MI.getDesc().implicit_uses().size() +
636 MI.getDesc().implicit_defs().size(),
637 e = MI.getNumOperands();
638 i != e; ++i) {
639 const MachineOperand &Op = MI.getOperand(i);
640 if (!Op.isDef())
641 continue;
642 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
643 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
644 }
645
646 MI.eraseFromParent();
647}
648
649// Match:
650// mov t, x
651// mov x, y
652// mov y, t
653//
654// =>
655//
656// mov t, x (t is potentially dead and move eliminated)
657// v_swap_b32 x, y
658//
659// Returns the next valid instruction pointer if it was able to create
660// v_swap_b32.
661//
662// This should not be done too early, so as not to prevent possible folding
663// that may remove the matched moves; it is preferably done before RA to
664// release saved registers, and also possibly after RA, which can insert copies.
665//
666// This is really just a generic peephole that is not a canonical shrinking,
667// although requirements match the pass placement and it reduces code size too.
668MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
669 assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
670 MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
671 MovT.getOpcode() == AMDGPU::COPY);
672
673 Register T = MovT.getOperand(0).getReg();
674 unsigned Tsub = MovT.getOperand(0).getSubReg();
675 MachineOperand &Xop = MovT.getOperand(1);
676
677 if (!Xop.isReg())
678 return nullptr;
679 Register X = Xop.getReg();
680 unsigned Xsub = Xop.getSubReg();
681
682 unsigned Size = TII->getOpSize(MovT, 0);
683
684 // We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
685// are not allocatable.
686 if (Size == 2 && X.isVirtual())
687 return nullptr;
688
689 if (!TRI->isVGPR(*MRI, X))
690 return nullptr;
691
692 const unsigned SearchLimit = 16;
693 unsigned Count = 0;
694 bool KilledT = false;
695 for (auto Iter = std::next(MovT.getIterator()),
696 E = MovT.getParent()->instr_end();
697 Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
698
699 MachineInstr *MovY = &*Iter;
700 KilledT = MovY->killsRegister(T, TRI);
701
702 if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
703 MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
704 MovY->getOpcode() != AMDGPU::COPY) ||
705 !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
706 MovY->getOperand(1).getSubReg() != Tsub)
707 continue;
708
709 Register Y = MovY->getOperand(0).getReg();
710 unsigned Ysub = MovY->getOperand(0).getSubReg();
711
712 if (!TRI->isVGPR(*MRI, Y))
713 continue;
714
715 MachineInstr *MovX = nullptr;
716 for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
717 I != IY; ++I) {
718 if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
719 instModifiesReg(&*I, T, Tsub) ||
720 (MovX && instModifiesReg(&*I, X, Xsub))) {
721 MovX = nullptr;
722 break;
723 }
724 if (!instReadsReg(&*I, Y, Ysub)) {
725 if (!MovX && instModifiesReg(&*I, X, Xsub)) {
726 MovX = nullptr;
727 break;
728 }
729 continue;
730 }
731 if (MovX ||
732 (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
733 I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
734 I->getOpcode() != AMDGPU::COPY) ||
735 I->getOperand(0).getReg() != X ||
736 I->getOperand(0).getSubReg() != Xsub) {
737 MovX = nullptr;
738 break;
739 }
740
741 if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
742 continue;
743
744 MovX = &*I;
745 }
746
747 if (!MovX)
748 continue;
749
750 LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
751
752 MachineBasicBlock &MBB = *MovX->getParent();
753 SmallVector<MachineInstr *, 4> Swaps;
754 if (Size == 2) {
755 auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
756 TII->get(AMDGPU::V_SWAP_B16))
757 .addDef(X)
758 .addDef(Y)
759 .addReg(Y)
760 .addReg(X)
761 .getInstr();
762 Swaps.push_back(MIB);
763 } else {
764 assert(Size > 0 && Size % 4 == 0);
765 for (unsigned I = 0; I < Size / 4; ++I) {
766 TargetInstrInfo::RegSubRegPair X1, Y1;
767 X1 = getSubRegForIndex(X, Xsub, I);
768 Y1 = getSubRegForIndex(Y, Ysub, I);
769 auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
770 TII->get(AMDGPU::V_SWAP_B32))
771 .addDef(X1.Reg, 0, X1.SubReg)
772 .addDef(Y1.Reg, 0, Y1.SubReg)
773 .addReg(Y1.Reg, 0, Y1.SubReg)
774 .addReg(X1.Reg, 0, X1.SubReg)
775 .getInstr();
776 Swaps.push_back(MIB);
777 }
778 }
779 // Drop implicit EXEC.
780 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
781 for (MachineInstr *Swap : Swaps) {
782 Swap->removeOperand(Swap->getNumExplicitOperands());
783 Swap->copyImplicitOps(*MBB.getParent(), *MovX);
784 }
785 }
786 MovX->eraseFromParent();
787 dropInstructionKeepingImpDefs(*MovY);
788 MachineInstr *Next = &*std::next(MovT.getIterator());
789
790 if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
791 dropInstructionKeepingImpDefs(MovT);
792 } else {
793 Xop.setIsKill(false);
794 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
795 unsigned OpNo = MovT.getNumExplicitOperands() + I;
796 const MachineOperand &Op = MovT.getOperand(OpNo);
797 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
798 MovT.removeOperand(OpNo);
799 }
800 }
801
802 return Next;
803 }
804
805 return nullptr;
806}
807
808// If an instruction has a dead sdst, replace it with the NULL register on gfx1030+.
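// For example, the unused carry-out of a v_add_co_u32_e64 can be redirected to
// 'null' instead of occupying a real SGPR (or SGPR pair in wave64).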
809bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
810 if (!ST->hasGFX10_3Insts())
811 return false;
812
813 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
814 if (!Op)
815 return false;
816 Register SDstReg = Op->getReg();
817 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
818 return false;
819
820 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
821 return true;
822}
823
824bool SIShrinkInstructions::run(MachineFunction &MF) {
825
826 this->MF = &MF;
827 MRI = &MF.getRegInfo();
829 TII = ST->getInstrInfo();
830 TRI = &TII->getRegisterInfo();
831
832 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
833
834 std::vector<unsigned> I1Defs;
835
836 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
837 BI != BE; ++BI) {
838
839 MachineBasicBlock &MBB = *BI;
840 MachineBasicBlock::iterator I, Next;
841 for (I = MBB.begin(); I != MBB.end(); I = Next) {
842 Next = std::next(I);
843 MachineInstr &MI = *I;
844
845 if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
846 // If this has a literal constant source that is the same as the
847 // reversed bits of an inline immediate, replace with a bitreverse of
848 // that constant. This saves 4 bytes in the common case of materializing
849 // sign bits.
850
851 // Test if we are after regalloc. We only want to do this after any
852 // optimizations happen because this will confuse them.
853 // XXX - not exactly a check for post-regalloc run.
854 MachineOperand &Src = MI.getOperand(1);
855 if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
856 int32_t ModImm;
857 unsigned ModOpcode =
858 canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
859 if (ModOpcode != 0) {
860 MI.setDesc(TII->get(ModOpcode));
861 Src.setImm(static_cast<int64_t>(ModImm));
862 continue;
863 }
864 }
865 }
866
867 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
868 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
869 MI.getOpcode() == AMDGPU::COPY)) {
870 if (auto *NextMI = matchSwap(MI)) {
871 Next = NextMI->getIterator();
872 continue;
873 }
874 }
875
876 // Try to use S_ADDK_I32 and S_MULK_I32.
877 if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
878 MI.getOpcode() == AMDGPU::S_MUL_I32) {
879 const MachineOperand *Dest = &MI.getOperand(0);
880 MachineOperand *Src0 = &MI.getOperand(1);
881 MachineOperand *Src1 = &MI.getOperand(2);
882
883 if (!Src0->isReg() && Src1->isReg()) {
884 if (TII->commuteInstruction(MI, false, 1, 2))
885 std::swap(Src0, Src1);
886 }
887
888 // FIXME: This could work better if hints worked with subregisters. If
889 // we have a vector add of a constant, we usually don't get the correct
890 // allocation due to the subregister usage.
891 if (Dest->getReg().isVirtual() && Src0->isReg()) {
892 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
893 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
894 continue;
895 }
896
897 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
898 if (Src1->isImm() && isKImmOperand(*Src1)) {
899 unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
900 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
901
902 Src1->setImm(SignExtend64(Src1->getImm(), 32));
903 MI.setDesc(TII->get(Opc));
904 MI.tieOperands(0, 1);
905 }
906 }
907 }
908
909 // Try to use s_cmpk_*
910 if (MI.isCompare() && TII->isSOPC(MI)) {
911 shrinkScalarCompare(MI);
912 continue;
913 }
914
915 // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
916 if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
917 const MachineOperand &Dst = MI.getOperand(0);
918 MachineOperand &Src = MI.getOperand(1);
919
920 if (Src.isImm() && Dst.getReg().isPhysical()) {
921 unsigned ModOpc;
922 int32_t ModImm;
923 if (isKImmOperand(Src)) {
924 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
925 Src.setImm(SignExtend64(Src.getImm(), 32));
926 } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
927 /*Scalar=*/true))) {
928 MI.setDesc(TII->get(ModOpc));
929 Src.setImm(static_cast<int64_t>(ModImm));
930 }
931 }
932
933 continue;
934 }
935
936 // Shrink scalar logic operations.
937 if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
938 MI.getOpcode() == AMDGPU::S_OR_B32 ||
939 MI.getOpcode() == AMDGPU::S_XOR_B32) {
940 if (shrinkScalarLogicOp(MI))
941 continue;
942 }
943
944 if (TII->isMIMG(MI.getOpcode()) &&
945 ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
946 MF.getProperties().hasProperty(
947 MachineFunctionProperties::Property::NoVRegs)) {
948 shrinkMIMG(MI);
949 continue;
950 }
951
952 if (!TII->isVOP3(MI))
953 continue;
954
955 if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
956 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
957 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
958 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
959 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
960 shrinkMadFma(MI);
961 continue;
962 }
963
964 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
965 // If there is no chance we will shrink it and use VCC as sdst to get
966 // a 32-bit form, try to replace the dead sdst with NULL.
967 tryReplaceDeadSDST(MI);
968 continue;
969 }
970
971 if (!TII->canShrink(MI, *MRI)) {
972 // Try commuting the instruction and see if that enables us to shrink
973 // it.
974 if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
975 !TII->canShrink(MI, *MRI)) {
976 tryReplaceDeadSDST(MI);
977 continue;
978 }
979 }
980
981 int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
982
983 if (TII->isVOPC(Op32)) {
984 MachineOperand &Op0 = MI.getOperand(0);
985 if (Op0.isReg()) {
986 // Exclude VOPCX instructions as these don't explicitly write a
987 // dst.
988 Register DstReg = Op0.getReg();
989 if (DstReg.isVirtual()) {
990 // VOPC instructions can only write to the VCC register. We can't
991 // force them to use VCC here, because this is only one register and
992 // cannot deal with sequences which would require multiple copies of
993 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
994 //
995 // So, instead of forcing the instruction to write to VCC, we
996 // provide a hint to the register allocator to use VCC and then we
997 // will run this pass again after RA and shrink it if it outputs to
998 // VCC.
999 MRI->setRegAllocationHint(DstReg, 0, VCCReg);
1000 continue;
1001 }
1002 if (DstReg != VCCReg)
1003 continue;
1004 }
1005 }
1006
1007 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1008 // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
1009 // instructions.
1010 const MachineOperand *Src2 =
1011 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1012 if (!Src2->isReg())
1013 continue;
1014 Register SReg = Src2->getReg();
1015 if (SReg.isVirtual()) {
1016 MRI->setRegAllocationHint(SReg, 0, VCCReg);
1017 continue;
1018 }
1019 if (SReg != VCCReg)
1020 continue;
1021 }
1022
1023 // Check for the bool flag output for instructions like V_ADD_I32_e64.
1024 const MachineOperand *SDst = TII->getNamedOperand(MI,
1025 AMDGPU::OpName::sdst);
1026
1027 if (SDst) {
1028 bool Next = false;
1029
1030 if (SDst->getReg() != VCCReg) {
1031 if (SDst->getReg().isVirtual())
1032 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
1033 Next = true;
1034 }
1035
1036 // All of the instructions with carry outs also have an SGPR input in
1037 // src2.
1038 const MachineOperand *Src2 = TII->getNamedOperand(MI,
1039 AMDGPU::OpName::src2);
1040 if (Src2 && Src2->getReg() != VCCReg) {
1041 if (Src2->getReg().isVirtual())
1042 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
1043 Next = true;
1044 }
1045
1046 if (Next)
1047 continue;
1048 }
1049
1050 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
1051 // fold an immediate into the shrunk instruction as a literal operand. In
1052 // GFX10 VOP3 instructions can take a literal operand anyway, so there is
1053 // no advantage to doing this.
1054 if (ST->hasVOP3Literal() &&
1055 !MF.getProperties().hasProperty(
1056 MachineFunctionProperties::Property::NoVRegs))
1057 continue;
1058
1059 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1060 !shouldShrinkTrue16(MI))
1061 continue;
1062
1063 // We can shrink this instruction
1064 LLVM_DEBUG(dbgs() << "Shrinking " << MI);
1065
1066 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
1067 ++NumInstructionsShrunk;
1068
1069 // Copy extra operands not present in the instruction definition.
1070 copyExtraImplicitOps(*Inst32, MI);
1071
1072 // Copy deadness from the old explicit vcc def to the new implicit def.
1073 if (SDst && SDst->isDead())
1074 Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();
1075
1076 MI.eraseFromParent();
1077 foldImmediates(*Inst32);
1078
1079 LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
1080 }
1081 }
1082 return false;
1083}
1084
1085bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
1086 if (skipFunction(MF.getFunction()))
1087 return false;
1088
1089 return SIShrinkInstructions().run(MF);
1090}
1091
1092PreservedAnalyses
1093SIShrinkInstructionsPass::run(MachineFunction &MF,
1094 MachineFunctionAnalysisManager &MFAM) {
1095 if (MF.getFunction().hasOptNone() || !SIShrinkInstructions().run(MF))
1096 return PreservedAnalyses::all();
1097
1098 auto PA = getMachineFunctionPassPreservedAnalyses();
1099 PA.preserveSet<CFGAnalyses>();
1100 return PA;
1101}