SIShrinkInstructions.cpp
1//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// The pass tries to use the 32-bit encoding for instructions when possible.
8//===----------------------------------------------------------------------===//
9//
10
11#include "AMDGPU.h"
12#include "GCNSubtarget.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "Utils/AMDGPUBaseInfo.h"
15#include "llvm/ADT/Statistic.h"
16#include "llvm/CodeGen/MachineFunctionPass.h"
17
18#define DEBUG_TYPE "si-shrink-instructions"
19
20STATISTIC(NumInstructionsShrunk,
21 "Number of 64-bit instruction reduced to 32-bit.");
22STATISTIC(NumLiteralConstantsFolded,
23 "Number of literal constants folded into 32-bit instructions.");
24
25using namespace llvm;
26
27namespace {
28
29class SIShrinkInstructions : public MachineFunctionPass {
30 MachineFunction *MF;
31 MachineRegisterInfo *MRI;
32 const GCNSubtarget *ST;
33 const SIInstrInfo *TII;
34 const SIRegisterInfo *TRI;
35
36public:
37 static char ID;
38
39public:
40 SIShrinkInstructions() : MachineFunctionPass(ID) {
41 }
42
43 bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
44 bool shouldShrinkTrue16(MachineInstr &MI) const;
45 bool isKImmOperand(const MachineOperand &Src) const;
46 bool isKUImmOperand(const MachineOperand &Src) const;
47 bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
48 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
49 void shrinkScalarCompare(MachineInstr &MI) const;
50 void shrinkMIMG(MachineInstr &MI) const;
51 void shrinkMadFma(MachineInstr &MI) const;
52 bool shrinkScalarLogicOp(MachineInstr &MI) const;
53 bool tryReplaceDeadSDST(MachineInstr &MI) const;
54 bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
55 Register Reg, unsigned SubReg) const;
56 bool instReadsReg(const MachineInstr *MI, unsigned Reg,
57 unsigned SubReg) const;
58 bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
59 unsigned SubReg) const;
60 TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
61 unsigned I) const;
62 void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
63 MachineInstr *matchSwap(MachineInstr &MovT) const;
64
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
67 StringRef getPassName() const override { return "SI Shrink Instructions"; }
68
69 void getAnalysisUsage(AnalysisUsage &AU) const override {
70 AU.setPreservesCFG();
71 MachineFunctionPass::getAnalysisUsage(AU);
72 }
73};
74
75} // End anonymous namespace.
76
77INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
78 "SI Shrink Instructions", false, false)
79
80char SIShrinkInstructions::ID = 0;
81
82FunctionPass *llvm::createSIShrinkInstructionsPass() {
83 return new SIShrinkInstructions();
84}
85
86/// This function checks \p MI for operands defined by a move immediate
87/// instruction and then folds the literal constant into the instruction if it
88/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
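/// For example (hypothetical registers and values), given
///   %1:vgpr_32 = V_MOV_B32_e32 0x41200000
///   %2:vgpr_32 = V_ADD_F32_e32 %1, %0
/// the literal can be folded to give
///   %2:vgpr_32 = V_ADD_F32_e32 0x41200000, %0
/// and the V_MOV is erased if %1 has no remaining non-debug uses.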
89bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
90 bool TryToCommute) const {
91 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
92
93 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
94
95 // Try to fold Src0
96 MachineOperand &Src0 = MI.getOperand(Src0Idx);
97 if (Src0.isReg()) {
98 Register Reg = Src0.getReg();
99 if (Reg.isVirtual()) {
100 MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
101 if (Def && Def->isMoveImmediate()) {
102 MachineOperand &MovSrc = Def->getOperand(1);
103 bool ConstantFolded = false;
104
105 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
106 if (MovSrc.isImm()) {
107 Src0.ChangeToImmediate(MovSrc.getImm());
108 ConstantFolded = true;
109 } else if (MovSrc.isFI()) {
110 Src0.ChangeToFrameIndex(MovSrc.getIndex());
111 ConstantFolded = true;
112 } else if (MovSrc.isGlobal()) {
113 Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
114 MovSrc.getTargetFlags());
115 ConstantFolded = true;
116 }
117 }
118
119 if (ConstantFolded) {
120 if (MRI->use_nodbg_empty(Reg))
121 Def->eraseFromParent();
122 ++NumLiteralConstantsFolded;
123 return true;
124 }
125 }
126 }
127 }
128
129 // We have failed to fold src0, so commute the instruction and try again.
130 if (TryToCommute && MI.isCommutable()) {
131 if (TII->commuteInstruction(MI)) {
132 if (foldImmediates(MI, false))
133 return true;
134
135 // Commute back.
136 TII->commuteInstruction(MI);
137 }
138 }
139
140 return false;
141}
142
143/// Do not shrink the instruction if its registers are not expressible in the
144/// shrunk encoding.
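/// (True16 encodings can only address the low 128 VGPRs, i.e. the *_Lo128
/// register classes, so any operand outside them makes the shrink illegal.)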
145bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
146 for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
147 const MachineOperand &MO = MI.getOperand(I);
148 if (MO.isReg()) {
149 Register Reg = MO.getReg();
150 assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
151 "True16 Instructions post-RA");
152 if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
153 !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
154 return false;
155
156 if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
157 !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
158 return false;
159 }
160 }
161 return true;
162}
163
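// A "KImm" operand is a literal that fits in a 16-bit immediate field (e.g.
// the simm16 of SOPK encodings) but is not already a free inline constant.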
164bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
165 return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
166 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
167}
168
169bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
170 return isUInt<16>(Src.getImm()) &&
171 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
172}
173
174bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
175 bool &IsUnsigned) const {
176 if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
177 IsUnsigned = false;
178 return !TII->isInlineConstant(Src);
179 }
180
181 if (isUInt<16>(Src.getImm())) {
182 IsUnsigned = true;
183 return !TII->isInlineConstant(Src);
184 }
185
186 return false;
187}
188
189/// \returns the opcode that a move immediate of the constant \p Src can be
190/// replaced with, provided the constant is rewritten to \p ModifiedImm.
191/// i.e.
192///
193/// If the bitreverse of a constant is an inline immediate, reverse the
194/// immediate and return the bitreverse opcode.
195///
196/// If the bitwise negation of a constant is an inline immediate, reverse the
197/// immediate and return the bitwise not opcode.
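/// For example (hypothetical), a move of the literal 0x80000000 can become a
/// bitreverse of the inline constant 1, and a move of 0xfffffffe can become a
/// bitwise not of 1, saving the 32-bit literal in both cases.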
198static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
199 const MachineOperand &Src,
200 int32_t &ModifiedImm, bool Scalar) {
201 if (TII->isInlineConstant(Src))
202 return 0;
203 int32_t SrcImm = static_cast<int32_t>(Src.getImm());
204
205 if (!Scalar) {
206 // We could handle the scalar case here as well, but we would need to check
207 // that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
208 // it, as the reasonable values are already covered by s_movk_i32.
209 ModifiedImm = ~SrcImm;
210 if (TII->isInlineConstant(APInt(32, ModifiedImm)))
211 return AMDGPU::V_NOT_B32_e32;
212 }
213
214 ModifiedImm = reverseBits<int32_t>(SrcImm);
215 if (TII->isInlineConstant(APInt(32, ModifiedImm)))
216 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
217
218 return 0;
219}
220
221/// Copy implicit register operands from specified instruction to this
222/// instruction that are not part of the instruction definition.
223void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
224 MachineInstr &MI) const {
225 MachineFunction &MF = *MI.getMF();
226 for (unsigned i = MI.getDesc().getNumOperands() +
227 MI.getDesc().implicit_uses().size() +
228 MI.getDesc().implicit_defs().size(),
229 e = MI.getNumOperands();
230 i != e; ++i) {
231 const MachineOperand &MO = MI.getOperand(i);
232 if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
233 NewMI.addOperand(MF, MO);
234 }
235}
236
237void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
238 if (!ST->hasSCmpK())
239 return;
240
241 // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
242 // get constants on the RHS.
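  // For example (hypothetical), s_cmp_lg_u32 s0, 0xabcd can be rewritten as
  // s_cmpk_lg_u32 s0, 0xabcd, dropping the 32-bit literal dword.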
243 if (!MI.getOperand(0).isReg())
244 TII->commuteInstruction(MI, false, 0, 1);
245
246 // cmpk requires src0 to be a register
247 const MachineOperand &Src0 = MI.getOperand(0);
248 if (!Src0.isReg())
249 return;
250
251 MachineOperand &Src1 = MI.getOperand(1);
252 if (!Src1.isImm())
253 return;
254
255 int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
256 if (SOPKOpc == -1)
257 return;
258
259 // eq/ne is special because the imm16 can be treated as signed or unsigned,
260 // and initially selected to the unsigned versions.
261 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
262 bool HasUImm;
263 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
264 if (!HasUImm) {
265 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
266 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
267 Src1.setImm(SignExtend32(Src1.getImm(), 32));
268 }
269
270 MI.setDesc(TII->get(SOPKOpc));
271 }
272
273 return;
274 }
275
276 const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
277
278 if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
279 (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
280 if (!SIInstrInfo::sopkIsZext(SOPKOpc))
281 Src1.setImm(SignExtend64(Src1.getImm(), 32));
282 MI.setDesc(NewDesc);
283 }
284}
285
286// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
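// NSA (non-sequential address) encodings name each address VGPR individually;
// when those registers happen to be consecutive, e.g. v[4:7] (hypothetical),
// the shorter sequential-address encoding can be used instead.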
287void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
288 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
289 if (!Info)
290 return;
291
292 uint8_t NewEncoding;
293 switch (Info->MIMGEncoding) {
294 case AMDGPU::MIMGEncGfx10NSA:
295 NewEncoding = AMDGPU::MIMGEncGfx10Default;
296 break;
297 case AMDGPU::MIMGEncGfx11NSA:
298 NewEncoding = AMDGPU::MIMGEncGfx11Default;
299 break;
300 default:
301 return;
302 }
303
304 int VAddr0Idx =
305 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
306 unsigned NewAddrDwords = Info->VAddrDwords;
307 const TargetRegisterClass *RC;
308
309 if (Info->VAddrDwords == 2) {
310 RC = &AMDGPU::VReg_64RegClass;
311 } else if (Info->VAddrDwords == 3) {
312 RC = &AMDGPU::VReg_96RegClass;
313 } else if (Info->VAddrDwords == 4) {
314 RC = &AMDGPU::VReg_128RegClass;
315 } else if (Info->VAddrDwords == 5) {
316 RC = &AMDGPU::VReg_160RegClass;
317 } else if (Info->VAddrDwords == 6) {
318 RC = &AMDGPU::VReg_192RegClass;
319 } else if (Info->VAddrDwords == 7) {
320 RC = &AMDGPU::VReg_224RegClass;
321 } else if (Info->VAddrDwords == 8) {
322 RC = &AMDGPU::VReg_256RegClass;
323 } else if (Info->VAddrDwords == 9) {
324 RC = &AMDGPU::VReg_288RegClass;
325 } else if (Info->VAddrDwords == 10) {
326 RC = &AMDGPU::VReg_320RegClass;
327 } else if (Info->VAddrDwords == 11) {
328 RC = &AMDGPU::VReg_352RegClass;
329 } else if (Info->VAddrDwords == 12) {
330 RC = &AMDGPU::VReg_384RegClass;
331 } else {
332 RC = &AMDGPU::VReg_512RegClass;
333 NewAddrDwords = 16;
334 }
335
336 unsigned VgprBase = 0;
337 unsigned NextVgpr = 0;
338 bool IsUndef = true;
339 bool IsKill = NewAddrDwords == Info->VAddrDwords;
340 const unsigned NSAMaxSize = ST->getNSAMaxSize();
341 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
342 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
343 for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
344 const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
345 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
346 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
347 assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
348
349 if (Idx == 0) {
350 VgprBase = Vgpr;
351 NextVgpr = Vgpr + Dwords;
352 } else if (Vgpr == NextVgpr) {
353 NextVgpr = Vgpr + Dwords;
354 } else {
355 return;
356 }
357
358 if (!Op.isUndef())
359 IsUndef = false;
360 if (!Op.isKill())
361 IsKill = false;
362 }
363
364 if (VgprBase + NewAddrDwords > 256)
365 return;
366
367 // Further check for implicit tied operands - this may be present if TFE is
368 // enabled
369 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
370 int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
371 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
372 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
373 int ToUntie = -1;
374 if (TFEVal || LWEVal) {
375 // TFE/LWE is enabled so we need to deal with an implicit tied operand
376 for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
377 if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
378 MI.getOperand(i).isImplicit()) {
379 // This is the tied operand
380 assert(
381 ToUntie == -1 &&
382 "found more than one tied implicit operand when expecting only 1");
383 ToUntie = i;
384 MI.untieRegOperand(ToUntie);
385 }
386 }
387 }
388
389 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
390 Info->VDataDwords, NewAddrDwords);
391 MI.setDesc(TII->get(NewOpcode));
392 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
393 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
394 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
395
396 for (unsigned i = 1; i < EndVAddr; ++i)
397 MI.removeOperand(VAddr0Idx + 1);
398
399 if (ToUntie >= 0) {
400 MI.tieOperands(
401 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
402 ToUntie - (EndVAddr - 1));
403 }
404}
405
406// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
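// The AK forms take the addend as a 32-bit literal and the MK forms take one
// multiplicand as a 32-bit literal, e.g. (hypothetical)
//   v_fma_f32 v0, v1, v2, 0x40490fdb  ->  v_fmaak_f32 v0, v1, v2, 0x40490fdb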
407void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
408 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
409 // there is no reason to try to shrink them.
410 if (!ST->hasVOP3Literal())
411 return;
412
413 // There is no advantage to doing this pre-RA.
414 if (!MF->getProperties().hasProperty(
415 MachineFunctionProperties::Property::NoVRegs))
416 return;
417
418 if (TII->hasAnyModifiersSet(MI))
419 return;
420
421 const unsigned Opcode = MI.getOpcode();
422 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
423 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
424 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
425 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
426
427 bool Swap;
428
429 // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
430 if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
431 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
432 Swap = false;
433 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
434 Swap = true;
435 else
436 return;
437
438 switch (Opcode) {
439 default:
440 llvm_unreachable("Unexpected mad/fma opcode!");
441 case AMDGPU::V_MAD_F32_e64:
442 NewOpcode = AMDGPU::V_MADAK_F32;
443 break;
444 case AMDGPU::V_FMA_F32_e64:
445 NewOpcode = AMDGPU::V_FMAAK_F32;
446 break;
447 case AMDGPU::V_MAD_F16_e64:
448 NewOpcode = AMDGPU::V_MADAK_F16;
449 break;
450 case AMDGPU::V_FMA_F16_e64:
451 case AMDGPU::V_FMA_F16_gfx9_e64:
452 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
453 : AMDGPU::V_FMAAK_F16;
454 break;
455 }
456 }
457
458 // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
459 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
460 if (Src1.isImm() && !TII->isInlineConstant(Src1))
461 Swap = false;
462 else if (Src0.isImm() && !TII->isInlineConstant(Src0))
463 Swap = true;
464 else
465 return;
466
467 switch (Opcode) {
468 default:
469 llvm_unreachable("Unexpected mad/fma opcode!");
470 case AMDGPU::V_MAD_F32_e64:
471 NewOpcode = AMDGPU::V_MADMK_F32;
472 break;
473 case AMDGPU::V_FMA_F32_e64:
474 NewOpcode = AMDGPU::V_FMAMK_F32;
475 break;
476 case AMDGPU::V_MAD_F16_e64:
477 NewOpcode = AMDGPU::V_MADMK_F16;
478 break;
479 case AMDGPU::V_FMA_F16_e64:
480 case AMDGPU::V_FMA_F16_gfx9_e64:
481 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
482 : AMDGPU::V_FMAMK_F16;
483 break;
484 }
485 }
486
487 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
488 return;
489
490 if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
491 return;
492
493 if (Swap) {
494 // Swap Src0 and Src1 by building a new instruction.
495 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
496 MI.getOperand(0).getReg())
497 .add(Src1)
498 .add(Src0)
499 .add(Src2)
500 .setMIFlags(MI.getFlags());
501 MI.eraseFromParent();
502 } else {
503 TII->removeModOperands(MI);
504 MI.setDesc(TII->get(NewOpcode));
505 }
506}
507
508/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
509/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
510/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
511/// XNOR (as a ^ b == ~(a ^ ~b)).
512/// \returns true if the caller should continue the machine function iterator
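/// For example (hypothetical): s_and_b32 s0, s0, 0xffff7fff becomes
/// s_bitset0_b32 s0, 15, and s_or_b32 s0, s0, 0xffffffc0 becomes
/// s_orn2_b32 s0, s0, 63.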
513bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
514 unsigned Opc = MI.getOpcode();
515 const MachineOperand *Dest = &MI.getOperand(0);
516 MachineOperand *Src0 = &MI.getOperand(1);
517 MachineOperand *Src1 = &MI.getOperand(2);
518 MachineOperand *SrcReg = Src0;
519 MachineOperand *SrcImm = Src1;
520
521 if (!SrcImm->isImm() ||
522 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
523 return false;
524
525 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
526 uint32_t NewImm = 0;
527
528 if (Opc == AMDGPU::S_AND_B32) {
529 if (isPowerOf2_32(~Imm)) {
530 NewImm = llvm::countr_one(Imm);
531 Opc = AMDGPU::S_BITSET0_B32;
532 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
533 NewImm = ~Imm;
534 Opc = AMDGPU::S_ANDN2_B32;
535 }
536 } else if (Opc == AMDGPU::S_OR_B32) {
537 if (isPowerOf2_32(Imm)) {
538 NewImm = llvm::countr_zero(Imm);
539 Opc = AMDGPU::S_BITSET1_B32;
540 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
541 NewImm = ~Imm;
542 Opc = AMDGPU::S_ORN2_B32;
543 }
544 } else if (Opc == AMDGPU::S_XOR_B32) {
545 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
546 NewImm = ~Imm;
547 Opc = AMDGPU::S_XNOR_B32;
548 }
549 } else {
550 llvm_unreachable("unexpected opcode");
551 }
552
553 if (NewImm != 0) {
554 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
555 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
556 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
557 return true;
558 }
559
560 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
561 const bool IsUndef = SrcReg->isUndef();
562 const bool IsKill = SrcReg->isKill();
563 MI.setDesc(TII->get(Opc));
564 if (Opc == AMDGPU::S_BITSET0_B32 ||
565 Opc == AMDGPU::S_BITSET1_B32) {
566 Src0->ChangeToImmediate(NewImm);
567 // Remove the immediate and add the tied input.
568 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
569 /*isImp*/ false, IsKill,
570 /*isDead*/ false, IsUndef);
571 MI.tieOperands(0, 2);
572 } else {
573 SrcImm->setImm(NewImm);
574 }
575 }
576 }
577
578 return false;
579}
580
581// This is the same as MachineInstr::readsRegister/modifiesRegister except
582// it takes subregs into account.
583bool SIShrinkInstructions::instAccessReg(
584 iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
585 unsigned SubReg) const {
586 for (const MachineOperand &MO : R) {
587 if (!MO.isReg())
588 continue;
589
590 if (Reg.isPhysical() && MO.getReg().isPhysical()) {
591 if (TRI->regsOverlap(Reg, MO.getReg()))
592 return true;
593 } else if (MO.getReg() == Reg && Reg.isVirtual()) {
594 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
595 TRI->getSubRegIndexLaneMask(MO.getSubReg());
596 if (Overlap.any())
597 return true;
598 }
599 }
600 return false;
601}
602
603bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
604 unsigned SubReg) const {
605 return instAccessReg(MI->uses(), Reg, SubReg);
606}
607
608bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
609 unsigned SubReg) const {
610 return instAccessReg(MI->defs(), Reg, SubReg);
611}
612
613TargetInstrInfo::RegSubRegPair
614SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
615 unsigned I) const {
616 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
617 if (Reg.isPhysical()) {
618 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
619 } else {
620 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
621 }
622 }
623 return TargetInstrInfo::RegSubRegPair(Reg, Sub);
624}
625
626void SIShrinkInstructions::dropInstructionKeepingImpDefs(
627 MachineInstr &MI) const {
628 for (unsigned i = MI.getDesc().getNumOperands() +
629 MI.getDesc().implicit_uses().size() +
630 MI.getDesc().implicit_defs().size(),
631 e = MI.getNumOperands();
632 i != e; ++i) {
633 const MachineOperand &Op = MI.getOperand(i);
634 if (!Op.isDef())
635 continue;
636 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
637 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
638 }
639
640 MI.eraseFromParent();
641}
642
643// Match:
644// mov t, x
645// mov x, y
646// mov y, t
647//
648// =>
649//
650// mov t, x (t is potentially dead and move eliminated)
651// v_swap_b32 x, y
652//
653// Returns the next valid instruction pointer if it was able to create v_swap_b32.
654//
655// This should not be done too early, so that it does not prevent folding
656// which may remove the matched moves; it should preferably be done before RA
657// to release the saved register, and possibly again after RA, which can
658// insert copies too.
659//
660// This is really just a generic peephole that is not a canonical shrinking,
661// although requirements match the pass placement and it reduces code size too.
662MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
663 assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
664 MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
665 MovT.getOpcode() == AMDGPU::COPY);
666
667 Register T = MovT.getOperand(0).getReg();
668 unsigned Tsub = MovT.getOperand(0).getSubReg();
669 MachineOperand &Xop = MovT.getOperand(1);
670
671 if (!Xop.isReg())
672 return nullptr;
673 Register X = Xop.getReg();
674 unsigned Xsub = Xop.getSubReg();
675
676 unsigned Size = TII->getOpSize(MovT, 0);
677
678 // We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
679 // are not allocatable.
680 if (Size == 2 && X.isVirtual())
681 return nullptr;
682
683 if (!TRI->isVGPR(*MRI, X))
684 return nullptr;
685
686 const unsigned SearchLimit = 16;
687 unsigned Count = 0;
688 bool KilledT = false;
689 for (auto Iter = std::next(MovT.getIterator()),
690 E = MovT.getParent()->instr_end();
691 Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
692
693 MachineInstr *MovY = &*Iter;
694 KilledT = MovY->killsRegister(T, TRI);
695
696 if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
697 MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
698 MovY->getOpcode() != AMDGPU::COPY) ||
699 !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
700 MovY->getOperand(1).getSubReg() != Tsub)
701 continue;
702
703 Register Y = MovY->getOperand(0).getReg();
704 unsigned Ysub = MovY->getOperand(0).getSubReg();
705
706 if (!TRI->isVGPR(*MRI, Y))
707 continue;
708
709 MachineInstr *MovX = nullptr;
710 for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
711 I != IY; ++I) {
712 if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
713 instModifiesReg(&*I, T, Tsub) ||
714 (MovX && instModifiesReg(&*I, X, Xsub))) {
715 MovX = nullptr;
716 break;
717 }
718 if (!instReadsReg(&*I, Y, Ysub)) {
719 if (!MovX && instModifiesReg(&*I, X, Xsub)) {
720 MovX = nullptr;
721 break;
722 }
723 continue;
724 }
725 if (MovX ||
726 (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
727 I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
728 I->getOpcode() != AMDGPU::COPY) ||
729 I->getOperand(0).getReg() != X ||
730 I->getOperand(0).getSubReg() != Xsub) {
731 MovX = nullptr;
732 break;
733 }
734
735 if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
736 continue;
737
738 MovX = &*I;
739 }
740
741 if (!MovX)
742 continue;
743
744 LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
745
746 MachineBasicBlock &MBB = *MovT.getParent();
747 SmallVector<MachineInstr *, 4> Swaps;
748 if (Size == 2) {
749 auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
750 TII->get(AMDGPU::V_SWAP_B16))
751 .addDef(X)
752 .addDef(Y)
753 .addReg(Y)
754 .addReg(X)
755 .getInstr();
756 Swaps.push_back(MIB);
757 } else {
758 assert(Size > 0 && Size % 4 == 0);
759 for (unsigned I = 0; I < Size / 4; ++I) {
760 TargetInstrInfo::RegSubRegPair X1, Y1;
761 X1 = getSubRegForIndex(X, Xsub, I);
762 Y1 = getSubRegForIndex(Y, Ysub, I);
763 auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
764 TII->get(AMDGPU::V_SWAP_B32))
765 .addDef(X1.Reg, 0, X1.SubReg)
766 .addDef(Y1.Reg, 0, Y1.SubReg)
767 .addReg(Y1.Reg, 0, Y1.SubReg)
768 .addReg(X1.Reg, 0, X1.SubReg)
769 .getInstr();
770 Swaps.push_back(MIB);
771 }
772 }
773 // Drop implicit EXEC.
774 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
775 for (MachineInstr *Swap : Swaps) {
776 Swap->removeOperand(Swap->getNumExplicitOperands());
777 Swap->copyImplicitOps(*MBB.getParent(), *MovX);
778 }
779 }
780 MovX->eraseFromParent();
781 dropInstructionKeepingImpDefs(*MovY);
782 MachineInstr *Next = &*std::next(MovT.getIterator());
783
784 if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
785 dropInstructionKeepingImpDefs(MovT);
786 } else {
787 Xop.setIsKill(false);
788 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
789 unsigned OpNo = MovT.getNumExplicitOperands() + I;
790 const MachineOperand &Op = MovT.getOperand(OpNo);
791 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
792 MovT.removeOperand(OpNo);
793 }
794 }
795
796 return Next;
797 }
798
799 return nullptr;
800}
801
802// If an instruction has a dead sdst operand, replace it with the NULL register on gfx1030+.
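// For example (hypothetical), in
//   %sum:vgpr_32, %carry:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %a, %b, 0
// where %carry has no non-debug uses, the carry-out can be redirected to the
// null register so no SGPR needs to be allocated for it.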
803bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
804 if (!ST->hasGFX10_3Insts())
805 return false;
806
807 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
808 if (!Op)
809 return false;
810 Register SDstReg = Op->getReg();
811 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
812 return false;
813
814 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
815 return true;
816}
817
818bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
819 if (skipFunction(MF.getFunction()))
820 return false;
821
822 this->MF = &MF;
823 MRI = &MF.getRegInfo();
824 ST = &MF.getSubtarget<GCNSubtarget>();
825 TII = ST->getInstrInfo();
826 TRI = &TII->getRegisterInfo();
827
828 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
829
830 std::vector<unsigned> I1Defs;
831
832 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
833 BI != BE; ++BI) {
834
835 MachineBasicBlock &MBB = *BI;
836 MachineBasicBlock::iterator I, Next;
837 for (I = MBB.begin(); I != MBB.end(); I = Next) {
838 Next = std::next(I);
839 MachineInstr &MI = *I;
840
841 if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
842 // If this has a literal constant source that is the same as the
843 // reversed bits of an inline immediate, replace with a bitreverse of
844 // that constant. This saves 4 bytes in the common case of materializing
845 // sign bits.
846
847 // Test if we are after regalloc. We only want to do this after any
848 // optimizations have run, because doing it earlier would confuse them.
849 // XXX - not exactly a check for post-regalloc run.
850 MachineOperand &Src = MI.getOperand(1);
851 if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
852 int32_t ModImm;
853 unsigned ModOpcode =
854 canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
855 if (ModOpcode != 0) {
856 MI.setDesc(TII->get(ModOpcode));
857 Src.setImm(static_cast<int64_t>(ModImm));
858 continue;
859 }
860 }
861 }
862
863 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
864 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
865 MI.getOpcode() == AMDGPU::COPY)) {
866 if (auto *NextMI = matchSwap(MI)) {
867 Next = NextMI->getIterator();
868 continue;
869 }
870 }
871
872 // Try to use S_ADDK_I32 and S_MULK_I32.
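      // For example (hypothetical), s_add_i32 s0, s0, 0x1234 becomes
      // s_addk_i32 s0, 0x1234 once the destination and first source are the
      // same register, saving the 32-bit literal dword.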
873 if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
874 MI.getOpcode() == AMDGPU::S_MUL_I32) {
875 const MachineOperand *Dest = &MI.getOperand(0);
876 MachineOperand *Src0 = &MI.getOperand(1);
877 MachineOperand *Src1 = &MI.getOperand(2);
878
879 if (!Src0->isReg() && Src1->isReg()) {
880 if (TII->commuteInstruction(MI, false, 1, 2))
881 std::swap(Src0, Src1);
882 }
883
884 // FIXME: This could work better if hints worked with subregisters. If
885 // we have a vector add of a constant, we usually don't get the correct
886 // allocation due to the subregister usage.
887 if (Dest->getReg().isVirtual() && Src0->isReg()) {
888 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
889 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
890 continue;
891 }
892
893 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
894 if (Src1->isImm() && isKImmOperand(*Src1)) {
895 unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
896 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
897
898 Src1->setImm(SignExtend64(Src1->getImm(), 32));
899 MI.setDesc(TII->get(Opc));
900 MI.tieOperands(0, 1);
901 }
902 }
903 }
904
905 // Try to use s_cmpk_*
906 if (MI.isCompare() && TII->isSOPC(MI)) {
907 shrinkScalarCompare(MI);
908 continue;
909 }
910
911 // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
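      // For example (hypothetical), s_mov_b32 s0, 0xffff8000 becomes
      // s_movk_i32 s0, 0x8000; the 16-bit immediate is sign-extended back to
      // 0xffff8000 by the hardware.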
912 if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
913 const MachineOperand &Dst = MI.getOperand(0);
914 MachineOperand &Src = MI.getOperand(1);
915
916 if (Src.isImm() && Dst.getReg().isPhysical()) {
917 unsigned ModOpc;
918 int32_t ModImm;
919 if (isKImmOperand(Src)) {
920 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
921 Src.setImm(SignExtend64(Src.getImm(), 32));
922 } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
923 /*Scalar=*/true))) {
924 MI.setDesc(TII->get(ModOpc));
925 Src.setImm(static_cast<int64_t>(ModImm));
926 }
927 }
928
929 continue;
930 }
931
932 // Shrink scalar logic operations.
933 if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
934 MI.getOpcode() == AMDGPU::S_OR_B32 ||
935 MI.getOpcode() == AMDGPU::S_XOR_B32) {
936 if (shrinkScalarLogicOp(MI))
937 continue;
938 }
939
940 if (TII->isMIMG(MI.getOpcode()) &&
941 ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
942 MF.getProperties().hasProperty(
943 MachineFunctionProperties::Property::NoVRegs)) {
944 shrinkMIMG(MI);
945 continue;
946 }
947
948 if (!TII->isVOP3(MI))
949 continue;
950
951 if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
952 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
953 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
954 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
955 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
956 shrinkMadFma(MI);
957 continue;
958 }
959
960 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
961 // If there is no chance we will shrink it and use VCC as sdst to get
962 // a 32-bit form, try to replace a dead sdst with NULL.
963 tryReplaceDeadSDST(MI);
964 continue;
965 }
966
967 if (!TII->canShrink(MI, *MRI)) {
968 // Try commuting the instruction and see if that enables us to shrink
969 // it.
970 if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
971 !TII->canShrink(MI, *MRI)) {
972 tryReplaceDeadSDST(MI);
973 continue;
974 }
975 }
976
977 int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
978
979 if (TII->isVOPC(Op32)) {
980 MachineOperand &Op0 = MI.getOperand(0);
981 if (Op0.isReg()) {
982 // Exclude VOPCX instructions as these don't explicitly write a
983 // dst.
984 Register DstReg = Op0.getReg();
985 if (DstReg.isVirtual()) {
986 // VOPC instructions can only write to the VCC register. We can't
987 // force them to use VCC here, because this is only one register and
988 // cannot deal with sequences which would require multiple copies of
989 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
990 //
991 // So, instead of forcing the instruction to write to VCC, we
992 // provide a hint to the register allocator to use VCC and then we
993 // will run this pass again after RA and shrink it if it outputs to
994 // VCC.
995 MRI->setRegAllocationHint(DstReg, 0, VCCReg);
996 continue;
997 }
998 if (DstReg != VCCReg)
999 continue;
1000 }
1001 }
1002
1003 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1004 // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
1005 // instructions.
1006 const MachineOperand *Src2 =
1007 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1008 if (!Src2->isReg())
1009 continue;
1010 Register SReg = Src2->getReg();
1011 if (SReg.isVirtual()) {
1012 MRI->setRegAllocationHint(SReg, 0, VCCReg);
1013 continue;
1014 }
1015 if (SReg != VCCReg)
1016 continue;
1017 }
1018
1019 // Check for the bool flag output for instructions like V_ADD_I32_e64.
1020 const MachineOperand *SDst = TII->getNamedOperand(MI,
1021 AMDGPU::OpName::sdst);
1022
1023 if (SDst) {
1024 bool Next = false;
1025
1026 if (SDst->getReg() != VCCReg) {
1027 if (SDst->getReg().isVirtual())
1028 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
1029 Next = true;
1030 }
1031
1032 // All of the instructions with carry outs also have an SGPR input in
1033 // src2.
1034 const MachineOperand *Src2 = TII->getNamedOperand(MI,
1035 AMDGPU::OpName::src2);
1036 if (Src2 && Src2->getReg() != VCCReg) {
1037 if (Src2->getReg().isVirtual())
1038 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
1039 Next = true;
1040 }
1041
1042 if (Next)
1043 continue;
1044 }
1045
1046 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
1047 // fold an immediate into the shrunk instruction as a literal operand. In
1048 // GFX10 VOP3 instructions can take a literal operand anyway, so there is
1049 // no advantage to doing this.
1050 if (ST->hasVOP3Literal() &&
1051 !MF.getProperties().hasProperty(
1052 MachineFunctionProperties::Property::NoVRegs))
1053 continue;
1054
1055 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1056 !shouldShrinkTrue16(MI))
1057 continue;
1058
1059 // We can shrink this instruction
1060 LLVM_DEBUG(dbgs() << "Shrinking " << MI);
1061
1062 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
1063 ++NumInstructionsShrunk;
1064
1065 // Copy extra operands not present in the instruction definition.
1066 copyExtraImplicitOps(*Inst32, MI);
1067
1068 // Copy deadness from the old explicit vcc def to the new implicit def.
1069 if (SDst && SDst->isDead())
1070 Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();
1071
1072 MI.eraseFromParent();
1073 foldImmediates(*Inst32);
1074
1075 LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
1076 }
1077 }
1078 return false;
1079}