SIShrinkInstructions.cpp
1//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// The pass tries to use the 32-bit encoding for instructions when possible.
8//===----------------------------------------------------------------------===//
9//
10
11#include "SIShrinkInstructions.h"
12#include "AMDGPU.h"
13#include "GCNSubtarget.h"
14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18
19#define DEBUG_TYPE "si-shrink-instructions"
20
21STATISTIC(NumInstructionsShrunk,
22 "Number of 64-bit instruction reduced to 32-bit.");
23STATISTIC(NumLiteralConstantsFolded,
24 "Number of literal constants folded into 32-bit instructions.");
25
26using namespace llvm;
27
28namespace {
29
30class SIShrinkInstructions {
31 MachineFunction *MF;
32 MachineRegisterInfo *MRI;
33 const GCNSubtarget *ST;
34 const SIInstrInfo *TII;
35 const SIRegisterInfo *TRI;
36
37 bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
38 bool shouldShrinkTrue16(MachineInstr &MI) const;
39 bool isKImmOperand(const MachineOperand &Src) const;
40 bool isKUImmOperand(const MachineOperand &Src) const;
41 bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
42 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
43 void shrinkScalarCompare(MachineInstr &MI) const;
44 void shrinkMIMG(MachineInstr &MI) const;
45 void shrinkMadFma(MachineInstr &MI) const;
46 bool shrinkScalarLogicOp(MachineInstr &MI) const;
47 bool tryReplaceDeadSDST(MachineInstr &MI) const;
48 bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
49 Register Reg, unsigned SubReg) const;
50 bool instReadsReg(const MachineInstr *MI, unsigned Reg,
51 unsigned SubReg) const;
52 bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
53 unsigned SubReg) const;
54 TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
55 unsigned I) const;
56 void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
57 MachineInstr *matchSwap(MachineInstr &MovT) const;
58
59public:
60 SIShrinkInstructions() = default;
61 bool run(MachineFunction &MF);
62};
63
64class SIShrinkInstructionsLegacy : public MachineFunctionPass {
65
66public:
67 static char ID;
68
69 SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72
73 StringRef getPassName() const override { return "SI Shrink Instructions"; }
74
75 void getAnalysisUsage(AnalysisUsage &AU) const override {
76 AU.setPreservesCFG();
77 MachineFunctionPass::getAnalysisUsage(AU);
78 }
79};
80
81} // End anonymous namespace.
82
83INITIALIZE_PASS(SIShrinkInstructionsLegacy, DEBUG_TYPE,
84 "SI Shrink Instructions", false, false)
85
86char SIShrinkInstructionsLegacy::ID = 0;
87
88FunctionPass *llvm::createSIShrinkInstructionsLegacyPass() {
89 return new SIShrinkInstructionsLegacy();
90}
91
92/// This function checks \p MI for operands defined by a move immediate
93/// instruction and then folds the literal constant into the instruction if it
94/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
95bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
96 bool TryToCommute) const {
97 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
98
99 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
100
101 // Try to fold Src0
102 MachineOperand &Src0 = MI.getOperand(Src0Idx);
103 if (Src0.isReg()) {
104 Register Reg = Src0.getReg();
105 if (Reg.isVirtual()) {
106 MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
107 if (Def && Def->isMoveImmediate()) {
108 MachineOperand &MovSrc = Def->getOperand(1);
109 bool ConstantFolded = false;
110
111 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
112 if (MovSrc.isImm()) {
113 Src0.ChangeToImmediate(MovSrc.getImm());
114 ConstantFolded = true;
115 } else if (MovSrc.isFI()) {
116 Src0.ChangeToFrameIndex(MovSrc.getIndex());
117 ConstantFolded = true;
118 } else if (MovSrc.isGlobal()) {
119 Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
120 MovSrc.getTargetFlags());
121 ConstantFolded = true;
122 }
123 }
124
125 if (ConstantFolded) {
126 if (MRI->use_nodbg_empty(Reg))
127 Def->eraseFromParent();
128 ++NumLiteralConstantsFolded;
129 return true;
130 }
131 }
132 }
133 }
134
135 // We have failed to fold src0, so commute the instruction and try again.
136 if (TryToCommute && MI.isCommutable()) {
137 if (TII->commuteInstruction(MI)) {
138 if (foldImmediates(MI, false))
139 return true;
140
141 // Commute back.
142 TII->commuteInstruction(MI);
143 }
144 }
145
146 return false;
147}
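// An illustrative sketch of the fold performed above (the virtual registers
// and the VOP2 opcode are arbitrary, not taken from an actual test):
//   %1:vgpr_32 = V_MOV_B32_e32 0x40400000, implicit $exec
//   %2:vgpr_32 = V_ADD_F32_e32 %1, %0, implicit $exec
// becomes, once the literal is folded and the now-dead mov is erased:
//   %2:vgpr_32 = V_ADD_F32_e32 0x40400000, %0, implicit $exec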
148
149/// Do not shrink the instruction if its registers are not expressible in the
150/// shrunk encoding.
151bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
152 for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
153 const MachineOperand &MO = MI.getOperand(I);
154 if (MO.isReg()) {
155 Register Reg = MO.getReg();
156 assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
157 "True16 Instructions post-RA");
158 if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
159 !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
160 return false;
161
162 if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
163 !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
164 return false;
165 }
166 }
167 return true;
168}
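// For example (hypothetical allocation): a shrunk true16/fake16 encoding can
// only address the Lo128 register classes, so an operand that was allocated to
// v200 would make the 16-bit-friendly form unusable, since only v0-v127 are
// encodable there.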
169
170bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
171 return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
172 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
173}
174
175bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
176 return isUInt<16>(Src.getImm()) &&
177 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
178}
179
180bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
181 bool &IsUnsigned) const {
182 if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
183 IsUnsigned = false;
184 return !TII->isInlineConstant(Src);
185 }
186
187 if (isUInt<16>(Src.getImm())) {
188 IsUnsigned = true;
189 return !TII->isInlineConstant(Src);
190 }
191
192 return false;
193}
194
195/// \returns the opcode of an instruction that a move immediate of the constant
196/// \p Src can be replaced with if the constant is replaced with \p ModifiedImm,
197/// i.e.:
198///
199/// If the bitreverse of a constant is an inline immediate, reverse the
200/// immediate and return the bitreverse opcode.
201///
202/// If the bitwise negation of a constant is an inline immediate, reverse the
203/// immediate and return the bitwise not opcode.
204static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
205 const MachineOperand &Src,
206 int32_t &ModifiedImm, bool Scalar) {
207 if (TII->isInlineConstant(Src))
208 return 0;
209 int32_t SrcImm = static_cast<int32_t>(Src.getImm());
210
211 if (!Scalar) {
212 // We could handle the scalar case here as well, but we would need to check
213 // that SCC is not live, as S_NOT_B32 clobbers it. It's probably not worth
214 // it, as the reasonable values are already covered by s_movk_i32.
215 ModifiedImm = ~SrcImm;
216 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
217 return AMDGPU::V_NOT_B32_e32;
218 }
219
220 ModifiedImm = reverseBits<int32_t>(SrcImm);
221 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
222 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
223
224 return 0;
225}
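// Illustrative examples of the two rewrites (the destination register is
// arbitrary):
//   v_mov_b32_e32 v0, 0xffffffc0  -->  v_not_b32_e32 v0, 63    ; ~0xffffffc0 == 63, an inline constant
//   v_mov_b32_e32 v0, 0x80000000  -->  v_bfrev_b32_e32 v0, 1   ; bitreverse(0x80000000) == 1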
226
227/// Copy implicit register operands from specified instruction to this
228/// instruction that are not part of the instruction definition.
229void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
230 MachineInstr &MI) const {
231 MachineFunction &MF = *MI.getMF();
232 for (unsigned i = MI.getDesc().getNumOperands() +
233 MI.getDesc().implicit_uses().size() +
234 MI.getDesc().implicit_defs().size(),
235 e = MI.getNumOperands();
236 i != e; ++i) {
237 const MachineOperand &MO = MI.getOperand(i);
238 if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
239 NewMI.addOperand(MF, MO);
240 }
241}
242
243void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
244 if (!ST->hasSCmpK())
245 return;
246
247 // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
248 // get constants on the RHS.
249 if (!MI.getOperand(0).isReg())
250 TII->commuteInstruction(MI, false, 0, 1);
251
252 // cmpk requires src0 to be a register
253 const MachineOperand &Src0 = MI.getOperand(0);
254 if (!Src0.isReg())
255 return;
256
257 MachineOperand &Src1 = MI.getOperand(1);
258 if (!Src1.isImm())
259 return;
260
261 int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
262 if (SOPKOpc == -1)
263 return;
264
265 // eq/ne is special because the imm16 can be treated as signed or unsigned,
266 // and initially selected to the unsigned versions.
267 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
268 bool HasUImm;
269 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
270 if (!HasUImm) {
271 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
272 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
273 Src1.setImm(SignExtend32(Src1.getImm(), 32));
274 }
275
276 MI.setDesc(TII->get(SOPKOpc));
277 }
278
279 return;
280 }
281
282 const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
283
284 if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
285 (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
286 if (!SIInstrInfo::sopkIsZext(SOPKOpc))
287 Src1.setImm(SignExtend64(Src1.getImm(), 32));
288 MI.setDesc(NewDesc);
289 }
290}
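// Illustrative example of the shrink performed above (operands are arbitrary):
//   s_cmp_gt_i32 s0, 0x1234    ; SOPC form, the literal needs an extra dword
// becomes
//   s_cmpk_gt_i32 s0, 0x1234   ; SOPK form, imm16 encoded in the instruction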
291
292// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
293void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
294 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
295 if (!Info)
296 return;
297
298 uint8_t NewEncoding;
299 switch (Info->MIMGEncoding) {
300 case AMDGPU::MIMGEncGfx10NSA:
301 NewEncoding = AMDGPU::MIMGEncGfx10Default;
302 break;
303 case AMDGPU::MIMGEncGfx11NSA:
304 NewEncoding = AMDGPU::MIMGEncGfx11Default;
305 break;
306 default:
307 return;
308 }
309
310 int VAddr0Idx =
311 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
312 unsigned NewAddrDwords = Info->VAddrDwords;
313 const TargetRegisterClass *RC;
314
315 if (Info->VAddrDwords == 2) {
316 RC = &AMDGPU::VReg_64RegClass;
317 } else if (Info->VAddrDwords == 3) {
318 RC = &AMDGPU::VReg_96RegClass;
319 } else if (Info->VAddrDwords == 4) {
320 RC = &AMDGPU::VReg_128RegClass;
321 } else if (Info->VAddrDwords == 5) {
322 RC = &AMDGPU::VReg_160RegClass;
323 } else if (Info->VAddrDwords == 6) {
324 RC = &AMDGPU::VReg_192RegClass;
325 } else if (Info->VAddrDwords == 7) {
326 RC = &AMDGPU::VReg_224RegClass;
327 } else if (Info->VAddrDwords == 8) {
328 RC = &AMDGPU::VReg_256RegClass;
329 } else if (Info->VAddrDwords == 9) {
330 RC = &AMDGPU::VReg_288RegClass;
331 } else if (Info->VAddrDwords == 10) {
332 RC = &AMDGPU::VReg_320RegClass;
333 } else if (Info->VAddrDwords == 11) {
334 RC = &AMDGPU::VReg_352RegClass;
335 } else if (Info->VAddrDwords == 12) {
336 RC = &AMDGPU::VReg_384RegClass;
337 } else {
338 RC = &AMDGPU::VReg_512RegClass;
339 NewAddrDwords = 16;
340 }
341
342 unsigned VgprBase = 0;
343 unsigned NextVgpr = 0;
344 bool IsUndef = true;
345 bool IsKill = NewAddrDwords == Info->VAddrDwords;
346 const unsigned NSAMaxSize = ST->getNSAMaxSize();
347 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
348 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
349 for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
350 const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
351 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
352 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
353 assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
354
355 if (Idx == 0) {
356 VgprBase = Vgpr;
357 NextVgpr = Vgpr + Dwords;
358 } else if (Vgpr == NextVgpr) {
359 NextVgpr = Vgpr + Dwords;
360 } else {
361 return;
362 }
363
364 if (!Op.isUndef())
365 IsUndef = false;
366 if (!Op.isKill())
367 IsKill = false;
368 }
369
370 if (VgprBase + NewAddrDwords > 256)
371 return;
372
373 // Further check for implicit tied operands - these may be present if TFE is
374 // enabled.
375 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
376 int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
377 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
378 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
379 int ToUntie = -1;
380 if (TFEVal || LWEVal) {
381 // TFE/LWE is enabled so we need to deal with an implicit tied operand
382 for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
383 if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
384 MI.getOperand(i).isImplicit()) {
385 // This is the tied operand
386 assert(
387 ToUntie == -1 &&
388 "found more than one tied implicit operand when expecting only 1");
389 ToUntie = i;
390 MI.untieRegOperand(ToUntie);
391 }
392 }
393 }
394
395 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
396 Info->VDataDwords, NewAddrDwords);
397 MI.setDesc(TII->get(NewOpcode));
398 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
399 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
400 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
401
402 for (unsigned i = 1; i < EndVAddr; ++i)
403 MI.removeOperand(VAddr0Idx + 1);
404
405 if (ToUntie >= 0) {
406 MI.tieOperands(
407 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
408 ToUntie - (EndVAddr - 1));
409 }
410}
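// Illustrative example (gfx10-style syntax, operands abridged): an NSA sample
// whose address registers happen to be contiguous, e.g.
//   image_sample v[0:3], [v4, v5, v6], s[0:7], s[8:11]
// can be re-encoded without NSA as
//   image_sample v[0:3], v[4:6], s[0:7], s[8:11]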
411
412// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
413void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
414 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
415 // there is no reason to try to shrink them.
416 if (!ST->hasVOP3Literal())
417 return;
418
419 // There is no advantage to doing this pre-RA.
420 if (!MF->getProperties().hasProperty(
421 MachineFunctionProperties::Property::NoVRegs))
422 return;
423
424 if (TII->hasAnyModifiersSet(MI))
425 return;
426
427 const unsigned Opcode = MI.getOpcode();
428 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
429 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
430 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
431 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
432
433 bool Swap;
434
435 // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
436 if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
437 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
438 Swap = false;
439 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
440 Swap = true;
441 else
442 return;
443
444 switch (Opcode) {
445 default:
446 llvm_unreachable("Unexpected mad/fma opcode!");
447 case AMDGPU::V_MAD_F32_e64:
448 NewOpcode = AMDGPU::V_MADAK_F32;
449 break;
450 case AMDGPU::V_FMA_F32_e64:
451 NewOpcode = AMDGPU::V_FMAAK_F32;
452 break;
453 case AMDGPU::V_MAD_F16_e64:
454 NewOpcode = AMDGPU::V_MADAK_F16;
455 break;
456 case AMDGPU::V_FMA_F16_e64:
457 case AMDGPU::V_FMA_F16_gfx9_e64:
458 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
459 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
460 : AMDGPU::V_FMAAK_F16;
461 break;
462 }
463 }
464
465 // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
466 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
467 if (Src1.isImm() && !TII->isInlineConstant(Src1))
468 Swap = false;
469 else if (Src0.isImm() && !TII->isInlineConstant(Src0))
470 Swap = true;
471 else
472 return;
473
474 switch (Opcode) {
475 default:
476 llvm_unreachable("Unexpected mad/fma opcode!");
477 case AMDGPU::V_MAD_F32_e64:
478 NewOpcode = AMDGPU::V_MADMK_F32;
479 break;
480 case AMDGPU::V_FMA_F32_e64:
481 NewOpcode = AMDGPU::V_FMAMK_F32;
482 break;
483 case AMDGPU::V_MAD_F16_e64:
484 NewOpcode = AMDGPU::V_MADMK_F16;
485 break;
486 case AMDGPU::V_FMA_F16_e64:
487 case AMDGPU::V_FMA_F16_gfx9_e64:
488 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
489 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
490 : AMDGPU::V_FMAMK_F16;
491 break;
492 }
493 }
494
495 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
496 return;
497
498 if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
499 return;
500
501 if (Swap) {
502 // Swap Src0 and Src1 by building a new instruction.
503 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
504 MI.getOperand(0).getReg())
505 .add(Src1)
506 .add(Src0)
507 .add(Src2)
508 .setMIFlags(MI.getFlags());
509 MI.eraseFromParent();
510 } else {
511 TII->removeModOperands(MI);
512 MI.setDesc(TII->get(NewOpcode));
513 }
514}
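// Illustrative examples of the two forms (registers and the literal are
// arbitrary):
//   v_fma_f32 v0, v1, v2, 0x42f60000  -->  v_fmaak_f32 v0, v1, v2, 0x42f60000  ; d = s0 * s1 + K
//   v_fma_f32 v0, v1, 0x42f60000, v2  -->  v_fmamk_f32 v0, v1, 0x42f60000, v2  ; d = s0 * K + s1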
515
516/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
517/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
518/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
519/// XNOR (as a ^ b == ~(a ^ ~b)).
520/// \returns true if the caller should continue the machine function iterator
521bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
522 unsigned Opc = MI.getOpcode();
523 const MachineOperand *Dest = &MI.getOperand(0);
524 MachineOperand *Src0 = &MI.getOperand(1);
525 MachineOperand *Src1 = &MI.getOperand(2);
526 MachineOperand *SrcReg = Src0;
527 MachineOperand *SrcImm = Src1;
528
529 if (!SrcImm->isImm() ||
530 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
531 return false;
532
533 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
534 uint32_t NewImm = 0;
535
536 if (Opc == AMDGPU::S_AND_B32) {
537 if (isPowerOf2_32(~Imm)) {
538 NewImm = llvm::countr_one(Imm);
539 Opc = AMDGPU::S_BITSET0_B32;
540 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
541 NewImm = ~Imm;
542 Opc = AMDGPU::S_ANDN2_B32;
543 }
544 } else if (Opc == AMDGPU::S_OR_B32) {
545 if (isPowerOf2_32(Imm)) {
546 NewImm = llvm::countr_zero(Imm);
547 Opc = AMDGPU::S_BITSET1_B32;
548 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
549 NewImm = ~Imm;
550 Opc = AMDGPU::S_ORN2_B32;
551 }
552 } else if (Opc == AMDGPU::S_XOR_B32) {
553 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
554 NewImm = ~Imm;
555 Opc = AMDGPU::S_XNOR_B32;
556 }
557 } else {
558 llvm_unreachable("unexpected opcode");
559 }
560
561 if (NewImm != 0) {
562 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
563 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
564 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
565 return true;
566 }
567
568 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
569 const bool IsUndef = SrcReg->isUndef();
570 const bool IsKill = SrcReg->isKill();
571 MI.setDesc(TII->get(Opc));
572 if (Opc == AMDGPU::S_BITSET0_B32 ||
573 Opc == AMDGPU::S_BITSET1_B32) {
574 Src0->ChangeToImmediate(NewImm);
575 // Remove the immediate and add the tied input.
576 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
577 /*isImp*/ false, IsKill,
578 /*isDead*/ false, IsUndef);
579 MI.tieOperands(0, 2);
580 } else {
581 SrcImm->setImm(NewImm);
582 }
583 }
584 }
585
586 return false;
587}
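// Illustrative examples of the rewrites above (SGPR numbers are arbitrary):
//   s_and_b32 s0, s0, 0xffffdfff  -->  s_bitset0_b32 s0, 13    ; clear one bit
//   s_or_b32  s0, s0, 0x00010000  -->  s_bitset1_b32 s0, 16    ; set one bit
//   s_and_b32 s0, s0, 0xffffffc0  -->  s_andn2_b32 s0, s0, 63  ; ~imm is inlinable
//   s_xor_b32 s0, s0, 0xffffffc0  -->  s_xnor_b32 s0, s0, 63   ; a ^ b == ~(a ^ ~b)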
588
589// This is the same as MachineInstr::readsRegister/modifiesRegister except
590// it takes subregs into account.
591bool SIShrinkInstructions::instAccessReg(
592 iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
593 unsigned SubReg) const {
594 for (const MachineOperand &MO : R) {
595 if (!MO.isReg())
596 continue;
597
598 if (Reg.isPhysical() && MO.getReg().isPhysical()) {
599 if (TRI->regsOverlap(Reg, MO.getReg()))
600 return true;
601 } else if (MO.getReg() == Reg && Reg.isVirtual()) {
602 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
603 TRI->getSubRegIndexLaneMask(MO.getSubReg());
604 if (Overlap.any())
605 return true;
606 }
607 }
608 return false;
609}
610
611bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
612 unsigned SubReg) const {
613 return instAccessReg(MI->uses(), Reg, SubReg);
614}
615
616bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
617 unsigned SubReg) const {
618 return instAccessReg(MI->defs(), Reg, SubReg);
619}
620
621TargetInstrInfo::RegSubRegPair
622SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
623 unsigned I) const {
624 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
625 if (Reg.isPhysical()) {
626 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
627 } else {
628 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
629 }
630 }
631 return TargetInstrInfo::RegSubRegPair(Reg, Sub);
632}
633
634void SIShrinkInstructions::dropInstructionKeepingImpDefs(
635 MachineInstr &MI) const {
636 for (unsigned i = MI.getDesc().getNumOperands() +
637 MI.getDesc().implicit_uses().size() +
638 MI.getDesc().implicit_defs().size(),
639 e = MI.getNumOperands();
640 i != e; ++i) {
641 const MachineOperand &Op = MI.getOperand(i);
642 if (!Op.isDef())
643 continue;
644 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
645 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
646 }
647
648 MI.eraseFromParent();
649}
650
651// Match:
652// mov t, x
653// mov x, y
654// mov y, t
655//
656// =>
657//
658// mov t, x (t is potentially dead and move eliminated)
659// v_swap_b32 x, y
660//
661// Returns the next valid instruction pointer if it was able to create v_swap_b32.
662//
663// This should not be done too early, so as not to prevent possible folding
664// which may remove the matched moves; it should preferably be done before RA
665// to release saved registers, and possibly also after RA, which can insert
666// copies too.
667//
668// This is really just a generic peephole that is not a canonical shrinking,
669// although requirements match the pass placement and it reduces code size too.
670MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
671 assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
672 MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
673 MovT.getOpcode() == AMDGPU::COPY);
674
675 Register T = MovT.getOperand(0).getReg();
676 unsigned Tsub = MovT.getOperand(0).getSubReg();
677 MachineOperand &Xop = MovT.getOperand(1);
678
679 if (!Xop.isReg())
680 return nullptr;
681 Register X = Xop.getReg();
682 unsigned Xsub = Xop.getSubReg();
683
684 unsigned Size = TII->getOpSize(MovT, 0);
685
686 // We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
687 // are not allocatable.
688 if (Size == 2 && X.isVirtual())
689 return nullptr;
690
691 if (!TRI->isVGPR(*MRI, X))
692 return nullptr;
693
694 const unsigned SearchLimit = 16;
695 unsigned Count = 0;
696 bool KilledT = false;
697 for (auto Iter = std::next(MovT.getIterator()),
698 E = MovT.getParent()->instr_end();
699 Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
700
701 MachineInstr *MovY = &*Iter;
702 KilledT = MovY->killsRegister(T, TRI);
703
704 if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
705 MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
706 MovY->getOpcode() != AMDGPU::COPY) ||
707 !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
708 MovY->getOperand(1).getSubReg() != Tsub)
709 continue;
710
711 Register Y = MovY->getOperand(0).getReg();
712 unsigned Ysub = MovY->getOperand(0).getSubReg();
713
714 if (!TRI->isVGPR(*MRI, Y))
715 continue;
716
717 MachineInstr *MovX = nullptr;
718 for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
719 I != IY; ++I) {
720 if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
721 instModifiesReg(&*I, T, Tsub) ||
722 (MovX && instModifiesReg(&*I, X, Xsub))) {
723 MovX = nullptr;
724 break;
725 }
726 if (!instReadsReg(&*I, Y, Ysub)) {
727 if (!MovX && instModifiesReg(&*I, X, Xsub)) {
728 MovX = nullptr;
729 break;
730 }
731 continue;
732 }
733 if (MovX ||
734 (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
735 I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
736 I->getOpcode() != AMDGPU::COPY) ||
737 I->getOperand(0).getReg() != X ||
738 I->getOperand(0).getSubReg() != Xsub) {
739 MovX = nullptr;
740 break;
741 }
742
743 if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
744 continue;
745
746 MovX = &*I;
747 }
748
749 if (!MovX)
750 continue;
751
752 LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
753
754 MachineBasicBlock &MBB = *MovX->getParent();
755 SmallVector<MachineInstr *, 4> Swaps;
756 if (Size == 2) {
757 auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
758 TII->get(AMDGPU::V_SWAP_B16))
759 .addDef(X)
760 .addDef(Y)
761 .addReg(Y)
762 .addReg(X)
763 .getInstr();
764 Swaps.push_back(MIB);
765 } else {
766 assert(Size > 0 && Size % 4 == 0);
767 for (unsigned I = 0; I < Size / 4; ++I) {
768 TargetInstrInfo::RegSubRegPair X1, Y1;
769 X1 = getSubRegForIndex(X, Xsub, I);
770 Y1 = getSubRegForIndex(Y, Ysub, I);
771 auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
772 TII->get(AMDGPU::V_SWAP_B32))
773 .addDef(X1.Reg, 0, X1.SubReg)
774 .addDef(Y1.Reg, 0, Y1.SubReg)
775 .addReg(Y1.Reg, 0, Y1.SubReg)
776 .addReg(X1.Reg, 0, X1.SubReg)
777 .getInstr();
778 Swaps.push_back(MIB);
779 }
780 }
781 // Drop implicit EXEC.
782 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
783 for (MachineInstr *Swap : Swaps) {
784 Swap->removeOperand(Swap->getNumExplicitOperands());
785 Swap->copyImplicitOps(*MBB.getParent(), *MovX);
786 }
787 }
788 MovX->eraseFromParent();
789 dropInstructionKeepingImpDefs(*MovY);
790 MachineInstr *Next = &*std::next(MovT.getIterator());
791
792 if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
793 dropInstructionKeepingImpDefs(MovT);
794 } else {
795 Xop.setIsKill(false);
796 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
797 unsigned OpNo = MovT.getNumExplicitOperands() + I;
798 const MachineOperand &Op = MovT.getOperand(OpNo);
799 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
800 MovT.removeOperand(OpNo);
801 }
802 }
803
804 return Next;
805 }
806
807 return nullptr;
808}
809
810// If an instruction has dead sdst replace it with NULL register on gfx1030+
811bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
812 if (!ST->hasGFX10_3Insts())
813 return false;
814
815 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
816 if (!Op)
817 return false;
818 Register SDstReg = Op->getReg();
819 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
820 return false;
821
822 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
823 return true;
824}
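// Illustrative example (gfx1030+, wave32; registers are arbitrary): if the
// carry-out of a VOP3 add is never read,
//   v_add_co_u32 v0, s5, v1, v2
// can become
//   v_add_co_u32 v0, null, v1, v2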
825
826bool SIShrinkInstructions::run(MachineFunction &MF) {
827
828 this->MF = &MF;
829 MRI = &MF.getRegInfo();
830 ST = &MF.getSubtarget<GCNSubtarget>();
831 TII = ST->getInstrInfo();
832 TRI = &TII->getRegisterInfo();
833
834 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
835
836 std::vector<unsigned> I1Defs;
837
838 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
839 BI != BE; ++BI) {
840
841 MachineBasicBlock &MBB = *BI;
842 MachineBasicBlock::iterator I, Next;
843 for (I = MBB.begin(); I != MBB.end(); I = Next) {
844 Next = std::next(I);
845 MachineInstr &MI = *I;
846
847 if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
848 // If this has a literal constant source that is the same as the
849 // reversed bits of an inline immediate, replace with a bitreverse of
850 // that constant. This saves 4 bytes in the common case of materializing
851 // sign bits.
852
853 // Test if we are after regalloc. We only want to do this after any
854 // optimizations happen because this will confuse them.
855 // XXX - not exactly a check for post-regalloc run.
856 MachineOperand &Src = MI.getOperand(1);
857 if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
858 int32_t ModImm;
859 unsigned ModOpcode =
860 canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
861 if (ModOpcode != 0) {
862 MI.setDesc(TII->get(ModOpcode));
863 Src.setImm(static_cast<int64_t>(ModImm));
864 continue;
865 }
866 }
867 }
868
869 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
870 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
871 MI.getOpcode() == AMDGPU::COPY)) {
872 if (auto *NextMI = matchSwap(MI)) {
873 Next = NextMI->getIterator();
874 continue;
875 }
876 }
877
878 // Try to use S_ADDK_I32 and S_MULK_I32.
879 if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
880 MI.getOpcode() == AMDGPU::S_MUL_I32) {
881 const MachineOperand *Dest = &MI.getOperand(0);
882 MachineOperand *Src0 = &MI.getOperand(1);
883 MachineOperand *Src1 = &MI.getOperand(2);
884
885 if (!Src0->isReg() && Src1->isReg()) {
886 if (TII->commuteInstruction(MI, false, 1, 2))
887 std::swap(Src0, Src1);
888 }
889
890 // FIXME: This could work better if hints worked with subregisters. If
891 // we have a vector add of a constant, we usually don't get the correct
892 // allocation due to the subregister usage.
893 if (Dest->getReg().isVirtual() && Src0->isReg()) {
894 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
895 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
896 continue;
897 }
898
899 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
900 if (Src1->isImm() && isKImmOperand(*Src1)) {
901 unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
902 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
903
904 Src1->setImm(SignExtend64(Src1->getImm(), 32));
905 MI.setDesc(TII->get(Opc));
906 MI.tieOperands(0, 1);
907 }
908 }
909 }
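      // Illustrative example of the rewrite above (operands are arbitrary):
      //   s_add_i32 s0, s0, 0x1234  -->  s_addk_i32 s0, 0x1234
      //   s_mul_i32 s0, s0, 0x1234  -->  s_mulk_i32 s0, 0x1234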
910
911 // Try to use s_cmpk_*
912 if (MI.isCompare() && TII->isSOPC(MI)) {
913 shrinkScalarCompare(MI);
914 continue;
915 }
916
917 // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
918 if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
919 const MachineOperand &Dst = MI.getOperand(0);
920 MachineOperand &Src = MI.getOperand(1);
921
922 if (Src.isImm() && Dst.getReg().isPhysical()) {
923 unsigned ModOpc;
924 int32_t ModImm;
925 if (isKImmOperand(Src)) {
926 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
927 Src.setImm(SignExtend64(Src.getImm(), 32));
928 } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
929 /*Scalar=*/true))) {
930 MI.setDesc(TII->get(ModOpc));
931 Src.setImm(static_cast<int64_t>(ModImm));
932 }
933 }
934
935 continue;
936 }
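      // Illustrative examples of the rewrites above (operands are arbitrary):
      //   s_mov_b32 s0, 0x1234      -->  s_movk_i32 s0, 0x1234
      //   s_mov_b32 s0, 0x80000000  -->  s_brev_b32 s0, 1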
937
938 // Shrink scalar logic operations.
939 if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
940 MI.getOpcode() == AMDGPU::S_OR_B32 ||
941 MI.getOpcode() == AMDGPU::S_XOR_B32) {
942 if (shrinkScalarLogicOp(MI))
943 continue;
944 }
945
946 if (TII->isMIMG(MI.getOpcode()) &&
947 ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
948 MF.getProperties().hasProperty(
949 MachineFunctionProperties::Property::NoVRegs)) {
950 shrinkMIMG(MI);
951 continue;
952 }
953
954 if (!TII->isVOP3(MI))
955 continue;
956
957 if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
958 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
959 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
960 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
961 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
962 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
963 shrinkMadFma(MI);
964 continue;
965 }
966
967 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
968 // If there is no chance we will shrink it and use VCC as sdst to get
969 // a 32 bit form try to replace dead sdst with NULL.
970 tryReplaceDeadSDST(MI);
971 continue;
972 }
973
974 if (!TII->canShrink(MI, *MRI)) {
975 // Try commuting the instruction and see if that enables us to shrink
976 // it.
977 if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
978 !TII->canShrink(MI, *MRI)) {
979 tryReplaceDeadSDST(MI);
980 continue;
981 }
982 }
983
984 int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
985
986 if (TII->isVOPC(Op32)) {
987 MachineOperand &Op0 = MI.getOperand(0);
988 if (Op0.isReg()) {
989 // Exclude VOPCX instructions as these don't explicitly write a
990 // dst.
991 Register DstReg = Op0.getReg();
992 if (DstReg.isVirtual()) {
993 // VOPC instructions can only write to the VCC register. We can't
994 // force them to use VCC here, because this is only one register and
995 // cannot deal with sequences which would require multiple copies of
996 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
997 //
998 // So, instead of forcing the instruction to write to VCC, we
999 // provide a hint to the register allocator to use VCC and then we
1000 // will run this pass again after RA and shrink it if it outputs to
1001 // VCC.
1002 MRI->setRegAllocationHint(DstReg, 0, VCCReg);
1003 continue;
1004 }
1005 if (DstReg != VCCReg)
1006 continue;
1007 }
1008 }
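      // Illustrative example (wave64; registers are arbitrary): the e32 VOPC
      // encoding implicitly writes vcc, so
      //   v_cmp_lt_f32_e64 s[0:1], v0, v1
      // can only shrink to
      //   v_cmp_lt_f32_e32 vcc, v0, v1
      // once its result is actually allocated to vcc (hence the hint).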
1009
1010 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1011 // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
1012 // instructions.
1013 const MachineOperand *Src2 =
1014 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1015 if (!Src2->isReg())
1016 continue;
1017 Register SReg = Src2->getReg();
1018 if (SReg.isVirtual()) {
1019 MRI->setRegAllocationHint(SReg, 0, VCCReg);
1020 continue;
1021 }
1022 if (SReg != VCCReg)
1023 continue;
1024 }
1025
1026 // Check for the bool flag output for instructions like V_ADD_I32_e64.
1027 const MachineOperand *SDst = TII->getNamedOperand(MI,
1028 AMDGPU::OpName::sdst);
1029
1030 if (SDst) {
1031 bool Next = false;
1032
1033 if (SDst->getReg() != VCCReg) {
1034 if (SDst->getReg().isVirtual())
1035 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
1036 Next = true;
1037 }
1038
1039 // All of the instructions with carry outs also have an SGPR input in
1040 // src2.
1041 const MachineOperand *Src2 = TII->getNamedOperand(MI,
1042 AMDGPU::OpName::src2);
1043 if (Src2 && Src2->getReg() != VCCReg) {
1044 if (Src2->getReg().isVirtual())
1045 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
1046 Next = true;
1047 }
1048
1049 if (Next)
1050 continue;
1051 }
1052
1053 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
1054 // fold an immediate into the shrunk instruction as a literal operand. In
1055 // GFX10 VOP3 instructions can take a literal operand anyway, so there is
1056 // no advantage to doing this.
1057 if (ST->hasVOP3Literal() &&
1058 !MF.getProperties().hasProperty(
1059 MachineFunctionProperties::Property::NoVRegs))
1060 continue;
1061
1062 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1063 !shouldShrinkTrue16(MI))
1064 continue;
1065
1066 // We can shrink this instruction
1067 LLVM_DEBUG(dbgs() << "Shrinking " << MI);
1068
1069 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
1070 ++NumInstructionsShrunk;
1071
1072 // Copy extra operands not present in the instruction definition.
1073 copyExtraImplicitOps(*Inst32, MI);
1074
1075 // Copy deadness from the old explicit vcc def to the new implicit def.
1076 if (SDst && SDst->isDead())
1077 Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();
1078
1079 MI.eraseFromParent();
1080 foldImmediates(*Inst32);
1081
1082 LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
1083 }
1084 }
1085 return false;
1086}
1087
1088bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
1089 if (skipFunction(MF.getFunction()))
1090 return false;
1091
1092 return SIShrinkInstructions().run(MF);
1093}
1094
1095PreservedAnalyses
1096SIShrinkInstructionsPass::run(MachineFunction &MF,
1097 MachineFunctionAnalysisManager &MFAM) {
1098 if (MF.getFunction().hasOptNone() || !SIShrinkInstructions().run(MF))
1099 return PreservedAnalyses::all();
1100
1101 auto PA = getMachineFunctionPassPreservedAnalyses();
1102 PA.preserveSet<CFGAnalyses>();
1103 return PA;
1104}