//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
  bool shouldShrinkTrue16(MachineInstr &MI) const;
  bool isKImmOperand(const MachineOperand &Src) const;
  bool isKUImmOperand(const MachineOperand &Src) const;
  bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
  void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
  void shrinkScalarCompare(MachineInstr &MI) const;
  void shrinkMIMG(MachineInstr &MI) const;
  void shrinkMadFma(MachineInstr &MI) const;
  bool shrinkScalarLogicOp(MachineInstr &MI) const;
  bool tryReplaceDeadSDST(MachineInstr &MI) const;
  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                     Register Reg, unsigned SubReg) const;
  bool instReadsReg(const MachineInstr *MI, unsigned Reg,
                    unsigned SubReg) const;
  bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                       unsigned SubReg) const;
  TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                   unsigned I) const;
  void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
  MachineInstr *matchSwap(MachineInstr &MovT) const;

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
                                          bool TryToCommute) const {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    Register Reg = Src0.getReg();
    if (Reg.isVirtual()) {
      MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
          if (MovSrc.isImm()) {
            Src0.ChangeToImmediate(MovSrc.getImm());
            ConstantFolded = true;
          } else if (MovSrc.isFI()) {
            Src0.ChangeToFrameIndex(MovSrc.getIndex());
            ConstantFolded = true;
          } else if (MovSrc.isGlobal()) {
            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                            MovSrc.getTargetFlags());
            ConstantFolded = true;
          }
        }

        if (ConstantFolded) {
          if (MRI->use_nodbg_empty(Reg))
            Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}
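
// Illustrative example of the fold above (hypothetical MIR, not taken from
// this file): given
//   %1:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
//   %2:vgpr_32 = V_ADD_U32_e32 %1, %0, implicit $exec
// the literal is folded into src0, and the now-unused V_MOV is erased:
//   %2:vgpr_32 = V_ADD_U32_e32 42, %0, implicit $exec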

/// Do not shrink the instruction if its registers are not expressible in the
/// shrunk encoding.
bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
                                 "True16 Instructions post-RA");
      if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
          !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
        return false;
    }
  }
  return true;
}

bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
  return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
                                                bool &IsUnsigned) const {
  if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns the opcode with which a move immediate of the constant \p Src can
/// be replaced, if the constant is replaced with \p ModifiedImm. I.e.:
///
/// If the bitreverse of a constant is an inline immediate, reverse the
/// immediate and return the bitreverse opcode.
///
/// If the bitwise negation of a constant is an inline immediate, negate the
/// immediate and return the bitwise-not opcode.
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
                                         const MachineOperand &Src,
                                         int32_t &ModifiedImm, bool Scalar) {
  if (TII->isInlineConstant(Src))
    return 0;
  int32_t SrcImm = static_cast<int32_t>(Src.getImm());

  if (!Scalar) {
    // We could handle the scalar case here, but we would need to check
    // that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
    // it, as the reasonable values are already covered by s_movk_i32.
    ModifiedImm = ~SrcImm;
    if (TII->isInlineConstant(APInt(32, ModifiedImm)))
      return AMDGPU::V_NOT_B32_e32;
  }

  ModifiedImm = reverseBits<int32_t>(SrcImm);
  if (TII->isInlineConstant(APInt(32, ModifiedImm)))
    return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;

  return 0;
}
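
// Illustrative examples: 0xFFFFFFFE is not an inline constant, but its
// bitwise negation 1 is, so
//   v_mov_b32 v0, 0xFFFFFFFE   -->   v_not_b32 v0, 1
// Likewise, the bit reversal of 0x80000000 is 1, so
//   v_mov_b32 v0, 0x80000000   -->   v_bfrev_b32 v0, 1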

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
                                                MachineInstr &MI) const {
  MachineFunction &MF = *MI.getMF();
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
  if (!ST->hasSCmpK())
    return;

  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  // cmpk requires src0 to be a register
  const MachineOperand &Src0 = MI.getOperand(0);
  if (!Src0.isReg())
    return;

  MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
        Src1.setImm(SignExtend32(Src1.getImm(), 32));
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
      (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
    if (!SIInstrInfo::sopkIsZext(SOPKOpc))
      Src1.setImm(SignExtend64(Src1.getImm(), 32));
    MI.setDesc(NewDesc);
  }
}
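
// Illustrative example: assuming 0x1234 is not an inline constant,
//   s_cmp_lg_u32 s0, 0x1234   -->   s_cmpk_lg_u32 s0, 0x1234
// moves the constant into the SOPK 16-bit immediate field and drops the
// 32-bit literal dword from the encoding.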

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return;

  uint8_t NewEncoding;
  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
    NewEncoding = AMDGPU::MIMGEncGfx10Default;
    break;
  case AMDGPU::MIMGEncGfx11NSA:
    NewEncoding = AMDGPU::MIMGEncGfx11Default;
    break;
  default:
    return;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords == 5) {
    RC = &AMDGPU::VReg_160RegClass;
  } else if (Info->VAddrDwords == 6) {
    RC = &AMDGPU::VReg_192RegClass;
  } else if (Info->VAddrDwords == 7) {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  unsigned NextVgpr = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  const unsigned NSAMaxSize = ST->getNSAMaxSize();
  const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
  const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
  for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
    unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
    unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
    assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");

    if (Idx == 0) {
      VgprBase = Vgpr;
      NextVgpr = Vgpr + Dwords;
    } else if (Vgpr == NextVgpr) {
      NextVgpr = Vgpr + Dwords;
    } else {
      return;
    }

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for implicit tied operands - these may be present if TFE is
  // enabled.
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // This is the tied operand
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
                                             Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  for (unsigned i = 1; i < EndVAddr; ++i)
    MI.removeOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (EndVAddr - 1));
  }
}
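
// Illustrative example (GFX10 NSA, modifiers omitted): when the address
// registers happen to be contiguous, e.g.
//   image_sample v[0:3], [v4, v5, v6], s[0:7], s[8:11]
// the instruction can use the shorter sequential-register encoding:
//   image_sample v[0:3], v[4:6], s[0:7], s[8:11]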

// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
  // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
  // there is no reason to try to shrink them.
  if (!ST->hasVOP3Literal())
    return;

  // There is no advantage to doing this pre-RA.
  if (!MF->getProperties().hasProperty(
          MachineFunctionProperties::Property::NoVRegs))
    return;

  if (TII->hasAnyModifiersSet(MI))
    return;

  const unsigned Opcode = MI.getOpcode();
  MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;

  bool Swap;

  // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
  if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
    if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
      Swap = false;
    else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADAK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAAK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADAK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
                                          : AMDGPU::V_FMAAK_F16;
      break;
    }
  }

  // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
  if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
    if (Src1.isImm() && !TII->isInlineConstant(Src1))
      Swap = false;
    else if (Src0.isImm() && !TII->isInlineConstant(Src0))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADMK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAMK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADMK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
                                          : AMDGPU::V_FMAMK_F16;
      break;
    }
  }

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
    return;

  if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
    return;

  if (Swap) {
    // Swap Src0 and Src1 by building a new instruction.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
            MI.getOperand(0).getReg())
        .add(Src1)
        .add(Src0)
        .add(Src2)
        .setMIFlags(MI.getFlags());
    MI.eraseFromParent();
  } else {
    TII->removeModOperands(MI);
    MI.setDesc(TII->get(NewOpcode));
  }
}
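
// Illustrative example with the non-inline literal 10.0f (0x41200000):
//   v_fma_f32 v0, v1, v2, 0x41200000  -->  v_fmaak_f32 v0, v1, v2, 0x41200000
//   v_fma_f32 v0, 0x41200000, v1, v2  -->  v_fmamk_f32 v0, v1, 0x41200000, v2
// trading the 12-byte VOP3-plus-literal encoding for the 8-byte VOP2 form.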

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (!SrcImm->isImm() ||
      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
    return false;

  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
  uint32_t NewImm = 0;

  if (Opc == AMDGPU::S_AND_B32) {
    if (isPowerOf2_32(~Imm)) {
      NewImm = llvm::countr_one(Imm);
      Opc = AMDGPU::S_BITSET0_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ANDN2_B32;
    }
  } else if (Opc == AMDGPU::S_OR_B32) {
    if (isPowerOf2_32(Imm)) {
      NewImm = llvm::countr_zero(Imm);
      Opc = AMDGPU::S_BITSET1_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ORN2_B32;
    }
  } else if (Opc == AMDGPU::S_XOR_B32) {
    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_XNOR_B32;
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  if (NewImm != 0) {
    if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
      return true;
    }

    if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
      const bool IsUndef = SrcReg->isUndef();
      const bool IsKill = SrcReg->isKill();
      MI.setDesc(TII->get(Opc));
      if (Opc == AMDGPU::S_BITSET0_B32 ||
          Opc == AMDGPU::S_BITSET1_B32) {
        Src0->ChangeToImmediate(NewImm);
        // Remove the immediate and add the tied input.
        MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
                                          /*isImp*/ false, IsKill,
                                          /*isDead*/ false, IsUndef);
        MI.tieOperands(0, 2);
      } else {
        SrcImm->setImm(NewImm);
      }
    }
  }

  return false;
}
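
// Illustrative examples: ~0xFFFFFFBF == 0x40 is a power of two, so the AND
// clears a single bit:
//   s_and_b32 s0, s0, 0xFFFFFFBF   -->   s_bitset0_b32 s0, 6
// and ~0xFFFFFFC0 == 63 is an inline constant, so
//   s_and_b32 s0, s0, 0xFFFFFFC0   -->   s_andn2_b32 s0, s0, 63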

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
bool SIShrinkInstructions::instAccessReg(
    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
    unsigned SubReg) const {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (Reg.isPhysical() && MO.getReg().isPhysical()) {
      if (TRI->regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg && Reg.isVirtual()) {
      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
                                        unsigned SubReg) const {
  return instAccessReg(MI->uses(), Reg, SubReg);
}

bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
                                           unsigned SubReg) const {
  return instAccessReg(MI->defs(), Reg, SubReg);
}

TargetInstrInfo::RegSubRegPair
SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
                                        unsigned I) const {
  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
    if (Reg.isPhysical()) {
      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
    } else {
      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

void SIShrinkInstructions::dropInstructionKeepingImpDefs(
    MachineInstr &MI) const {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &Op = MI.getOperand(i);
    if (!Op.isDef())
      continue;
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
  }

  MI.eraseFromParent();
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns the next valid instruction pointer if it was able to create a
// v_swap_b32.
//
// This should not be done too early, so as not to prevent possible folding
// that may remove the matched moves. It is preferably done before RA, to
// release saved registers, and possibly also after RA, which can insert
// copies too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although the requirements match the pass placement and it reduces code size
// too.
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  Register T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  Register X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  if (!TRI->isVGPR(*MRI, X))
    return nullptr;

  const unsigned SearchLimit = 16;
  unsigned Count = 0;
  bool KilledT = false;
  for (auto Iter = std::next(MovT.getIterator()),
            E = MovT.getParent()->instr_end();
       Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {

    MachineInstr *MovY = &*Iter;
    KilledT = MovY->killsRegister(T, TRI);

    if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY->getOpcode() != AMDGPU::COPY) ||
        !MovY->getOperand(1).isReg() ||
        MovY->getOperand(1).getReg() != T ||
        MovY->getOperand(1).getSubReg() != Tsub)
      continue;

    Register Y = MovY->getOperand(0).getReg();
    unsigned Ysub = MovY->getOperand(0).getSubReg();

    if (!TRI->isVGPR(*MRI, Y))
      continue;

    MachineInstr *MovX = nullptr;
    for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
         I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
          instModifiesReg(&*I, T, Tsub) ||
          (MovX && instModifiesReg(&*I, X, Xsub))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }

      if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
        continue;

      MovX = &*I;
    }

    if (!MovX)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);

    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I);
      Y1 = getSubRegForIndex(Y, Ysub, I);
      MachineBasicBlock &MBB = *MovT.getParent();
      auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                         TII->get(AMDGPU::V_SWAP_B32))
                     .addDef(X1.Reg, 0, X1.SubReg)
                     .addDef(Y1.Reg, 0, Y1.SubReg)
                     .addReg(Y1.Reg, 0, Y1.SubReg)
                     .addReg(X1.Reg, 0, X1.SubReg).getInstr();
      if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
        // Drop implicit EXEC.
        MIB->removeOperand(MIB->getNumExplicitOperands());
        MIB->copyImplicitOps(*MBB.getParent(), *MovX);
      }
    }
    MovX->eraseFromParent();
    dropInstructionKeepingImpDefs(*MovY);
    MachineInstr *Next = &*std::next(MovT.getIterator());

    if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
      dropInstructionKeepingImpDefs(MovT);
    } else {
      Xop.setIsKill(false);
      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I) {
        unsigned OpNo = MovT.getNumExplicitOperands() + I;
        const MachineOperand &Op = MovT.getOperand(OpNo);
        if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
          MovT.removeOperand(OpNo);
      }
    }

    return Next;
  }

  return nullptr;
}

// If an instruction has a dead sdst, replace it with the NULL register on
// gfx1030+.
bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
  if (!ST->hasGFX10_3Insts())
    return false;

  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (!Op)
    return false;
  Register SDstReg = Op->getReg();
  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
    return false;

  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
  return true;
}
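
// Illustrative, assembly-level example (GFX10.3+, wave32): if the carry-out
// of
//   v_add_co_u32 v0, s0, v1, v2
// is never read, the SGPR destination can be replaced with the null register:
//   v_add_co_u32 v0, null, v1, v2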

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  this->MF = &MF;
  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
          int32_t ModImm;
          unsigned ModOpcode =
              canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
          if (ModOpcode != 0) {
            MI.setDesc(TII->get(ModOpcode));
            Src.setImm(static_cast<int64_t>(ModImm));
            continue;
          }
        }
      }

      if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                            MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Try to use S_ADDK_I32 and S_MULK_I32.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (Dest->getReg().isVirtual() && Src0->isReg()) {
          MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(*Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            Src1->setImm(SignExtend64(Src1->getImm(), 32));
            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }
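
      // Illustrative example: 0x7FFF fits in a signed 16-bit immediate and is
      // not an inline constant, so
      //   s_add_i32 s0, s0, 0x7FFF   -->   s_addk_i32 s0, 0x7FFF
      // dropping the 32-bit literal dword from the encoding.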

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() && Dst.getReg().isPhysical()) {
          unsigned ModOpc;
          int32_t ModImm;
          if (isKImmOperand(Src)) {
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
            Src.setImm(SignExtend64(Src.getImm(), 32));
          } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
                                                        /*Scalar=*/true))) {
            MI.setDesc(TII->get(ModOpc));
            Src.setImm(static_cast<int64_t>(ModImm));
          }
        }

        continue;
      }
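
      // Illustrative example: 0xFFFF8000 sign-extends from 16 bits, so
      //   s_mov_b32 s0, 0xFFFF8000   -->   s_movk_i32 s0, 0x8000
      // while constants whose bit reversal or negation is inline are handled
      // by canModifyToInlineImmOp32 above.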

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(MI))
          continue;
      }

      if (TII->isMIMG(MI.getOpcode()) &&
          ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
          MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs)) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->isVOP3(MI))
        continue;

      if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
        shrinkMadFma(MI);
        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
        // If there is no chance we will shrink it and use VCC as sdst to get
        // a 32-bit form, try to replace the dead sdst with NULL.
        tryReplaceDeadSDST(MI);
        continue;
      }

      if (!TII->canShrink(MI, *MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, *MRI)) {
          tryReplaceDeadSDST(MI);
          continue;
        }
      }

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        MachineOperand &Op0 = MI.getOperand(0);
        if (Op0.isReg()) {
          // Exclude VOPCX instructions as these don't explicitly write a
          // dst.
          Register DstReg = Op0.getReg();
          if (DstReg.isVirtual()) {
            // VOPC instructions can only write to the VCC register. We can't
            // force them to use VCC here, because this is only one register and
            // cannot deal with sequences which would require multiple copies of
            // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
            //
            // So, instead of forcing the instruction to write to VCC, we
            // provide a hint to the register allocator to use VCC and then we
            // will run this pass again after RA and shrink it if it outputs to
            // VCC.
            MRI->setRegAllocationHint(DstReg, 0, VCCReg);
            continue;
          }
          if (DstReg != VCCReg)
            continue;
        }
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        Register SReg = Src2->getReg();
        if (SReg.isVirtual()) {
          MRI->setRegAllocationHint(SReg, 0, VCCReg);
          continue;
        }
        if (SReg != VCCReg)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      if (SDst) {
        bool Next = false;

        if (SDst->getReg() != VCCReg) {
          if (SDst->getReg().isVirtual())
            MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
          Next = true;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                          AMDGPU::OpName::src2);
        if (Src2 && Src2->getReg() != VCCReg) {
          if (Src2->getReg().isVirtual())
            MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
          Next = true;
        }

        if (Next)
          continue;
      }

      // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
      // fold an immediate into the shrunk instruction as a literal operand. In
      // GFX10 VOP3 instructions can take a literal operand anyway, so there is
      // no advantage to doing this.
      if (ST->hasVOP3Literal() &&
          !MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs))
        continue;

      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
          !shouldShrinkTrue16(MI))
        continue;

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MI);

      // Copy deadness from the old explicit vcc def to the new implicit def.
      if (SDst && SDst->isDead())
        Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();

      MI.eraseFromParent();
      foldImmediates(*Inst32);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}
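
// Illustrative example of the final shrink: once the carry-out has been
// allocated to VCC, the VOP3 form can drop to the 4-byte VOP2 encoding:
//   v_add_co_u32_e64 v0, vcc, v1, v2   -->   v_add_co_u32_e32 v0, vcc, v1, v2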