File: | build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp |
Warning: | line 107, column 12 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file This file contains the AMDGPU DAG scheduling | |||
10 | /// mutation to pair VOPD instructions back to back. It also contains | |||
11 | /// subroutines useful in the creation of VOPD instructions. | |||
12 | // | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "GCNVOPDUtils.h" | |||
16 | #include "AMDGPUSubtarget.h" | |||
17 | #include "GCNSubtarget.h" | |||
18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | |||
19 | #include "SIInstrInfo.h" | |||
20 | #include "llvm/ADT/STLExtras.h" | |||
21 | #include "llvm/ADT/SmallVector.h" | |||
22 | #include "llvm/CodeGen/MachineBasicBlock.h" | |||
23 | #include "llvm/CodeGen/MachineInstr.h" | |||
24 | #include "llvm/CodeGen/MachineOperand.h" | |||
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
26 | #include "llvm/CodeGen/MacroFusion.h" | |||
27 | #include "llvm/CodeGen/ScheduleDAG.h" | |||
28 | #include "llvm/CodeGen/ScheduleDAGMutation.h" | |||
29 | #include "llvm/CodeGen/TargetInstrInfo.h" | |||
30 | #include "llvm/MC/MCInst.h" | |||
31 | ||||
32 | using namespace llvm; | |||
33 | ||||
// Debug category used by the LLVM_DEBUG statements in this file.
#define DEBUG_TYPE "gcn-vopd-utils"
35 | ||||
36 | bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, | |||
37 | const MachineInstr &FirstMI, | |||
38 | const MachineInstr &SecondMI) { | |||
39 | const MachineFunction *MF = FirstMI.getMF(); | |||
40 | const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); | |||
41 | const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); | |||
| ||||
42 | const MachineRegisterInfo &MRI = MF->getRegInfo(); | |||
43 | const unsigned NumVGPRBanks = 4; | |||
44 | // Literals also count against scalar bus limit | |||
45 | SmallVector<const MachineOperand *> UniqueLiterals; | |||
46 | auto addLiteral = [&](const MachineOperand &Op) { | |||
47 | for (auto &Literal : UniqueLiterals) { | |||
48 | if (Literal->isIdenticalTo(Op)) | |||
49 | return; | |||
50 | } | |||
51 | UniqueLiterals.push_back(&Op); | |||
52 | }; | |||
53 | SmallVector<Register> UniqueScalarRegs; | |||
54 | assert([&]() -> bool {(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
55 | for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
56 | MII != FirstMI.getParent()->instr_end(); ++MII) {(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
57 | if (&*MII == &SecondMI)(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
58 | return true;(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
59 | }(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
60 | return false;(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)) | |||
61 | }() && "Expected FirstMI to precede SecondMI")(static_cast <bool> ([&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI .getParent()->instr_end(); ++MII) { if (&*MII == & SecondMI) return true; } return false; }() && "Expected FirstMI to precede SecondMI" ) ? void (0) : __assert_fail ("[&]() -> bool { for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); MII != FirstMI.getParent()->instr_end(); ++MII) { if (&*MII == &SecondMI) return true; } return false; }() && \"Expected FirstMI to precede SecondMI\"" , "llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp", 61, __extension__ __PRETTY_FUNCTION__)); | |||
62 | // Cannot pair dependent instructions | |||
63 | for (const auto &Use : SecondMI.uses()) | |||
64 | if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg())) | |||
65 | return false; | |||
66 | ||||
67 | struct ComponentInfo { | |||
68 | ComponentInfo(const MachineInstr &MI) : MI(MI) {} | |||
69 | Register Dst, Reg0, Reg1, Reg2; | |||
70 | const MachineInstr &MI; | |||
71 | }; | |||
72 | ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)}; | |||
73 | ||||
74 | for (ComponentInfo &Comp : CInfo) { | |||
75 | switch (Comp.MI.getOpcode()) { | |||
76 | case AMDGPU::V_FMAMK_F32: | |||
77 | // cannot inline the fixed literal in fmamk | |||
78 | addLiteral(Comp.MI.getOperand(2)); | |||
79 | Comp.Reg2 = Comp.MI.getOperand(3).getReg(); | |||
80 | break; | |||
81 | case AMDGPU::V_FMAAK_F32: | |||
82 | // cannot inline the fixed literal in fmaak | |||
83 | addLiteral(Comp.MI.getOperand(3)); | |||
84 | Comp.Reg1 = Comp.MI.getOperand(2).getReg(); | |||
85 | break; | |||
86 | case AMDGPU::V_FMAC_F32_e32: | |||
87 | case AMDGPU::V_DOT2_F32_F16: | |||
88 | case AMDGPU::V_DOT2_F32_BF16: | |||
89 | Comp.Reg1 = Comp.MI.getOperand(2).getReg(); | |||
90 | Comp.Reg2 = Comp.MI.getOperand(0).getReg(); | |||
91 | break; | |||
92 | case AMDGPU::V_CNDMASK_B32_e32: | |||
93 | UniqueScalarRegs.push_back(AMDGPU::VCC_LO); | |||
94 | Comp.Reg1 = Comp.MI.getOperand(2).getReg(); | |||
95 | break; | |||
96 | case AMDGPU::V_MOV_B32_e32: | |||
97 | break; | |||
98 | default: | |||
99 | Comp.Reg1 = Comp.MI.getOperand(2).getReg(); | |||
100 | break; | |||
101 | } | |||
102 | ||||
103 | Comp.Dst = Comp.MI.getOperand(0).getReg(); | |||
104 | ||||
105 | const MachineOperand &Op0 = Comp.MI.getOperand(1); | |||
106 | if (Op0.isReg()) { | |||
107 | if (!TRI->isVectorRegister(MRI, Op0.getReg())) { | |||
| ||||
108 | if (!is_contained(UniqueScalarRegs, Op0.getReg())) | |||
109 | UniqueScalarRegs.push_back(Op0.getReg()); | |||
110 | } else | |||
111 | Comp.Reg0 = Op0.getReg(); | |||
112 | } else { | |||
113 | if (!TII.isInlineConstant(Comp.MI, 1)) | |||
114 | addLiteral(Op0); | |||
115 | } | |||
116 | } | |||
117 | ||||
118 | if (UniqueLiterals.size() > 1) | |||
119 | return false; | |||
120 | if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) | |||
121 | return false; | |||
122 | ||||
123 | // check port 0 | |||
124 | if (CInfo[0].Reg0 && CInfo[1].Reg0 && | |||
125 | CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks) | |||
126 | return false; | |||
127 | // check port 1 | |||
128 | if (CInfo[0].Reg1 && CInfo[1].Reg1 && | |||
129 | CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks) | |||
130 | return false; | |||
131 | // check port 2 | |||
132 | if (CInfo[0].Reg2 && CInfo[1].Reg2 && | |||
133 | !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1)) | |||
134 | return false; | |||
135 | if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1)) | |||
136 | return false; | |||
137 | ||||
138 | LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gcn-vopd-utils")) { dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI << "\n\tY: " << SecondMI << "\n"; } } while (false) | |||
139 | << "\n\tY: " << SecondMI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gcn-vopd-utils")) { dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI << "\n\tY: " << SecondMI << "\n"; } } while (false); | |||
140 | return true; | |||
141 | } | |||
142 | ||||
143 | /// Check if the instr pair, FirstMI and SecondMI, should be scheduled | |||
144 | /// together. Given SecondMI, when FirstMI is unspecified, then check if | |||
145 | /// SecondMI may be part of a fused pair at all. | |||
146 | static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, | |||
147 | const TargetSubtargetInfo &TSI, | |||
148 | const MachineInstr *FirstMI, | |||
149 | const MachineInstr &SecondMI) { | |||
150 | const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); | |||
151 | unsigned Opc2 = SecondMI.getOpcode(); | |||
152 | auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); | |||
153 | ||||
154 | // One instruction case | |||
155 | if (!FirstMI) | |||
156 | return SecondCanBeVOPD.Y; | |||
157 | ||||
158 | unsigned Opc = FirstMI->getOpcode(); | |||
159 | auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); | |||
160 | ||||
161 | if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || | |||
162 | (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) | |||
163 | return false; | |||
164 | ||||
165 | return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); | |||
166 | } | |||
167 | ||||
168 | /// Adapts design from MacroFusion | |||
169 | /// Puts valid candidate instructions back-to-back so they can easily | |||
170 | /// be turned into VOPD instructions | |||
171 | /// Greedily pairs instruction candidates. O(n^2) algorithm. | |||
172 | struct VOPDPairingMutation : ScheduleDAGMutation { | |||
173 | ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer | |||
174 | ||||
175 | VOPDPairingMutation( | |||
176 | ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer | |||
177 | : shouldScheduleAdjacent(shouldScheduleAdjacent) {} | |||
178 | ||||
179 | void apply(ScheduleDAGInstrs *DAG) override { | |||
180 | const TargetInstrInfo &TII = *DAG->TII; | |||
181 | const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); | |||
182 | if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { | |||
183 | LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gcn-vopd-utils")) { dbgs() << "Target does not support VOPDPairingMutation\n" ; } } while (false); | |||
184 | return; | |||
185 | } | |||
186 | ||||
187 | std::vector<SUnit>::iterator ISUI, JSUI; | |||
188 | for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { | |||
189 | const MachineInstr *IMI = ISUI->getInstr(); | |||
190 | if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) | |||
191 | continue; | |||
192 | if (!hasLessThanNumFused(*ISUI, 2)) | |||
193 | continue; | |||
194 | ||||
195 | for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { | |||
196 | if (JSUI->isBoundaryNode()) | |||
197 | continue; | |||
198 | const MachineInstr *JMI = JSUI->getInstr(); | |||
199 | if (!hasLessThanNumFused(*JSUI, 2) || | |||
200 | !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) | |||
201 | continue; | |||
202 | if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) | |||
203 | break; | |||
204 | } | |||
205 | } | |||
206 | LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gcn-vopd-utils")) { dbgs() << "Completed VOPDPairingMutation\n" ; } } while (false); | |||
207 | } | |||
208 | }; | |||
209 | ||||
210 | std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { | |||
211 | return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); | |||
212 | } |