// File: llvm/lib/Target/X86/X86FastPreTileConfig.cpp
// Static-analyzer finding: line 243, column 17 — value stored to 'NewMI'
// during its initialization is never read (dead store).
1 | //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file Pass to preconfig the shape of physical tile registers |
10 | /// It inserts ldtilecfg ahead of each group of tile registers. The algorithm |
11 | /// walk each instruction of basic block in reverse order. All the tile |
12 | /// registers that live out the basic block would be spilled and reloaded |
13 | /// before its user. It also check the depenedency of the shape to ensure |
14 | /// the shape is defined before ldtilecfg. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "X86.h" |
19 | #include "X86InstrBuilder.h" |
20 | #include "X86MachineFunctionInfo.h" |
21 | #include "X86RegisterInfo.h" |
22 | #include "X86Subtarget.h" |
23 | #include "llvm/ADT/DepthFirstIterator.h" |
24 | #include "llvm/ADT/PostOrderIterator.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/CodeGen/MachineFrameInfo.h" |
27 | #include "llvm/CodeGen/MachineFunctionPass.h" |
28 | #include "llvm/CodeGen/MachineInstr.h" |
29 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | #include "llvm/CodeGen/Passes.h" |
31 | #include "llvm/CodeGen/TargetInstrInfo.h" |
32 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
33 | #include "llvm/InitializePasses.h" |
34 | #include "llvm/Support/Debug.h" |
35 | |
36 | using namespace llvm; |
37 | |
38 | #define DEBUG_TYPE"fastpretileconfig" "fastpretileconfig" |
39 | |
40 | STATISTIC(NumStores, "Number of stores added")static llvm::Statistic NumStores = {"fastpretileconfig", "NumStores" , "Number of stores added"}; |
41 | STATISTIC(NumLoads, "Number of loads added")static llvm::Statistic NumLoads = {"fastpretileconfig", "NumLoads" , "Number of loads added"}; |
42 | |
43 | namespace { |
44 | |
45 | class X86FastPreTileConfig : public MachineFunctionPass { |
46 | MachineFunction *MF = nullptr; |
47 | const X86Subtarget *ST = nullptr; |
48 | const TargetInstrInfo *TII = nullptr; |
49 | MachineRegisterInfo *MRI = nullptr; |
50 | X86MachineFunctionInfo *X86FI = nullptr; |
51 | MachineFrameInfo *MFI = nullptr; |
52 | const TargetRegisterInfo *TRI = nullptr; |
53 | MachineBasicBlock *MBB = nullptr; |
54 | int CfgSS = -1; |
55 | struct PHIInfo { |
56 | Register Row; |
57 | Register Col; |
58 | Register StackAddr; |
59 | }; |
60 | DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs; |
61 | |
62 | /// Maps virtual regs to the frame index where these values are spilled. |
63 | IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; |
64 | |
65 | /// Has a bit set for tile virtual register for which it was determined |
66 | /// that it is alive across blocks. |
67 | BitVector MayLiveAcrossBlocks; |
68 | |
69 | int getStackSpaceFor(Register VirtReg); |
70 | void InitializeTileConfigStackSpace(); |
71 | bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI); |
72 | void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill); |
73 | void reload(MachineBasicBlock::iterator UseMI, Register VirtReg, |
74 | MachineOperand *RowMO, MachineOperand *ColMO); |
75 | void canonicalizePHIs(MachineBasicBlock &MBB); |
76 | void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI); |
77 | void convertPHIs(MachineBasicBlock &MBB); |
78 | bool configBasicBlock(MachineBasicBlock &MBB); |
79 | |
80 | public: |
81 | X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {} |
82 | |
83 | /// Return the pass name. |
84 | StringRef getPassName() const override { |
85 | return "Fast Tile Register Preconfigure"; |
86 | } |
87 | |
88 | /// Perform tile register configure. |
89 | bool runOnMachineFunction(MachineFunction &MFunc) override; |
90 | |
91 | static char ID; |
92 | }; |
93 | |
94 | } // end anonymous namespace |
95 | |
96 | char X86FastPreTileConfig::ID = 0; |
97 | |
98 | INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,static void *initializeX86FastPreTileConfigPassOnce(PassRegistry &Registry) { |
99 | "Fast Tile Register Preconfigure", false, false)static void *initializeX86FastPreTileConfigPassOnce(PassRegistry &Registry) { |
100 | INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Fast Tile Register Preconfigure" , "fastpretileconfig", &X86FastPreTileConfig::ID, PassInfo ::NormalCtor_t(callDefaultCtor<X86FastPreTileConfig>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeX86FastPreTileConfigPassFlag; void llvm::initializeX86FastPreTileConfigPass(PassRegistry &Registry ) { llvm::call_once(InitializeX86FastPreTileConfigPassFlag, initializeX86FastPreTileConfigPassOnce , std::ref(Registry)); } |
101 | "Fast Tile Register Preconfigure", false, false)PassInfo *PI = new PassInfo( "Fast Tile Register Preconfigure" , "fastpretileconfig", &X86FastPreTileConfig::ID, PassInfo ::NormalCtor_t(callDefaultCtor<X86FastPreTileConfig>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeX86FastPreTileConfigPassFlag; void llvm::initializeX86FastPreTileConfigPass(PassRegistry &Registry ) { llvm::call_once(InitializeX86FastPreTileConfigPassFlag, initializeX86FastPreTileConfigPassOnce , std::ref(Registry)); } |
102 | |
103 | static bool dominates(MachineBasicBlock &MBB, |
104 | MachineBasicBlock::const_iterator A, |
105 | MachineBasicBlock::const_iterator B) { |
106 | auto MBBEnd = MBB.end(); |
107 | if (B == MBBEnd) |
108 | return true; |
109 | |
110 | MachineBasicBlock::const_iterator I = MBB.begin(); |
111 | for (; &*I != A && &*I != B; ++I) |
112 | ; |
113 | |
114 | return &*I == A; |
115 | } |
116 | |
117 | /// This allocates space for the specified virtual register to be held on the |
118 | /// stack. |
119 | int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) { |
120 | // Find the location Reg would belong... |
121 | int SS = StackSlotForVirtReg[VirtReg]; |
122 | // Already has space allocated? |
123 | if (SS != -1) |
124 | return SS; |
125 | |
126 | // Allocate a new stack object for this spill location... |
127 | const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); |
128 | unsigned Size = TRI->getSpillSize(RC); |
129 | Align Alignment = TRI->getSpillAlign(RC); |
130 | int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment); |
131 | |
132 | // Assign the slot. |
133 | StackSlotForVirtReg[VirtReg] = FrameIdx; |
134 | return FrameIdx; |
135 | } |
136 | |
137 | /// Returns false if \p VirtReg is known to not live out of the current config. |
138 | /// If \p VirtReg live out of the current MBB, it must live out of the current |
139 | /// config |
140 | bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) { |
141 | if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) |
142 | return true; |
143 | |
144 | for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) { |
145 | if (UseInst.getParent() != MBB) { |
146 | MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); |
147 | return true; |
148 | } |
149 | |
150 | // The use and def are in the same MBB. If the tile register is |
151 | // reconfigured, it is crobbered and we need to spill and reload |
152 | // tile register. |
153 | if (CfgMI) { |
154 | if (dominates(*MBB, *CfgMI, UseInst)) { |
155 | MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); |
156 | return true; |
157 | } |
158 | } |
159 | } |
160 | |
161 | return false; |
162 | } |
163 | |
164 | void X86FastPreTileConfig::InitializeTileConfigStackSpace() { |
165 | MachineBasicBlock &MBB = MF->front(); |
166 | MachineInstr *MI = &*MBB.getFirstNonPHI(); |
167 | DebugLoc DL; |
168 | if (ST->hasAVX512()) { |
169 | Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); |
170 | BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm); |
171 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS) |
172 | .addReg(Zmm); |
173 | } else if (ST->hasAVX2()) { |
174 | Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass); |
175 | BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm); |
176 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS) |
177 | .addReg(Ymm); |
178 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS, |
179 | 32) |
180 | .addReg(Ymm); |
181 | } else { |
182 | assert(ST->hasSSE2() && "AMX should assume SSE2 enabled")(static_cast <bool> (ST->hasSSE2() && "AMX should assume SSE2 enabled" ) ? void (0) : __assert_fail ("ST->hasSSE2() && \"AMX should assume SSE2 enabled\"" , "llvm/lib/Target/X86/X86FastPreTileConfig.cpp", 182, __extension__ __PRETTY_FUNCTION__)); |
183 | unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; |
184 | Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass); |
185 | BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm); |
186 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS) |
187 | .addReg(Xmm); |
188 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16) |
189 | .addReg(Xmm); |
190 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32) |
191 | .addReg(Xmm); |
192 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48) |
193 | .addReg(Xmm); |
194 | } |
195 | // Fill in the palette first. |
196 | addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS) |
197 | .addImm(1); |
198 | } |
199 | |
200 | /// Insert spill instruction for \p AssignedReg before \p Before. |
201 | /// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot. |
202 | void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before, |
203 | Register VirtReg, bool Kill) { |
204 | LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("fastpretileconfig")) { dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n"; } } while (false); |
205 | int FI = getStackSpaceFor(VirtReg); |
206 | LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("fastpretileconfig")) { dbgs() << " to stack slot #" << FI << '\n'; } } while (false); |
207 | |
208 | const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); |
209 | // Don't need shape information for tile store, becasue it is adjacent to |
210 | // the tile def instruction. |
211 | TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI, |
212 | Register()); |
213 | ++NumStores; |
214 | |
215 | // TODO: update DBG_VALUEs |
216 | } |
217 | |
218 | /// Insert reload instruction for \p PhysReg before \p Before. |
219 | void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI, |
220 | Register OrigReg, MachineOperand *RowMO, |
221 | MachineOperand *ColMO) { |
222 | int FI = getStackSpaceFor(OrigReg); |
223 | const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg); |
224 | Register TileReg; |
225 | // Fold copy to tileload |
226 | // BB1: |
227 | // spill src to s |
228 | // |
229 | // BB2: |
230 | // t = copy src |
231 | // --> |
232 | // t = tileload (s) |
233 | if (UseMI->isCopy()) |
234 | TileReg = UseMI->getOperand(0).getReg(); |
235 | else |
236 | TileReg = MRI->createVirtualRegister(&RC); |
237 | // Can't use TII->loadRegFromStackSlot(), because we need the shape |
238 | // information for reload. |
239 | // tileloadd (%sp, %idx), %tmm |
240 | unsigned Opc = X86::PTILELOADDV; |
241 | Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
242 | // FIXME: MBB is not the parent of UseMI. |
243 | MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), |
Value stored to 'NewMI' during its initialization is never read | |
244 | TII->get(X86::MOV64ri), StrideReg) |
245 | .addImm(64); |
246 | NewMI = addFrameReference( |
247 | BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg) |
248 | .addReg(RowMO->getReg()) |
249 | .addReg(ColMO->getReg()), |
250 | FI); |
251 | MachineOperand &MO = NewMI->getOperand(5); |
252 | MO.setReg(StrideReg); |
253 | MO.setIsKill(true); |
254 | RowMO->setIsKill(false); |
255 | ColMO->setIsKill(false); |
256 | // Erase copy instruction after it is folded. |
257 | if (UseMI->isCopy()) { |
258 | UseMI->eraseFromParent(); |
259 | } else { |
260 | // Replace the register in the user MI. |
261 | for (auto &MO : UseMI->operands()) { |
262 | if (MO.isReg() && MO.getReg() == OrigReg) |
263 | MO.setReg(TileReg); |
264 | } |
265 | } |
266 | |
267 | ++NumLoads; |
268 | LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("fastpretileconfig")) { dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into " << printReg(TileReg , TRI) << '\n'; } } while (false) |
269 | << printReg(TileReg, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("fastpretileconfig")) { dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into " << printReg(TileReg , TRI) << '\n'; } } while (false); |
270 | } |
271 | |
272 | static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { |
273 | // The instruction must have 3 operands: tile def, row, col. |
274 | if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo()) |
275 | return false; |
276 | MachineOperand &MO = MI.getOperand(0); |
277 | |
278 | if (MO.isReg()) { |
279 | Register Reg = MO.getReg(); |
280 | // FIXME it may be used after Greedy RA and the physical |
281 | // register is not rewritten yet. |
282 | if (Reg.isVirtual() && |
283 | MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) |
284 | return true; |
285 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) |
286 | return true; |
287 | } |
288 | |
289 | return false; |
290 | } |
291 | |
292 | static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) { |
293 | MachineInstr *MI = MRI->getVRegDef(TileReg); |
294 | if (isTileDef(MRI, *MI)) { |
295 | MachineOperand *RowMO = &MI->getOperand(1); |
296 | MachineOperand *ColMO = &MI->getOperand(2); |
297 | return ShapeT(RowMO, ColMO, MRI); |
298 | } else if (MI->isCopy()) { |
299 | TileReg = MI->getOperand(1).getReg(); |
300 | return getShape(MRI, TileReg); |
301 | } |
302 | |
303 | // The def should not be PHI node, because we walk the MBB in reverse post |
304 | // order. |
305 | assert(MI->isPHI() && "Unexpected PHI when get shape.")(static_cast <bool> (MI->isPHI() && "Unexpected PHI when get shape." ) ? void (0) : __assert_fail ("MI->isPHI() && \"Unexpected PHI when get shape.\"" , "llvm/lib/Target/X86/X86FastPreTileConfig.cpp", 305, __extension__ __PRETTY_FUNCTION__)); |
306 | llvm_unreachable("Unexpected MI when get shape.")::llvm::llvm_unreachable_internal("Unexpected MI when get shape." , "llvm/lib/Target/X86/X86FastPreTileConfig.cpp", 306); |
307 | } |
308 | |
309 | // BB0: |
310 | // spill t0 to s0 |
311 | // BB1: |
312 | // spill t1 to s1 |
313 | // |
314 | // BB2: |
315 | // t = phi [t0, bb0] [t1, bb1] |
316 | // --> |
317 | // row = phi [r0, bb0] [r1, bb1] |
318 | // col = phi [c0, bb0] [c1, bb1] |
319 | // s = phi [s0, bb0] [s1, bb1] |
320 | // t = tileload row, col, s |
321 | // The new instruction is inserted at the end of the phi node. The order |
322 | // of the original phi node is not ensured. |
323 | void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB, |
324 | MachineInstr &PHI) { |
325 | // 1. Create instruction to get stack slot address of each incoming block. |
326 | // 2. Create PHI node for the stack address. |
327 | // 3. Create PHI node for shape. If one of the incoming shape is immediate |
328 | // use the immediate and delete the PHI node. |
329 | // 4. Create tileload instruction from the stack address. |
330 | Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
331 | MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), |
332 | TII->get(X86::PHI), StackAddrReg); |
333 | Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass); |
334 | MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), |
335 | TII->get(X86::PHI), RowReg); |
336 | Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass); |
337 | MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), |
338 | TII->get(X86::PHI), ColReg); |
339 | // Record the mapping of phi node and its row/column information. |
340 | VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg}; |
341 | |
342 | for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) { |
343 | // Get the 2 incoming value of tile register and MBB. |
344 | Register InTileReg = PHI.getOperand(I).getReg(); |
345 | // Mark it as liveout, so that it will be spilled when visit |
346 | // the incoming MBB. Otherwise since phi will be deleted, it |
347 | // would miss spill when visit incoming MBB. |
348 | MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg)); |
349 | MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB(); |
350 | |
351 | MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg); |
352 | MachineBasicBlock::iterator InsertPos; |
353 | if (TileDefMI->isPHI()) { |
354 | InsertPos = TileDefMI->getParent()->getFirstNonPHI(); |
355 | if (VisitedPHIs.count(TileDefMI)) { // circular phi reference |
356 | // def t1 |
357 | // / \ |
358 | // def t2 t3 = phi(t1, t4) <-- |
359 | // \ / | |
360 | // t4 = phi(t2, t3)------------- |
361 | // |
362 | // For each (row, column and stack address) append phi incoming value. |
363 | // Create r3 = phi(r1, r4) |
364 | // Create r4 = phi(r2, r3) |
365 | Register InRowReg = VisitedPHIs[TileDefMI].Row; |
366 | Register InColReg = VisitedPHIs[TileDefMI].Col; |
367 | Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr; |
368 | RowPHI.addReg(InRowReg).addMBB(InMBB); |
369 | ColPHI.addReg(InColReg).addMBB(InMBB); |
370 | AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); |
371 | continue; |
372 | } else { |
373 | // Recursively convert PHI to tileload |
374 | convertPHI(TileDefMI->getParent(), *TileDefMI); |
375 | // The PHI node is coverted to tileload instruction. Get the stack |
376 | // address from tileload operands. |
377 | MachineInstr *TileLoad = MRI->getVRegDef(InTileReg); |
378 | assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV)(static_cast <bool> (TileLoad && TileLoad->getOpcode () == X86::PTILELOADDV) ? void (0) : __assert_fail ("TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV" , "llvm/lib/Target/X86/X86FastPreTileConfig.cpp", 378, __extension__ __PRETTY_FUNCTION__)); |
379 | Register InRowReg = TileLoad->getOperand(1).getReg(); |
380 | Register InColReg = TileLoad->getOperand(2).getReg(); |
381 | Register InStackAddrReg = TileLoad->getOperand(3).getReg(); |
382 | RowPHI.addReg(InRowReg).addMBB(InMBB); |
383 | ColPHI.addReg(InColReg).addMBB(InMBB); |
384 | AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); |
385 | } |
386 | } else { |
387 | InsertPos = TileDefMI->getIterator(); |
388 | |
389 | // Fill the incoming operand of row/column phi instruction. |
390 | ShapeT Shape = getShape(MRI, InTileReg); |
391 | Shape.getRow()->setIsKill(false); |
392 | Shape.getCol()->setIsKill(false); |
393 | RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB); |
394 | ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB); |
395 | |
396 | // The incoming tile register live out of its def BB, it would be spilled. |
397 | // Create MI to get the spill stack slot address for the tile register |
398 | int FI = getStackSpaceFor(InTileReg); |
399 | Register InStackAddrReg = |
400 | MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
401 | addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(), |
402 | TII->get(X86::LEA64r), InStackAddrReg) |
403 | .addFrameIndex(FI), |
404 | 0); |
405 | AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); |
406 | } |
407 | } |
408 | |
409 | MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); |
410 | Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
411 | BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg) |
412 | .addImm(64); |
413 | Register TileReg = PHI.getOperand(0).getReg(); |
414 | MachineInstr *NewMI = addDirectMem( |
415 | BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg) |
416 | .addReg(RowReg) |
417 | .addReg(ColReg), |
418 | StackAddrReg); |
419 | MachineOperand &MO = NewMI->getOperand(5); |
420 | MO.setReg(StrideReg); |
421 | MO.setIsKill(true); |
422 | PHI.eraseFromParent(); |
423 | VisitedPHIs.erase(&PHI); |
424 | } |
425 | |
426 | static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) { |
427 | MachineOperand &MO = MI.getOperand(0); |
428 | if (MO.isReg() && MO.getReg().isVirtual() && |
429 | MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) |
430 | return true; |
431 | return false; |
432 | } |
433 | |
434 | void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) { |
435 | SmallVector<MachineInstr *, 8> PHIs; |
436 | |
437 | for (MachineInstr &MI : MBB) { |
438 | if (!MI.isPHI()) |
439 | break; |
440 | if (!isTileRegDef(MRI, MI)) |
441 | continue; |
442 | PHIs.push_back(&MI); |
443 | } |
444 | // Canonicalize the phi node first. One tile phi may depeneds previous |
445 | // phi node. For below case, we need convert %t4. |
446 | // |
447 | // BB0: |
448 | // %t3 = phi (t1 BB1, t2 BB0) |
449 | // %t4 = phi (t5 BB1, t3 BB0) |
450 | // --> |
451 | // %t3 = phi (t1 BB1, t2 BB0) |
452 | // %t4 = phi (t5 BB1, t2 BB0) |
453 | // |
454 | while (!PHIs.empty()) { |
455 | MachineInstr *PHI = PHIs.pop_back_val(); |
456 | |
457 | // Find the operand that is incoming from the same MBB and the def |
458 | // is also phi node. |
459 | MachineOperand *InMO = nullptr; |
460 | MachineInstr *DefMI = nullptr; |
461 | for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) { |
462 | Register InTileReg = PHI->getOperand(I).getReg(); |
463 | MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); |
464 | DefMI = MRI->getVRegDef(InTileReg); |
465 | if (InMBB != &MBB || !DefMI->isPHI()) |
466 | continue; |
467 | |
468 | InMO = &PHI->getOperand(I); |
469 | break; |
470 | } |
471 | // If can't find such operand, do nothing. |
472 | if (!InMO) |
473 | continue; |
474 | |
475 | // Current phi node depends on previous phi node. Break the |
476 | // dependency. |
477 | Register DefTileReg; |
478 | for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) { |
479 | MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); |
480 | if (InMBB != &MBB) |
481 | continue; |
482 | DefTileReg = DefMI->getOperand(I).getReg(); |
483 | InMO->setReg(DefTileReg); |
484 | break; |
485 | } |
486 | } |
487 | } |
488 | |
489 | void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) { |
490 | SmallVector<MachineInstr *, 8> PHIs; |
491 | for (MachineInstr &MI : MBB) { |
492 | if (!MI.isPHI()) |
493 | break; |
494 | if (!isTileRegDef(MRI, MI)) |
495 | continue; |
496 | PHIs.push_back(&MI); |
497 | } |
498 | while (!PHIs.empty()) { |
499 | MachineInstr *MI = PHIs.pop_back_val(); |
500 | VisitedPHIs.clear(); |
501 | convertPHI(&MBB, *MI); |
502 | } |
503 | } |
504 | |
505 | // PreTileConfig should configure the tile registers based on basic |
506 | // block. |
507 | bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) { |
508 | this->MBB = &MBB; |
509 | bool Change = false; |
510 | MachineInstr *LastShapeMI = nullptr; |
511 | MachineInstr *LastTileCfg = nullptr; |
512 | bool HasUnconfigTile = false; |
513 | |
514 | auto Config = [&](MachineInstr &Before) { |
515 | if (CfgSS == -1) |
516 | CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(), |
517 | ST->getTileConfigAlignment(), false); |
518 | LastTileCfg = addFrameReference( |
519 | BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS); |
520 | LastShapeMI = nullptr; |
521 | Change = true; |
522 | }; |
523 | auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) { |
524 | for (const MachineOperand &MO : MI.operands()) { |
525 | if (!MO.isReg()) |
526 | continue; |
527 | Register Reg = MO.getReg(); |
528 | if (Reg.isVirtual() && |
529 | MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) |
530 | return true; |
531 | } |
532 | return false; |
533 | }; |
534 | for (MachineInstr &MI : reverse(MBB)) { |
535 | // We have transformed phi node before configuring BB. |
536 | if (MI.isPHI()) |
537 | break; |
538 | // Don't collect the shape of used tile, the tile should be defined |
539 | // before the tile use. Spill and reload would happen if there is only |
540 | // tile use after ldtilecfg, so the shape can be collected from reload. |
541 | // Take below code for example. %t would be reloaded before tilestore |
542 | // call |
543 | // .... |
544 | // tilestore %r, %c, %t |
545 | // --> |
546 | // call |
547 | // ldtilecfg |
548 | // %t = tileload %r, %c |
549 | // tilestore %r, %c, %t |
550 | if (HasTileOperand(MRI, MI)) |
551 | HasUnconfigTile = true; |
552 | // According to AMX ABI, all the tile registers including config register |
553 | // are volatile. Caller need to save/restore config register. |
554 | if (MI.isCall() && HasUnconfigTile) { |
555 | MachineBasicBlock::iterator I; |
556 | if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) |
557 | I = ++LastShapeMI->getIterator(); |
558 | else |
559 | I = ++MI.getIterator(); |
560 | Config(*I); |
561 | HasUnconfigTile = false; |
562 | continue; |
563 | } |
564 | if (!isTileDef(MRI, MI)) |
565 | continue; |
566 | // |
567 | //--------------------------------------------------------------------- |
568 | // Don't handle COPY instruction. If the src and dst of the COPY can be |
569 | // in the same config in below case, we just check the shape of t0. |
570 | // def row0 |
571 | // def col0 |
572 | // ldtilecfg |
573 | // t0 = tielzero(row0, col0) |
574 | // t1 = copy t0 |
575 | // ... |
576 | // If the src and dst of the COPY can NOT be in the same config in below |
577 | // case. Reload would be generated befor the copy instruction. |
578 | // def row0 |
579 | // def col0 |
580 | // t0 = tielzero(row0, col0) |
581 | // spill t0 |
582 | // ... |
583 | // def row1 |
584 | // def col1 |
585 | // ldtilecfg |
586 | // t1 = tilezero(row1, col1) |
587 | // reload t0 |
588 | // t1 = copy t0 |
589 | //--------------------------------------------------------------------- |
590 | // |
591 | // If MI dominate the last shape def instruction, we need insert |
592 | // ldtilecfg after LastShapeMI now. The config doesn't include |
593 | // current MI. |
594 | // def row0 |
595 | // def col0 |
596 | // tilezero(row0, col0) <- MI |
597 | // def row1 |
598 | // def col1 |
599 | // ldtilecfg <- insert |
600 | // tilezero(row1, col1) |
601 | if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) |
602 | Config(*(++LastShapeMI->getIterator())); |
603 | MachineOperand *RowMO = &MI.getOperand(1); |
604 | MachineOperand *ColMO = &MI.getOperand(2); |
605 | MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg()); |
606 | MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg()); |
607 | // If the shape is defined in current MBB, check the domination. |
608 | // FIXME how about loop? |
609 | if (RowMI->getParent() == &MBB) { |
610 | if (!LastShapeMI) |
611 | LastShapeMI = RowMI; |
612 | else if (dominates(MBB, LastShapeMI, RowMI)) |
613 | LastShapeMI = RowMI; |
614 | } |
615 | if (ColMI->getParent() == &MBB) { |
616 | if (!LastShapeMI) |
617 | LastShapeMI = ColMI; |
618 | else if (dominates(MBB, LastShapeMI, ColMI)) |
619 | LastShapeMI = ColMI; |
620 | } |
621 | // If there is user live out of the tilecfg, spill it and reload in |
622 | // before the user. |
623 | Register TileReg = MI.getOperand(0).getReg(); |
624 | if (mayLiveOut(TileReg, LastTileCfg)) |
625 | spill(++MI.getIterator(), TileReg, false); |
626 | for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) { |
627 | if (UseMI.getParent() == &MBB) { |
628 | // check user should not across ldtilecfg |
629 | if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI)) |
630 | continue; |
631 | // reload befor UseMI |
632 | reload(UseMI.getIterator(), TileReg, RowMO, ColMO); |
633 | } else { |
634 | // Don't reload for phi instruction, we handle phi reload separately. |
635 | // TODO: merge the reload for the same user MBB. |
636 | if (!UseMI.isPHI()) |
637 | reload(UseMI.getIterator(), TileReg, RowMO, ColMO); |
638 | } |
639 | } |
640 | } |
641 | |
642 | // Configure tile registers at the head of the MBB |
643 | if (HasUnconfigTile) { |
644 | MachineInstr *Before; |
645 | if (LastShapeMI == nullptr || LastShapeMI->isPHI()) |
646 | Before = &*MBB.getFirstNonPHI(); |
647 | else |
648 | Before = &*(++LastShapeMI->getIterator()); |
649 | |
650 | Config(*Before); |
651 | } |
652 | |
653 | return Change; |
654 | } |
655 | |
656 | bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) { |
657 | MF = &MFunc; |
658 | MRI = &MFunc.getRegInfo(); |
659 | ST = &MFunc.getSubtarget<X86Subtarget>(); |
660 | TII = ST->getInstrInfo(); |
661 | X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); |
662 | MFI = &MFunc.getFrameInfo(); |
663 | TRI = ST->getRegisterInfo(); |
664 | CfgSS = -1; |
665 | |
666 | unsigned NumVirtRegs = MRI->getNumVirtRegs(); |
667 | // Abandon early if there is no tile register to config. |
668 | bool HasVirtTileReg = false; |
669 | for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) { |
670 | Register VirtReg = Register::index2VirtReg(I); |
671 | if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) { |
672 | HasVirtTileReg = true; |
673 | break; |
674 | } |
675 | } |
676 | if (!HasVirtTileReg) |
677 | return false; |
678 | |
679 | StackSlotForVirtReg.resize(NumVirtRegs); |
680 | MayLiveAcrossBlocks.clear(); |
681 | // We will create register during config. *3 is to make sure |
682 | // the virtual register number doesn't exceed the size of |
683 | // the bit vector. |
684 | MayLiveAcrossBlocks.resize(NumVirtRegs * 3); |
685 | bool Change = false; |
686 | assert(MRI->isSSA())(static_cast <bool> (MRI->isSSA()) ? void (0) : __assert_fail ("MRI->isSSA()", "llvm/lib/Target/X86/X86FastPreTileConfig.cpp" , 686, __extension__ __PRETTY_FUNCTION__)); |
687 | |
688 | // Canonicalize the phi node first. |
689 | for (MachineBasicBlock &MBB : MFunc) |
690 | canonicalizePHIs(MBB); |
691 | |
692 | // Loop over all of the basic blocks in reverse post order and insert |
693 | // ldtilecfg for tile registers. The reserse post order is to facilitate |
694 | // PHI node convert. |
695 | ReversePostOrderTraversal<MachineFunction *> RPOT(MF); |
696 | for (MachineBasicBlock *MBB : RPOT) { |
697 | convertPHIs(*MBB); |
698 | Change |= configBasicBlock(*MBB); |
699 | } |
700 | |
701 | if (Change) |
702 | InitializeTileConfigStackSpace(); |
703 | |
704 | StackSlotForVirtReg.clear(); |
705 | return Change; |
706 | } |
707 | |
708 | FunctionPass *llvm::createX86FastPreTileConfigPass() { |
709 | return new X86FastPreTileConfig(); |
710 | } |