//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers.
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of the basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded before
/// their users. It also checks the dependency of the shape to ensure the
/// shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register that was determined to be
  /// live across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

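// Returns true if \p A appears before \p B when scanning \p MBB from the
// beginning (the block end iterator is treated as "after everything"). This
// is a simple linear scan used as a local dominance check within one block.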
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

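// Zero-initialize the 64-byte tile configuration area on the stack in the
// entry block and set the palette byte to 1, using the widest vector stores
// the subtarget supports (ZMM, YMM or XMM).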
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
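  // addFrameReference filled in the memory operands of the tile load; operand
  // 5 is the index register of that memory reference, which carries the
  // per-row stride. Rewrite it to the 64-byte stride materialized above.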
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase the copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

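// Returns true if \p MI is a pseudo tile def, i.e. it defines a tile register
// (a virtual register of the TILE class or a physical TMM0-TMM7 register).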
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME: it may be used after Greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

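// Walk the definition chain of \p TileReg, looking through COPYs, and return
// the row/column operands of the defining tile instruction.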
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instructions are inserted at the end of the phi nodes. The order
// of the original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instructions to get the stack slot address of each incoming
  //    block.
  // 2. Create a PHI node for the stack address.
  // 3. Create PHI nodes for the shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create a tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of the phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

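  // PHI operands come in (incoming value, predecessor MBB) pairs starting at
  // operand index 1; operand 0 is the destination register.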
  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the two incoming values: the tile register and its MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //          def t1
        //         /      \
        //   def t2        t3 = phi(t1, t4) <--
        //         \      /                   |
        //          t4 = phi(t2, t3)-----------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert the PHI to a tileload.
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operands of the row/column phi instructions.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

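  // All incoming values are now routed through the row/col/address PHIs.
  // Materialize the 64-byte stride and emit a single PTILELOADDV at the
  // block's first non-PHI position; operand 5 (the index register of the
  // memory reference) carries the stride.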
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

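// Rewrite tile PHIs so that no incoming value from this block is itself
// defined by another PHI in the same block; this breaks intra-block
// PHI-to-PHI dependencies before convertPHI runs.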
void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def
    // is also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

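// Collect the tile PHIs of \p MBB and convert each of them (and, recursively,
// any tile PHIs they depend on) into a PTILELOADDV from the spill slot.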
void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers based on each basic
// block.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

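  // Lazily create the tile config stack slot and insert a PLDTILECFGV that
  // loads the configuration before the given instruction.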
  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
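  // Returns true if \p MI has any virtual tile register operand.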
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile must be defined
    // before the tile use. Spill and reload happen if there is only a tile
    // use after ldtilecfg, so the shape can be collected from the reload.
    // Take the code below for example. %t would be reloaded before tilestore
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. Callers need to save/restore the config register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of a COPY can be in
    // the same config, as in the case below, we just check the shape of t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0) <- MI
    // def row1
    // def col1
    // ldtilecfg <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If a user lives out of the tile config, spill the tile register and
    // reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // Only reload if the use is separated from the def by ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instructions; we handle phi reloads separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  // Abandon early if there is no tile register to config.
  bool HasVirtTileReg = false;
  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
    Register VirtReg = Register::index2VirtReg(I);
    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
      HasVirtTileReg = true;
      break;
    }
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. Multiplying by 3 makes sure
  // the virtual register number doesn't exceed the size of the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}