//===- SILoadStoreOptimizer.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to fuse DS instructions with close by immediate offsets.
// This will fuse operations such as
//  ds_read_b32 v0, v2 offset:16
//  ds_read_b32 v1, v2 offset:32
// ==>
//   ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
//
// The same is done for certain SMEM and VMEM opcodes, e.g.:
//  s_buffer_load_dword s4, s[0:3], 4
//  s_buffer_load_dword s5, s[0:3], 8
// ==>
//  s_buffer_load_dwordx2 s[4:5], s[0:3], 4
//
// Future improvements:
//
// - This currently relies on the scheduler to place loads and stores next to
//   each other, and then only merges adjacent pairs of instructions. It would
//   be good to be more flexible with interleaved instructions, and possibly run
//   before scheduling. It currently misses stores of constants because loading
//   the constant into the data register is placed between the stores, although
//   this is arguably a scheduling problem.
//
// - Live interval recomputing seems inefficient. This currently only matches
//   one pair, and recomputes live intervals and moves on to the next pair. It
//   would be better to compute a list of all merges that need to occur.
//
// - With a list of instructions to process, we can also merge more. If a
//   cluster of loads have offsets that are too large to fit in the 8-bit
//   offsets, but are close enough together to fit in 8 bits, we can add to the
//   base pointer and use the new reduced offsets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "si-load-store-opt"

namespace {

class SILoadStoreOptimizer : public MachineFunctionPass {
  enum InstClassEnum {
    DS_READ_WRITE,
    S_BUFFER_LOAD_IMM,
    BUFFER_LOAD_OFFEN,
    BUFFER_LOAD_OFFSET,
    BUFFER_STORE_OFFEN,
    BUFFER_STORE_OFFSET,
  };

  struct CombineInfo {
    MachineBasicBlock::iterator I;
    MachineBasicBlock::iterator Paired;
    unsigned EltSize;
    unsigned Offset0;
    unsigned Offset1;
    unsigned BaseOff;
    InstClassEnum InstClass;
    bool GLC0;
    bool GLC1;
    bool SLC0;
    bool SLC1;
    bool UseST64;
    bool IsX2;
    SmallVector<MachineInstr*, 8> InstsToMove;
  };

private:
  const SISubtarget *STM = nullptr;
  const SIInstrInfo *TII = nullptr;
  const SIRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  AliasAnalysis *AA = nullptr;
  unsigned CreatedX2;

  static bool offsetsCanBeCombined(CombineInfo &CI);

  bool findMatchingInst(CombineInfo &CI);

  unsigned read2Opcode(unsigned EltSize) const;
  unsigned read2ST64Opcode(unsigned EltSize) const;
  MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI);

  unsigned write2Opcode(unsigned EltSize) const;
  unsigned write2ST64Opcode(unsigned EltSize) const;
  MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
  MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
  MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
  unsigned promoteBufferStoreOpcode(const MachineInstr &I, bool &IsX2,
                                    bool &IsOffen) const;
  MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);

public:
  static char ID;

  SILoadStoreOptimizer() : MachineFunctionPass(ID) {
    initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
  }

  bool optimizeBlock(MachineBasicBlock &MBB);

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Load Store Optimizer"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace.

INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
                      "SI Load Store Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
                    "SI Load Store Optimizer", false, false)

char SILoadStoreOptimizer::ID = 0;

char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;

FunctionPass *llvm::createSILoadStoreOptimizerPass() {
  return new SILoadStoreOptimizer();
}

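// Reinsert each collected instruction immediately after I, preserving the
// order in which the instructions were gathered.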
static void moveInstsAfter(MachineBasicBlock::iterator I,
                           ArrayRef<MachineInstr*> InstsToMove) {
  MachineBasicBlock *MBB = I->getParent();
  ++I;
  for (MachineInstr *MI : InstsToMove) {
    MI->removeFromParent();
    MBB->insert(I, MI);
  }
}

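// Record every register MI defines, plus any physical registers it reads, so
// that later instructions depending on them can be recognized.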
static void addDefsUsesToList(const MachineInstr &MI,
                              DenseSet<unsigned> &RegDefs,
                              DenseSet<unsigned> &PhysRegUses) {
  for (const MachineOperand &Op : MI.operands()) {
    if (Op.isReg()) {
      if (Op.isDef())
        RegDefs.insert(Op.getReg());
      else if (Op.readsReg() &&
               TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
        PhysRegUses.insert(Op.getReg());
    }
  }
}

static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                      MachineBasicBlock::iterator B,
                                      const SIInstrInfo *TII,
                                      AliasAnalysis * AA) {
  // RAW or WAR - cannot reorder
  // WAW - cannot reorder
  // RAR - safe to reorder
  return !(A->mayStore() || B->mayStore()) ||
    TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
}

// Add MI and its defs to the lists if MI reads one of the defs that are
// already in the list. Returns true in that case.
static bool
addToListsIfDependent(MachineInstr &MI,
                      DenseSet<unsigned> &RegDefs,
                      DenseSet<unsigned> &PhysRegUses,
                      SmallVectorImpl<MachineInstr*> &Insts) {
  for (MachineOperand &Use : MI.operands()) {
    // If one of the defs is read, then there is a use of Def between I and the
    // instruction that I will potentially be merged with. We will need to move
    // this instruction after the merged instructions.
    //
    // Similarly, if there is a def which is read by an instruction that is to
    // be moved for merging, then we need to move the def-instruction as well.
    // This can only happen for physical registers such as M0; virtual
    // registers are in SSA form.
    if (Use.isReg() &&
        ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
         (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
          PhysRegUses.count(Use.getReg())))) {
      Insts.push_back(&MI);
      addDefsUsesToList(MI, RegDefs, PhysRegUses);
      return true;
    }
  }

  return false;
}

static bool
canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                        ArrayRef<MachineInstr*> InstsToMove,
                        const SIInstrInfo *TII,
                        AliasAnalysis *AA) {
  assert(MemOp.mayLoadOrStore());

  for (MachineInstr *InstToMove : InstsToMove) {
    if (!InstToMove->mayLoadOrStore())
      continue;
    if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
      return false;
  }
  return true;
}

bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
  // XXX - Would the same offset be OK? Is there any reason this would happen or
  // be useful?
  if (CI.Offset0 == CI.Offset1)
    return false;

  // This won't be valid if the offset isn't aligned.
  if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0))
    return false;

  unsigned EltOffset0 = CI.Offset0 / CI.EltSize;
  unsigned EltOffset1 = CI.Offset1 / CI.EltSize;
  CI.UseST64 = false;
  CI.BaseOff = 0;

  // Handle SMEM and VMEM instructions.
  if (CI.InstClass != DS_READ_WRITE) {
    unsigned Diff = CI.IsX2 ? 2 : 1;
    return (EltOffset0 + Diff == EltOffset1 ||
            EltOffset1 + Diff == EltOffset0) &&
           CI.GLC0 == CI.GLC1 &&
           (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
  }

  // If the offset in elements doesn't fit in 8-bits, we might be able to use
  // the stride 64 versions.
  if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 &&
      isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) {
    CI.Offset0 = EltOffset0 / 64;
    CI.Offset1 = EltOffset1 / 64;
    CI.UseST64 = true;
    return true;
  }

  // Check if the new offsets fit in the reduced 8-bit range.
  if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) {
    CI.Offset0 = EltOffset0;
    CI.Offset1 = EltOffset1;
    return true;
  }

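  // Worked example (EltSize = 4): byte offsets 2048 and 2052 give element
  // offsets 512 and 513, which fit neither form above. Their difference (1)
  // does fit in 8 bits, so BaseOff becomes 2048 and the rewritten offsets
  // are 0 and 1.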
  // Try to shift base address to decrease offsets.
  unsigned OffsetDiff = std::abs((int)EltOffset1 - (int)EltOffset0);
  CI.BaseOff = std::min(CI.Offset0, CI.Offset1);

  if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) {
    CI.Offset0 = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
    CI.Offset1 = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
    CI.UseST64 = true;
    return true;
  }

  if (isUInt<8>(OffsetDiff)) {
    CI.Offset0 = EltOffset0 - CI.BaseOff / CI.EltSize;
    CI.Offset1 = EltOffset1 - CI.BaseOff / CI.EltSize;
    return true;
  }

  return false;
}

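// Scan forward from CI.I for an instruction with the same opcode and base
// address whose offset can be combined with CI.I's. Instructions that merely
// need to be shuffled below the eventual merge point are collected in
// CI.InstsToMove along the way.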
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator MBBI = CI.I;

  unsigned AddrOpName[3] = {0};
  int AddrIdx[3];
  const MachineOperand *AddrReg[3];
  unsigned NumAddresses = 0;

  switch (CI.InstClass) {
  case DS_READ_WRITE:
    AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
    break;
  case S_BUFFER_LOAD_IMM:
    AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
    break;
  case BUFFER_LOAD_OFFEN:
  case BUFFER_STORE_OFFEN:
    AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
    AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
    AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
    break;
  case BUFFER_LOAD_OFFSET:
  case BUFFER_STORE_OFFSET:
    AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
    AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
    break;
  }

  for (unsigned i = 0; i < NumAddresses; i++) {
    AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
    AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);

    // We only ever merge operations with the same base address register, so
    // don't bother scanning forward if there are no other uses.
    if (AddrReg[i]->isReg() &&
        (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
         MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
      return false;
  }

  ++MBBI;

  DenseSet<unsigned> RegDefsToMove;
  DenseSet<unsigned> PhysRegUsesToMove;
  addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != CI.I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects()) {
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;
      }

      if (MBBI->mayLoadOrStore() &&
          (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will need
      // to be moved down as well.
      addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                            CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                              CI.InstsToMove))
      continue;

    bool Match = true;
    for (unsigned i = 0; i < NumAddresses; i++) {
      const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]);

      if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
        if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
            AddrReg[i]->getImm() != AddrRegNext.getImm()) {
          Match = false;
          break;
        }
        continue;
      }

      // Check same base pointer. Be careful of subregisters, which can occur
      // with vectors of pointers.
      if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
          AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
        Match = false;
        break;
      }
    }

    if (Match) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
      CI.Paired = MBBI;

      if (CI.InstClass == DS_READ_WRITE) {
        CI.Offset0 &= 0xffff;
        CI.Offset1 &= 0xffff;
      } else {
        CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm();
        CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm();
        if (CI.InstClass != S_BUFFER_LOAD_IMM) {
          CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
          CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
        }
      }

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // Check if we can move I across MBBI and if we can move all I's users.
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}

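// Opcode helpers: the plain DS_READ2*/DS_WRITE2* forms are used on subtargets
// that still require M0 to be initialized for LDS access; the *_gfx9 variants
// are the equivalent forms without the implicit M0 use.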
unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const {
  if (STM->ldsRequiresM0Init())
    return (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
  return (EltSize == 4) ? AMDGPU::DS_READ2_B32_gfx9 : AMDGPU::DS_READ2_B64_gfx9;
}

unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
  if (STM->ldsRequiresM0Init())
    return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;

  return (EltSize == 4) ?
    AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9;
}

MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
  CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();

  // Be careful, since the addresses could be subregisters themselves in weird
  // cases, like vectors of pointers.
  const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);

  const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
  const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdst);

  unsigned NewOffset0 = CI.Offset0;
  unsigned NewOffset1 = CI.Offset1;
  unsigned Opc = CI.UseST64 ?
    read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);

  unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;

  if (NewOffset0 > NewOffset1) {
    // Canonicalize the merged instruction so the smaller offset comes first.
    std::swap(NewOffset0, NewOffset1);
    std::swap(SubRegIdx0, SubRegIdx1);
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  const MCInstrDesc &Read2Desc = TII->get(Opc);

  const TargetRegisterClass *SuperRC
    = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);

  DebugLoc DL = CI.I->getDebugLoc();

  unsigned BaseReg = AddrReg->getReg();
  unsigned BaseRegFlags = 0;
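  // If offsetsCanBeCombined chose to rebase, materialize BaseOff in an SGPR
  // and add it to the original address so the smaller rewritten offsets apply.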
  if (CI.BaseOff) {
    unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
      .addImm(CI.BaseOff);

    BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BaseRegFlags = RegState::Kill;

    TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
      .addReg(ImmReg)
      .addReg(AddrReg->getReg());
  }

  MachineInstrBuilder Read2 =
    BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
      .addReg(BaseReg, BaseRegFlags) // addr
      .addImm(NewOffset0)            // offset0
      .addImm(NewOffset1)            // offset1
      .addImm(0)                     // gds
      .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));

  (void)Read2;

  const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);

  // Copy to the old destination registers.
  BuildMI(*MBB, CI.Paired, DL, CopyDesc)
    .add(*Dest0) // Copy to same destination including flags and sub reg.
    .addReg(DestReg, 0, SubRegIdx0);
  MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
                          .add(*Dest1)
                          .addReg(DestReg, RegState::Kill, SubRegIdx1);

  moveInstsAfter(Copy1, CI.InstsToMove);

  MachineBasicBlock::iterator Next = std::next(CI.I);
  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();

  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
  return Next;
}

unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
  if (STM->ldsRequiresM0Init())
    return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
  return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9;
}

unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
  if (STM->ldsRequiresM0Init())
    return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;

  return (EltSize == 4) ?
    AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9;
}

MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
  CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();

  // Be sure to use .add(), and not .addReg() with these. We want to be sure we
  // preserve the subregister index and any register flags set on them.
  const MachineOperand *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
  const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
  const MachineOperand *Data1
    = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);

  unsigned NewOffset0 = CI.Offset0;
  unsigned NewOffset1 = CI.Offset1;
  unsigned Opc = CI.UseST64 ?
    write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);

  if (NewOffset0 > NewOffset1) {
    // Canonicalize the merged instruction so the smaller offset comes first.
    std::swap(NewOffset0, NewOffset1);
    std::swap(Data0, Data1);
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  const MCInstrDesc &Write2Desc = TII->get(Opc);
  DebugLoc DL = CI.I->getDebugLoc();

  unsigned BaseReg = AddrReg->getReg();
  unsigned BaseRegFlags = 0;
  if (CI.BaseOff) {
    unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
      .addImm(CI.BaseOff);

    BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BaseRegFlags = RegState::Kill;

    TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
      .addReg(ImmReg)
      .addReg(AddrReg->getReg());
  }

  MachineInstrBuilder Write2 =
    BuildMI(*MBB, CI.Paired, DL, Write2Desc)
      .addReg(BaseReg, BaseRegFlags) // addr
      .add(*Data0)                   // data0
      .add(*Data1)                   // data1
      .addImm(NewOffset0)            // offset0
      .addImm(NewOffset1)            // offset1
      .addImm(0)                     // gds
      .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));

  moveInstsAfter(Write2, CI.InstsToMove);

  MachineBasicBlock::iterator Next = std::next(CI.I);
  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();

  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
  return Next;
}

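// The SMEM/VMEM load merge helpers below share a pattern: emit one wider load
// in front of the paired instruction, then copy each original destination out
// of the matching subregister of the merged result.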
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
  CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  DebugLoc DL = CI.I->getDebugLoc();
  unsigned Opcode = CI.IsX2 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM :
    AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;

  const TargetRegisterClass *SuperRC =
    CI.IsX2 ? &AMDGPU::SReg_128RegClass : &AMDGPU::SReg_64_XEXECRegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);
  unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);

  BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
    .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
    .addImm(MergedOffset) // offset
    .addImm(CI.GLC0)      // glc
    .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));

  unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
  unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;

  // Handle descending offsets
  if (CI.Offset0 > CI.Offset1)
    std::swap(SubRegIdx0, SubRegIdx1);

  // Copy to the old destination registers.
  const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
  const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst);
  const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::sdst);

  BuildMI(*MBB, CI.Paired, DL, CopyDesc)
    .add(*Dest0) // Copy to same destination including flags and sub reg.
    .addReg(DestReg, 0, SubRegIdx0);
  MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
                          .add(*Dest1)
                          .addReg(DestReg, RegState::Kill, SubRegIdx1);

  moveInstsAfter(Copy1, CI.InstsToMove);

  MachineBasicBlock::iterator Next = std::next(CI.I);
  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();
  return Next;
}

MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
  CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  DebugLoc DL = CI.I->getDebugLoc();
  unsigned Opcode;

  if (CI.InstClass == BUFFER_LOAD_OFFEN) {
    Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN :
      AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  } else {
    Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET :
      AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  }

  const TargetRegisterClass *SuperRC =
    CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);
  unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);

  auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);

  if (CI.InstClass == BUFFER_LOAD_OFFEN)
    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));

  MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
    .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
    .addImm(MergedOffset) // offset
    .addImm(CI.GLC0)      // glc
    .addImm(CI.SLC0)      // slc
    .addImm(0)            // tfe
    .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));

  unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
  unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;

  // Handle descending offsets
  if (CI.Offset0 > CI.Offset1)
    std::swap(SubRegIdx0, SubRegIdx1);

  // Copy to the old destination registers.
  const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
  const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
  const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);

  BuildMI(*MBB, CI.Paired, DL, CopyDesc)
    .add(*Dest0) // Copy to same destination including flags and sub reg.
    .addReg(DestReg, 0, SubRegIdx0);
  MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
                          .add(*Dest1)
                          .addReg(DestReg, RegState::Kill, SubRegIdx1);

  moveInstsAfter(Copy1, CI.InstsToMove);

  MachineBasicBlock::iterator Next = std::next(CI.I);
  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();
  return Next;
}

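// Map a single-dword (or x2) buffer store opcode to the next wider variant.
// IsX2 reports whether the input was already an x2 store, and IsOffen whether
// it uses the OFFEN addressing form; opcodes this pass does not handle
// return 0.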
unsigned SILoadStoreOptimizer::promoteBufferStoreOpcode(
  const MachineInstr &I, bool &IsX2, bool &IsOffen) const {
  IsX2 = false;
  IsOffen = false;

  switch (I.getOpcode()) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    IsOffen = true;
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
    IsOffen = true;
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    IsX2 = true;
    IsOffen = true;
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact:
    IsX2 = true;
    IsOffen = true;
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN_exact;
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    IsX2 = true;
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact:
    IsX2 = true;
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET_exact;
  }
  return 0;
}

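// Unlike the load merges above, a merged store first has to gather its two
// source registers into one wider register with a REG_SEQUENCE before the
// wider store can be emitted.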
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
  CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  DebugLoc DL = CI.I->getDebugLoc();
  bool Unused1, Unused2;
  unsigned Opcode = promoteBufferStoreOpcode(*CI.I, Unused1, Unused2);

  unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
  unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;

  // Handle descending offsets
  if (CI.Offset0 > CI.Offset1)
    std::swap(SubRegIdx0, SubRegIdx1);

  // Copy to the new source register.
  const TargetRegisterClass *SuperRC =
    CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
  unsigned SrcReg = MRI->createVirtualRegister(SuperRC);

  const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
  const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);

  BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
    .add(*Src0)
    .addImm(SubRegIdx0)
    .add(*Src1)
    .addImm(SubRegIdx1);

  auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
               .addReg(SrcReg, RegState::Kill);

  if (CI.InstClass == BUFFER_STORE_OFFEN)
    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));

  MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
    .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
    .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
    .addImm(CI.GLC0)                          // glc
    .addImm(CI.SLC0)                          // slc
    .addImm(0)                                // tfe
    .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));

  moveInstsAfter(MIB, CI.InstsToMove);

  MachineBasicBlock::iterator Next = std::next(CI.I);
  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();
  return Next;
}

// Scan through looking for adjacent LDS operations with constant offsets from
// the same base register. We rely on the scheduler to do the hard work of
// clustering nearby loads, and assume these are all adjacent.
bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;

  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I;

    // Don't combine if volatile.
    if (MI.hasOrderedMemoryRef()) {
      ++I;
      continue;
    }

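    // Classify the instruction, then try to pair it. Each merge helper returns
    // the iterator to resume scanning from; otherwise just step forward.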
    CombineInfo CI;
    CI.I = I;
    unsigned Opc = MI.getOpcode();
    if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 ||
        Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) {

      CI.InstClass = DS_READ_WRITE;
      CI.EltSize =
        (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;

      if (findMatchingInst(CI)) {
        Modified = true;
        I = mergeRead2Pair(CI);
      } else {
        ++I;
      }

      continue;
    } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 ||
               Opc == AMDGPU::DS_WRITE_B32_gfx9 ||
               Opc == AMDGPU::DS_WRITE_B64_gfx9) {
      CI.InstClass = DS_READ_WRITE;
      CI.EltSize
        = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4;

      if (findMatchingInst(CI)) {
        Modified = true;
        I = mergeWrite2Pair(CI);
      } else {
        ++I;
      }

      continue;
    }
    if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
        Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
      // EltSize is in units of the offset encoding.
      CI.InstClass = S_BUFFER_LOAD_IMM;
      CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
      CI.IsX2 = Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
      if (findMatchingInst(CI)) {
        Modified = true;
        I = mergeSBufferLoadImmPair(CI);
        if (!CI.IsX2)
          CreatedX2++;
      } else {
        ++I;
      }
      continue;
    }
    if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
        Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
        Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFSET ||
        Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET) {
      if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
          Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN)
        CI.InstClass = BUFFER_LOAD_OFFEN;
      else
        CI.InstClass = BUFFER_LOAD_OFFSET;

      CI.EltSize = 4;
      CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
                Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
      if (findMatchingInst(CI)) {
        Modified = true;
        I = mergeBufferLoadPair(CI);
        if (!CI.IsX2)
          CreatedX2++;
      } else {
        ++I;
      }
      continue;
    }

    bool StoreIsX2, IsOffen;
    if (promoteBufferStoreOpcode(*I, StoreIsX2, IsOffen)) {
      CI.InstClass = IsOffen ? BUFFER_STORE_OFFEN : BUFFER_STORE_OFFSET;
      CI.EltSize = 4;
      CI.IsX2 = StoreIsX2;
      if (findMatchingInst(CI)) {
        Modified = true;
        I = mergeBufferStorePair(CI);
        if (!CI.IsX2)
          CreatedX2++;
      } else {
        ++I;
      }
      continue;
    }

    ++I;
  }

  return Modified;
}

bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  STM = &MF.getSubtarget<SISubtarget>();
  if (!STM->loadStoreOptEnabled())
    return false;

  TII = STM->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  assert(MRI->isSSA() && "Must be run on SSA");

  DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");

  bool Modified = false;

  for (MachineBasicBlock &MBB : MF) {
    CreatedX2 = 0;
    Modified |= optimizeBlock(MBB);

    // Run again to convert x2 to x4.
    if (CreatedX2 >= 1)
      Modified |= optimizeBlock(MBB);
  }

  return Modified;
}