LLVM  7.0.0svn
AMDGPUInstrInfo.cpp
Go to the documentation of this file.
1 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implementation of the TargetInstrInfo class that is common to all
12 /// AMD GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUTargetMachine.h"
22 
23 using namespace llvm;
24 
25 #define GET_INSTRINFO_CTOR_DTOR
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 // Pin the vtable to this file.
// The body is intentionally empty: the function exists only so that it has an
// out-of-line definition in this file, which is what pins the vtable here.
29 void AMDGPUInstrInfo::anchor() {}
30 
32  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
33  ST(ST),
34  AMDGPUASI(ST.getAMDGPUAS()) {}
35 
36 // FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
37 // the first 16 loads will be interleaved with the stores, and the next 16 will
38 // be clustered as expected. It should really split into two 16-store batches.
39 //
40 // Loads are clustered until this returns false, rather than trying to schedule
41 // groups of stores. This also means we have to deal with saying different
42 // address space loads should be clustered, and ones which might cause bank
43 // conflicts.
44 //
45 // This might be deprecated so it might not be worth that much effort to fix.
47  int64_t Offset0, int64_t Offset1,
48  unsigned NumLoads) const {
49  assert(Offset1 > Offset0 &&
50  "Second offset should be larger than first offset!");
51  // If we have at most 16 loads in a row, and the offsets are less than 64
52  // bytes apart, then schedule together.
53 
54  // A cacheline is 64 bytes (for global memory).
55  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
56 }
57 
58 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
60  SI = 0,
61  VI = 1,
62  SDWA = 2,
63  SDWA9 = 3,
64  GFX80 = 4,
65  GFX9 = 5
66 };
67 
69  switch (ST.getGeneration()) {
72  return SIEncodingFamily::SI;
75  return SIEncodingFamily::VI;
76 
77  // FIXME: This should never be called for r600 GPUs.
82  return SIEncodingFamily::SI;
83  }
84 
85  llvm_unreachable("Unknown subtarget generation!");
86 }
87 
/// Return a target-specific opcode if \p Opcode is a pseudo instruction.
///
/// The value produced by AMDGPU::getMCOpcode(Opcode, Gen) is interpreted as:
///  - -1: \p Opcode is already a native instruction; it is returned unchanged.
///  - (uint16_t)-1: \p Opcode is a pseudo instruction with no encoding in the
///    given subtarget generation; -1 is returned.
///  - any other value: returned as the resulting opcode.
///
/// NOTE(review): this listing appears to be missing several lines of the
/// function (the declaration of Gen and the bodies of the two TSFlags checks)
/// -- confirm against the original file before relying on the text below.
88 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
90 
91  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
94 
95  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
98 
99  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
100 
101  // -1 means that Opcode is already a native instruction.
102  if (MCOp == -1)
103  return Opcode;
104 
105  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
106  // no encoding in the given subtarget generation.
107  if (MCOp == (uint16_t)-1)
108  return -1;
109 
110  return MCOp;
111 }
Generation getGeneration() const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST)
AMDGPUAS getAMDGPUAS(const Module &M)
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
AMDGPUInstrInfo(const AMDGPUSubtarget &st)
TargetRegisterInfo interface that is implemented by all hw codegen targets.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
SIEncodingFamily
The AMDGPU TargetMachine interface definition for hw codegen targets.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Represents one node in the SelectionDAG.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())