LLVM  7.0.0svn
AMDGPUInstrInfo.cpp
Go to the documentation of this file.
1 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Implementation of the TargetInstrInfo class that is common to all
12 /// AMD GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUTargetMachine.h"
23 
24 using namespace llvm;
25 
26 #define GET_INSTRINFO_CTOR_DTOR
27 #include "AMDGPUGenInstrInfo.inc"
28 
29 namespace llvm {
30 namespace AMDGPU {
31 #define GET_RSRCINTRINSIC_IMPL
32 #include "AMDGPUGenSearchableTables.inc"
33 
34 #define GET_D16IMAGEDIMINTRINSIC_IMPL
35 #include "AMDGPUGenSearchableTables.inc"
36 }
37 }
38 
39 // Pin the vtable to this file; anchor() is intentionally empty and does no work.
40 void AMDGPUInstrInfo::anchor() {}
41 
43  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
44  ST(ST),
45  AMDGPUASI(ST.getAMDGPUAS()) {}
46 
47 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
48 // the first 16 loads will be interleaved with the stores, and the next 16 will
49 // be clustered as expected. It should really split into two 16-store batches.
50 //
51 // Loads are clustered until this returns false, rather than trying to schedule
52 // groups of stores. This also means we have to deal with saying different
53 // address space loads should be clustered, and ones which might cause bank
54 // conflicts.
55 //
56 // This might be deprecated so it might not be worth that much effort to fix.
58  int64_t Offset0, int64_t Offset1,
59  unsigned NumLoads) const {
60  assert(Offset1 > Offset0 &&
61  "Second offset should be larger than first offset!");
62  // If we have at most 16 loads in a row, and the offsets are less than 64
63  // bytes apart, then schedule together.
64 
65  // A cacheline is 64 bytes (for global memory).
66  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
67 }
68 
69 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
71  SI = 0,
72  VI = 1,
73  SDWA = 2,
74  SDWA9 = 3,
75  GFX80 = 4,
76  GFX9 = 5
77 };
78 
80  switch (ST.getGeneration()) {
83  return SIEncodingFamily::SI;
86  return SIEncodingFamily::VI;
87 
88  // FIXME: This should never be called for r600 GPUs.
93  return SIEncodingFamily::SI;
94  }
95 
96  llvm_unreachable("Unknown subtarget generation!");
97 }
98 
// Return a target-specific opcode if Opcode is a pseudo instruction.
//
// The encoding family (Gen) passed to AMDGPU::getMCOpcode is chosen from the
// instruction's TSFlags (renamedInGFX9, SDWA, D16 without MIMG) and from the
// subtarget (ST).
//
// Returns:
//   - Opcode itself when getMCOpcode yields -1 (already a native instruction);
//   - -1 when getMCOpcode yields (uint16_t)-1 (no encoding for that family);
//   - the mapped opcode otherwise.
//
// NOTE(review): in this listing the declaration of Gen and the statements
// controlled by the three `if`s that adjust it appear to be missing (the
// first condition ends in a dangling `&&`) -- confirm against the upstream
// source before changing any of this logic.
99 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
101 
102  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
105 
106  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
109  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
110  // subtarget has UnpackedD16VMem feature.
111  // TODO: remove this when we discard GFX80 encoding.
112  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
113  && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
115 
116  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
117 
118  // -1 means that Opcode is already a native instruction.
119  if (MCOp == -1)
120  return Opcode;
121 
122  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
123  // no encoding in the given subtarget generation.
124  if (MCOp == (uint16_t)-1)
125  return -1;
126 
127  return MCOp;
128 }
129 
130 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
132  const Value *Ptr = MMO->getValue();
133  // UndefValue means this is a load of a kernel input. These are uniform.
134  // Sometimes LDS instructions have constant pointers.
135  // If Ptr is null, then that means this mem operand contains a
136  // PseudoSourceValue like GOT.
137  if (!Ptr || isa<UndefValue>(Ptr) ||
138  isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
139  return true;
140 
142  return true;
143 
144  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
146 
147  const Instruction *I = dyn_cast<Instruction>(Ptr);
148  return I && I->getMetadata("amdgpu.uniform");
149 }
Generation getGeneration() const
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
unsigned getAddrSpace() const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST)
AMDGPUAS getAMDGPUAS(const Module &M)
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
AMDGPUInstrInfo(const AMDGPUSubtarget &st)
A description of a memory reference used in the backend.
TargetRegisterInfo interface that is implemented by all hw codegen targets.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
Address space for 32-bit constant memory.
Definition: AMDGPU.h:234
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:195
const Value * getValue() const
Return the base address of the memory access.
bool hasUnpackedD16VMem() const
static bool isUniformMMO(const MachineMemOperand *MMO)
SIEncodingFamily
The AMDGPU TargetMachine interface definition for hw codegen targets.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isArgPassedInSGPR(const Argument *A)
Represents one node in the SelectionDAG.
amdgpu Simplify well known AMD library false Value Value * Arg
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Provides AMDGPU specific target descriptions.
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73