LLVM
10.0.0svn
|
Try to reassign registers on GFX10+ to reduce register bank conflicts. More...
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/MathExtras.h"
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "amdgpu-regbanks-reassign" |
#define | NUM_VGPR_BANKS 4 |
#define | NUM_SGPR_BANKS 8 |
#define | NUM_BANKS (NUM_VGPR_BANKS + NUM_SGPR_BANKS) |
#define | SGPR_BANK_OFFSET NUM_VGPR_BANKS |
#define | VGPR_BANK_MASK 0xf |
#define | SGPR_BANK_MASK 0xff0 |
#define | SGPR_BANK_SHIFTED_MASK (SGPR_BANK_MASK >> SGPR_BANK_OFFSET) |
Functions | |
STATISTIC (NumStallsDetected, "Number of operand read stalls detected") | |
STATISTIC (NumStallsRecovered, "Number of operand read stalls recovered") | |
INITIALIZE_PASS_BEGIN (GCNRegBankReassign, DEBUG_TYPE, "GCN RegBank Reassign", false, false) INITIALIZE_PASS_END(GCNRegBankReassign | |
Variables | |
static cl::opt< unsigned > | VerifyStallCycles ("amdgpu-verify-regbanks-reassign", cl::desc("Verify stall cycles in the regbanks reassign pass"), cl::value_desc("0|1|2"), cl::init(0), cl::Hidden) |
DEBUG_TYPE | |
GCN RegBank | Reassign |
GCN RegBank | false |
Try to reassign registers on GFX10+ to reduce register bank conflicts.
On GFX10 registers are organized in banks. VGPRs have 4 banks assigned in a round-robin fashion: v0, v4, v8... belong to bank 0; v1, v5, v9... belong to bank 1, etc. SGPRs have 8 banks and are allocated in pairs, so that s0:s1, s16:s17, s32:s33 are at bank 0; s2:s3, s18:s19, s34:s35 are at bank 1, etc.
The shader can read one dword from each of these banks once per cycle. If an instruction has to read more register operands from the same bank, an additional cycle is needed. HW attempts to pre-load registers through input operand gathering, but a stall cycle may occur if that fails. For example V_FMA_F32 V111 = V0 + V4 * V8 will need 3 cycles to read operands, potentially incurring 2 stall cycles.
The pass tries to reassign registers to reduce bank conflicts.
In this pass bank numbers 0-3 are VGPR banks and 4-11 are SGPR banks, so that 4 has to be subtracted from an SGPR bank number to get the real value. This also corresponds to bit numbers in bank masks used in the pass.
Definition in file GCNRegBankReassign.cpp.
#define DEBUG_TYPE "amdgpu-regbanks-reassign" |
Definition at line 55 of file GCNRegBankReassign.cpp.
#define NUM_BANKS (NUM_VGPR_BANKS + NUM_SGPR_BANKS) |
Definition at line 59 of file GCNRegBankReassign.cpp.
#define NUM_SGPR_BANKS 8 |
Definition at line 58 of file GCNRegBankReassign.cpp.
#define NUM_VGPR_BANKS 4 |
Definition at line 57 of file GCNRegBankReassign.cpp.
#define SGPR_BANK_MASK 0xff0 |
Definition at line 62 of file GCNRegBankReassign.cpp.
#define SGPR_BANK_OFFSET NUM_VGPR_BANKS |
Definition at line 60 of file GCNRegBankReassign.cpp.
#define SGPR_BANK_SHIFTED_MASK (SGPR_BANK_MASK >> SGPR_BANK_OFFSET) |
Definition at line 63 of file GCNRegBankReassign.cpp.
#define VGPR_BANK_MASK 0xf |
Definition at line 61 of file GCNRegBankReassign.cpp.
INITIALIZE_PASS_BEGIN | ( | GCNRegBankReassign | , |
DEBUG_TYPE | , | ||
"GCN RegBank Reassign" | , | ||
false | , | ||
false | |||
) |
STATISTIC | ( | NumStallsDetected | , |
"Number of operand read stalls detected" | |||
) |
STATISTIC | ( | NumStallsRecovered | , |
"Number of operand read stalls recovered" | |||
) |
DEBUG_TYPE |
Definition at line 269 of file GCNRegBankReassign.cpp.
GCN RegBank false |
Definition at line 269 of file GCNRegBankReassign.cpp.
GCN RegBank Reassign |
Definition at line 269 of file GCNRegBankReassign.cpp.