LLVM 22.0.0git
|
This pass lowers the pseudo control flow instructions to real machine instructions. More...
#include "SILowerControlFlow.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/Target/TargetMachine.h"
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "si-lower-control-flow" |
Functions | |
INITIALIZE_PASS (SILowerControlFlowLegacy, DEBUG_TYPE, "SI lower control flow", false, false) static void setImpSCCDefDead(MachineInstr &MI | |
assert (ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef()) | |
ImpDefSCC | setIsDead (IsDead) |
static bool | isSimpleIf (const MachineInstr &MI, const MachineRegisterInfo *MRI) |
Variables | |
static cl::opt< bool > | RemoveRedundantEndcf ("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden) |
bool | IsDead |
This pass lowers the pseudo control flow instructions to real machine instructions.
All control flow is handled using predicated instructions and a predicate stack. Each Scalar ALU controls the operations of 64 Vector ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs by writing to the 64-bit EXEC register (each bit corresponds to a single Vector ALU). Typically, for predicates, a Vector ALU will write to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit for each Vector ALU), and then the Scalar ALU will AND the VCC register with EXEC to update the predicates.
For example:
  vcc = V_CMP_GT_F32 vgpr1, vgpr2
  sgpr0 = SI_IF vcc
    vgpr0 = V_ADD_F32 vgpr0, vgpr0
  sgpr0 = SI_ELSE sgpr0
    vgpr0 = V_SUB_F32 vgpr0, vgpr0
  SI_END_CF sgpr0
becomes:
  sgpr0 = S_AND_SAVEEXEC_B64 vcc    // Save and update the exec mask
  sgpr0 = S_XOR_B64 sgpr0, exec     // Clear live bits from saved exec mask
  S_CBRANCH_EXECZ label0            // This instruction is an optional
                                    // optimization which allows us to
                                    // branch if all the bits of
                                    // EXEC are zero.
  vgpr0 = V_ADD_F32 vgpr0, vgpr0    // Do the IF block of the branch

label0:
  sgpr0 = S_OR_SAVEEXEC_B64 sgpr0   // Restore the exec mask for the Then
                                    // block
  exec = S_XOR_B64 sgpr0, exec      // Update the exec mask
  S_CBRANCH_EXECZ label1            // Use our branch optimization
                                    // instruction again.
  vgpr0 = V_SUB_F32 vgpr0, vgpr0    // Do the ELSE block
label1:
  exec = S_OR_B64 exec, sgpr0       // Re-enable saved exec mask bits
Definition in file SILowerControlFlow.cpp.
#define DEBUG_TYPE "si-lower-control-flow" |
Definition at line 65 of file SILowerControlFlow.cpp.
assert | ( | ImpDefSCC. | getReg() = =AMDGPU::SCC &&ImpDefSCC.isDef() | ) |
INITIALIZE_PASS | ( | SILowerControlFlowLegacy | , |
DEBUG_TYPE | , | ||
"SI lower control flow" | , | ||
false | , | ||
false | |||
) | & |
|
static |
Definition at line 207 of file SILowerControlFlow.cpp.
ImpDefSCC setIsDead | ( | IsDead | ) |
bool IsDead |
Definition at line 179 of file SILowerControlFlow.cpp.
Referenced by llvm::rdf::DeadCodeElimination::collect(), llvm::ReachingDefAnalysis::collectKilledOperands(), llvm::rdf::Liveness::computePhiInfo(), llvm::rdf::Liveness::getAllReachedUses(), rematerializeCheapDef(), llvm::objcopy::macho::Object::removeSections(), and swapRegAndNonRegOperand().