Go to the documentation of this file.
16 #include "llvm/IR/IntrinsicsR600.h"
22 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
28 static int MaxStaticSize;
31 "amdgpu-mem-intrinsic-expand-size",
32 cl::desc(
"Set minimum mem intrinsic size to expand in IR"),
38 class AMDGPULowerIntrinsics :
public ModulePass {
40 bool makeLIDRangeMetadata(
Function &
F)
const;
47 bool runOnModule(
Module &
M)
override;
50 return "AMDGPU Lower Intrinsics";
69 static
bool shouldExpandOperationWithSize(
Value *Size) {
74 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(
Function &
F) {
83 auto *Memcpy = cast<MemCpyInst>(Inst);
84 if (shouldExpandOperationWithSize(Memcpy->getLength())) {
87 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
90 Memcpy->eraseFromParent();
95 case Intrinsic::memmove: {
96 auto *Memmove = cast<MemMoveInst>(Inst);
97 if (shouldExpandOperationWithSize(Memmove->getLength())) {
100 Memmove->eraseFromParent();
105 case Intrinsic::memset: {
106 auto *Memset = cast<MemSetInst>(Inst);
107 if (shouldExpandOperationWithSize(Memset->getLength())) {
110 Memset->eraseFromParent();
123 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(
Function &
F)
const {
124 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
129 bool Changed =
false;
131 for (
auto *U :
F.users()) {
132 auto *CI = dyn_cast<CallInst>(U);
138 Changed |=
ST.makeLIDRangeMetadata(CI);
143 bool AMDGPULowerIntrinsics::runOnModule(
Module &M) {
144 bool Changed =
false;
147 if (!
F.isDeclaration())
150 switch (
F.getIntrinsicID()) {
152 case Intrinsic::memmove:
153 case Intrinsic::memset:
154 if (expandMemIntrinsicUses(
F))
158 case Intrinsic::r600_read_tidig_x:
159 case Intrinsic::r600_read_tidig_y:
160 case Intrinsic::r600_read_tidig_z:
161 case Intrinsic::r600_read_local_size_x:
162 case Intrinsic::r600_read_local_size_y:
163 case Intrinsic::r600_read_local_size_z:
164 Changed |= makeLIDRangeMetadata(
F);
176 return new AMDGPULowerIntrinsics();
This is an optimization pass for GlobalISel generic memory operations.
We currently emit eax. Perhaps this is what we really should generate? Is imull three or four cycles? eax eax. The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimate to determine whether the match is profitable. If we care more about code size, then imull is better. It's two bytes shorter than movl + leal. On a Pentium M
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
LocationClass< Ty > location(Ty &L)
This is the shared class of boolean and integer constants.
INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, false) static bool shouldExpandOperationWithSize(Value *Size)
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Represent the analysis usage information of a pass.
ModulePass * createAMDGPULowerIntrinsicsPass()
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Module * getParent()
Get the module that this global value is contained inside of...
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Primary interface to the complete machine description for the target machine.
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
A Module instance is used to store all the information related to an LLVM module.
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
StringRef - Represent a constant reference to a string, i.e.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
const char LLVMTargetMachineRef TM
AnalysisUsage & addRequired()
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
LLVM Value Representation.
char & AMDGPULowerIntrinsicsID