LLVM 20.0.0git
NVPTXLowerAggrCopies.cpp
Go to the documentation of this file.
1//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
11// the size is large or is not a compile-time constant.
12//
13//===----------------------------------------------------------------------===//
14
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/LLVMContext.h"
25#include "llvm/IR/Module.h"
28
29#define DEBUG_TYPE "nvptx"
30
31using namespace llvm;
32
33namespace {
34
35// The pass itself: a FunctionPass whose runOnFunction rewrites qualifying
// aggregate load/store pairs and mem intrinsic calls into explicit loops.
36struct NVPTXLowerAggrCopies : public FunctionPass {
// Pass identifier; defined out of line and handed to the FunctionPass base
// by the constructor below.
37 static char ID;
38
39 NVPTXLowerAggrCopies() : FunctionPass(ID) {}
40
// NOTE(review): the body of this override is not visible in this listing
// (source lines 42-43 are missing). runOnFunction calls
// getAnalysis<TargetTransformInfoWrapperPass>(), so presumably that analysis
// is declared as required here -- confirm against the real file.
41 void getAnalysisUsage(AnalysisUsage &AU) const override {
44 }
45
// Entry point: returns false when nothing was collected for lowering and
// true after the collected instructions have been rewritten.
46 bool runOnFunction(Function &F) override;
47
// Size threshold used by runOnFunction: loads whose type store size is below
// it, and mem intrinsics whose constant length is below it, are left alone.
// (Presumably a byte count -- confirm.)
48 static const unsigned MaxAggrCopySize = 128;
49
// Human-readable name reported for this pass.
50 StringRef getPassName() const override {
51 return "Lower aggregate copies/intrinsics into loops";
52 }
53};
54
// Out-of-line definition of the static pass identifier declared in the
// struct; the constructor passes it to the FunctionPass base.
55char NVPTXLowerAggrCopies::ID = 0;
56
// Lowers large aggregate copies and mem intrinsics in F into loops.
//
// Phase 1 walks every instruction and collects:
//   * loads with exactly one use, where that use is a store of the loaded
//     value and the loaded type's store size is at least MaxAggrCopySize;
//   * mem intrinsics whose length is not a ConstantInt, or is a ConstantInt
//     of at least MaxAggrCopySize.
// Phase 2 replaces each collected load/store pair with a loop built by
// createMemCpyLoopKnownSize, expands each collected intrinsic with the
// matching expandMem*AsLoop helper, and erases the original instructions.
//
// Returns false if nothing was collected, true otherwise.
//
// NOTE(review): the declarations of AggrLoads and MemCalls (source lines
// 58-59) and the first line of the TTI statement (source line 63) are
// missing from this listing; the code below uses all three.
57bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
60
61 const DataLayout &DL = F.getDataLayout();
62 LLVMContext &Context = F.getParent()->getContext();
// Tail of a statement whose first line is not visible here; presumably it
// binds the TTI reference passed to the expansion helpers below -- confirm.
64 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
65
66 // Collect all aggregate loads and mem* calls.
67 for (BasicBlock &BB : F) {
68 for (Instruction &I : BB) {
69 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Only a load with a single user can form a simple load->store copy.
70 if (!LI->hasOneUse())
71 continue;
72
// Loads below the size threshold are left untouched.
73 if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
74 continue;
75
// The single user must be a store whose operand 0 is this load; operand 1
// is later used as the destination address, so a store that only uses the
// load in another operand position is skipped.
76 if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
77 if (SI->getOperand(0) != LI)
78 continue;
79 AggrLoads.push_back(LI);
80 }
81 } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
82 // Convert intrinsic calls with variable size or with constant size
83 // larger than the MaxAggrCopySize threshold.
84 if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
85 if (LenCI->getZExtValue() >= MaxAggrCopySize) {
86 MemCalls.push_back(IntrCall);
87 }
88 } else {
89 MemCalls.push_back(IntrCall);
90 }
91 }
92 }
93 }
94
// Nothing qualified: report the function as unmodified.
95 if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
96 return false;
97 }
98
99 //
100 // Do the transformation of an aggr load/copy/set to a loop
101 //
102 for (LoadInst *LI : AggrLoads) {
// The collection phase only kept loads whose sole user is a store, so this
// cast is expected to succeed.
103 auto *SI = cast<StoreInst>(*LI->user_begin());
104 Value *SrcAddr = LI->getOperand(0);
105 Value *DstAddr = SI->getOperand(1);
// The copy length is the store size of the loaded type, materialized as an
// i32 constant.
106 unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
107 ConstantInt *CopyLen =
108 ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
109
// The loop is inserted before the store. CanOverlap is passed as true,
// presumably because nothing here proves the two addresses are disjoint --
// confirm.
110 createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
111 /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
112 /* CopyLen */ CopyLen,
113 /* SrcAlign */ LI->getAlign(),
114 /* DestAlign */ SI->getAlign(),
115 /* SrcIsVolatile */ LI->isVolatile(),
116 /* DstIsVolatile */ SI->isVolatile(),
117 /* CanOverlap */ true, TTI);
118
// Remove the original pair: the store (the load's only user) first, then
// the load.
119 SI->eraseFromParent();
120 LI->eraseFromParent();
121 }
122
123 // Transform mem* intrinsic calls.
124 for (MemIntrinsic *MemCall : MemCalls) {
125 if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
126 expandMemCpyAsLoop(Memcpy, TTI);
127 } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
// NOTE(review): expandMemMoveAsLoop is declared as returning bool, but the
// result is ignored and the call is erased below regardless -- confirm that
// the expansion cannot fail for the inputs this pass sees.
128 expandMemMoveAsLoop(Memmove, TTI);
129 } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
130 expandMemSetAsLoop(Memset);
131 }
// The expansion helpers leave the intrinsic call in place, so it is erased
// here.
132 MemCall->eraseFromParent();
133 }
134
135 return true;
136}
137
138} // namespace
139
140namespace llvm {
142}
143
// Registers NVPTXLowerAggrCopies under the argument string
// "nvptx-lower-aggr-copies" with the description below; the two trailing
// `false` values fill the macro's `cfg` and `analysis` parameters.
144INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
145 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
146 false, false)
147
149 return new NVPTXLowerAggrCopies();
150}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This pass exposes codegen information to IR-level passes.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
This class wraps the llvm.memcpy intrinsic.
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &)
bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
FunctionPass * createLowerAggrCopies()
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.