22 cl::desc(
"Limit the size of the seed bundle to cap compilation time."));
23#define LoadSeedsDef "loads"
24#define StoreSeedsDef "stores"
27 cl::desc(
"Collect these seeds. Use empty for none or a comma-separated "
31 cl::desc(
"Limit the number of collected seeds groups in a BB to "
32 "cap compilation time."));
35 unsigned MaxVecRegBits,
48 assert(!
isUsed(StartIdx) &&
"Expected unused at StartIdx");
53 if (
isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
58 NumElementsPowerOfTwo = NumElements;
59 BitCountPowerOfTwo = BitCount;
63 NumElements = NumElementsPowerOfTwo;
64 BitCount = BitCountPowerOfTwo;
68 "Must be a power of two");
76template <
typename LoadOrStoreT>
77SeedContainer::KeyT SeedContainer::getKey(LoadOrStoreT *LSI)
const {
78 assert((isa<LoadInst>(LSI) || isa<StoreInst>(LSI)) &&
79 "Expected Load or Store!");
83 if (
auto *VTy = dyn_cast<VectorType>(Ty))
84 Ty = VTy->getElementType();
89template SeedContainer::KeyT
90SeedContainer::getKey<LoadInst>(
LoadInst *LSI)
const;
91template SeedContainer::KeyT
92SeedContainer::getKey<StoreInst>(
StoreInst *LSI)
const;
95 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Expected Load or Store!");
96 auto It = SeedLookupMap.find(
I);
97 if (It == SeedLookupMap.end())
106 auto &BundleVec = Bundles[getKey(LSI)];
113 BundleVec.
back()->insert(LSI, SE);
115 SeedLookupMap[LSI] = BundleVec.back().get();
119template void SeedContainer::insert<LoadInst>(
LoadInst *);
120template void SeedContainer::insert<StoreInst>(
StoreInst *);
124 for (
const auto &Pair : Bundles) {
125 auto [
I, Ty, Opc] = Pair.first;
126 const auto &SeedsVec = Pair.second;
127 std::string RefType = dyn_cast<LoadInst>(
I) ?
"Load"
128 : dyn_cast<StoreInst>(
I) ?
"Store"
130 OS <<
"[Inst=" << *
I <<
" Ty=" << Ty <<
" " << RefType <<
"]\n";
131 for (
const auto &SeedPtr : SeedsVec) {
143 if (!LSI->isSimple())
150 if (isa<ScalableVectorType>(Ty))
152 if (
auto *VTy = dyn_cast<FixedVectorType>(Ty))
161 : StoreSeeds(SE), LoadSeeds(SE), Ctx(BB->getContext()) {
165 if (!CollectStores && !CollectLoads)
169 if (
auto SI = dyn_cast<StoreInst>(
I))
170 StoreSeeds.
erase(SI);
171 else if (
auto LI = dyn_cast<LoadInst>(
I))
176 for (
auto &
I : *BB) {
180 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I))
195 OS <<
"=== StoreSeeds ===\n";
197 OS <<
"=== LoadSeeds ===\n";
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents an Operation in the Expression.
An instruction for reading from memory.
std::pair< KeyT, ValueT > & back()
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory).
The main scalar evolution driver.
An instruction for storing to memory.
This class implements an extremely fast bulk output stream that can only output to a stream.
Contains a list of sandboxir::Instruction's.
void unregisterEraseInstrCallback(CallbackID ID)
CallbackID registerEraseInstrCallback(EraseInstrCallback CB)
Register a callback that gets called when a SandboxIR instruction is about to be removed from its parent.
A sandboxir::User with operands, opcode and linked with previous/next instructions in an instruction list.
Specialization of SeedBundle for memory access instructions.
A set of candidate Instructions for vectorizing together.
bool isUsed(unsigned Element) const
Returns whether or not Element has been used.
SmallVector< Instruction * > Seeds
void setUsed(Instruction *I)
Marks instruction I "used" within the bundle.
MutableArrayRef< Instruction * > getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2)
Returns a slice of seed elements, starting at the element StartIdx, with a total size <= MaxVecRegBits.
SeedCollector(BasicBlock *BB, ScalarEvolution &SE)
void print(raw_ostream &OS) const
LLVM_DUMP_METHOD void dump() const
void print(raw_ostream &OS) const
void insert(LoadOrStoreT *LSI)
LLVM_DUMP_METHOD void dump() const
bool erase(Instruction *I)
Just like llvm::Type these are immutable, unique, never get freed and can only be created via static ...
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
static unsigned getNumBits(Value *V, const DataLayout &DL)
Returns the number of bits required to represent the operands or return value of V in DL.
static Type * getExpectedType(const Value *V)
Returns the expected type of Value V.
static Value * getMemInstructionBase(const LoadOrStoreT *LSI)
Returns the base Value for load or store instruction LSI.
A SandboxIR Value has users. This is the base class.
static bool isValidElementType(Type *ElemTy)
initializer< Ty > init(const Ty &Val)
template bool isValidMemSeed< StoreInst >(StoreInst *LSI)
static bool isValidMemSeed(LoadOrStoreT *LSI)
template bool isValidMemSeed< LoadInst >(LoadInst *LSI)
cl::opt< unsigned > SeedGroupsLimit("sbvec-seed-groups-limit", cl::init(256), cl::Hidden, cl::desc("Limit the number of collected seeds groups in a BB to " "cap compilation time."))
cl::opt< unsigned > SeedBundleSizeLimit("sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, cl::desc("Limit the size of the seed bundle to cap compilation time."))
cl::opt< std::string > CollectSeeds("sbvec-collect-seeds", cl::init(LoadSeedsDef "," StoreSeedsDef), cl::Hidden, cl::desc("Collect these seeds. Use empty for none or a comma-separated " "list of '" LoadSeedsDef "' and '" StoreSeedsDef "'."))
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.