22 cl::desc(
"Limit the size of the seed bundle to cap compilation time."));
23#define LoadSeedsDef "loads"
24#define StoreSeedsDef "stores"
27 cl::desc(
"Collect these seeds. Use empty for none or a comma-separated "
31 cl::desc(
"Limit the number of collected seeds groups in a BB to "
32 "cap compilation time."));
35 unsigned MaxVecRegBits,
48 assert(!
isUsed(StartIdx) &&
"Expected unused at StartIdx");
52 if (
isUsed(StartIdx + NumElements))
56 if (BitCount + InstBits > MaxVecRegBits)
61 NumElementsPowerOfTwo = NumElements;
62 BitCountPowerOfTwo = BitCount;
66 NumElements = NumElementsPowerOfTwo;
67 BitCount = BitCountPowerOfTwo;
71 if (NumElements > 1) {
73 "Must be a power of two");
79template <
typename LoadOrStoreT>
80SeedContainer::KeyT SeedContainer::getKey(LoadOrStoreT *LSI)
const {
81 assert((isa<LoadInst>(LSI) || isa<StoreInst>(LSI)) &&
82 "Expected Load or Store!");
86 if (
auto *VTy = dyn_cast<VectorType>(Ty))
87 Ty = VTy->getElementType();
92template SeedContainer::KeyT
93SeedContainer::getKey<LoadInst>(
LoadInst *LSI)
const;
94template SeedContainer::KeyT
95SeedContainer::getKey<StoreInst>(
StoreInst *LSI)
const;
98 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Expected Load or Store!");
99 auto It = SeedLookupMap.find(
I);
100 if (It == SeedLookupMap.end())
109 auto &BundleVec = Bundles[getKey(LSI)];
116 BundleVec.
back()->insert(LSI, SE);
118 SeedLookupMap[LSI] = BundleVec.back().get();
122template void SeedContainer::insert<LoadInst>(
LoadInst *);
123template void SeedContainer::insert<StoreInst>(
StoreInst *);
127 for (
const auto &Pair : Bundles) {
128 auto [
I, Ty, Opc] = Pair.first;
129 const auto &SeedsVec = Pair.second;
130 std::string RefType = dyn_cast<LoadInst>(
I) ?
"Load"
131 : dyn_cast<StoreInst>(
I) ?
"Store"
133 OS <<
"[Inst=" << *
I <<
" Ty=" << Ty <<
" " << RefType <<
"]\n";
134 for (
const auto &SeedPtr : SeedsVec) {
146 if (!LSI->isSimple())
153 if (isa<ScalableVectorType>(Ty))
155 if (
auto *VTy = dyn_cast<FixedVectorType>(Ty))
164 : StoreSeeds(SE), LoadSeeds(SE), Ctx(BB->getContext()) {
168 if (!CollectStores && !CollectLoads)
172 if (
auto SI = dyn_cast<StoreInst>(
I))
173 StoreSeeds.
erase(SI);
174 else if (
auto LI = dyn_cast<LoadInst>(
I))
179 for (
auto &
I : *BB) {
183 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I))
198 OS <<
"=== StoreSeeds ===\n";
200 OS <<
"=== LoadSeeds ===\n";
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
An instruction for reading from memory.
std::pair< KeyT, ValueT > & back()
The main scalar evolution driver.
An instruction for storing to memory.
This class implements an extremely fast bulk output stream that can only output to a stream.
Contains a list of sandboxir::Instruction's.
void unregisterEraseInstrCallback(CallbackID ID)
CallbackID registerEraseInstrCallback(EraseInstrCallback CB)
Register a callback that gets called when a SandboxIR instruction is about to be removed from its par...
A sandboxir::User with operands, opcode and linked with previous/next instructions in an instruction ...
Specialization of SeedBundle for memory access instructions.
A set of candidate Instructions for vectorizing together.
bool isUsed(unsigned Element) const
\Returns whether or not Element has been used.
SmallVector< Instruction * > Seeds
ArrayRef< Instruction * > getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2)
\Returns a slice of seed elements, starting at the element StartIdx, with a total size <= MaxVecRegBi...
void setUsed(Instruction *I)
Marks instruction I "used" within the bundle.
SeedCollector(BasicBlock *BB, ScalarEvolution &SE)
void print(raw_ostream &OS) const
LLVM_DUMP_METHOD void dump() const
void print(raw_ostream &OS) const
void insert(LoadOrStoreT *LSI)
LLVM_DUMP_METHOD void dump() const
bool erase(Instruction *I)
Just like llvm::Type these are immutable, unique, never get freed and can only be created via static ...
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
static unsigned getNumBits(Type *Ty, const DataLayout &DL)
\Returns the number of bits of Ty.
static Type * getExpectedType(const Value *V)
\Returns the expected type of Value V.
static Value * getMemInstructionBase(const LoadOrStoreT *LSI)
\Returns the base Value for load or store instruction LSI.
A SandboxIR Value has users. This is the base class.
static bool isValidElementType(Type *ElemTy)
initializer< Ty > init(const Ty &Val)
template bool isValidMemSeed< StoreInst >(StoreInst *LSI)
static bool isValidMemSeed(LoadOrStoreT *LSI)
template bool isValidMemSeed< LoadInst >(LoadInst *LSI)
cl::opt< unsigned > SeedGroupsLimit("sbvec-seed-groups-limit", cl::init(256), cl::Hidden, cl::desc("Limit the number of collected seeds groups in a BB to " "cap compilation time."))
cl::opt< unsigned > SeedBundleSizeLimit("sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, cl::desc("Limit the size of the seed bundle to cap compilation time."))
cl::opt< std::string > CollectSeeds("sbvec-collect-seeds", cl::init(LoadSeedsDef "," StoreSeedsDef), cl::Hidden, cl::desc("Collect these seeds. Use empty for none or a comma-separated " "list of '" LoadSeedsDef "' and '" StoreSeedsDef "'."))
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.