22static cl::opt<unsigned>
24 cl::desc(
"Override the vector register size in bits, "
25 "which is otherwise found by querying TTI."));
28 cl::desc(
"Allow non-power-of-2 vectorization."));
33 cl::desc(
"Helps find bugs by verifying the IR whenever we "
34 "emit new instructions (*very* expensive)."));
46 for (
Value *BndlV : Bndl) {
47 auto *BndlI = cast<Instruction>(BndlV);
48 Operands.push_back(BndlI->getOperand(OpIdx));
64 return std::next(BotI->getIterator());
71 assert(
all_of(Bndl, [](
auto *V) {
return isa<Instruction>(V); }) &&
72 "Expect Instructions!");
73 auto &Ctx = Bndl[0]->getContext();
79 Bndl, cast<Instruction>(Bndl[0])->
getParent());
81 auto Opcode = cast<Instruction>(Bndl[0])->getOpcode();
83 case Instruction::Opcode::ZExt:
84 case Instruction::Opcode::SExt:
85 case Instruction::Opcode::FPToUI:
86 case Instruction::Opcode::FPToSI:
87 case Instruction::Opcode::FPExt:
88 case Instruction::Opcode::PtrToInt:
89 case Instruction::Opcode::IntToPtr:
90 case Instruction::Opcode::SIToFP:
91 case Instruction::Opcode::UIToFP:
92 case Instruction::Opcode::Trunc:
93 case Instruction::Opcode::FPTrunc:
94 case Instruction::Opcode::BitCast: {
99 case Instruction::Opcode::FCmp:
100 case Instruction::Opcode::ICmp: {
101 auto Pred = cast<CmpInst>(Bndl[0])->getPredicate();
104 return cast<CmpInst>(SBV)->getPredicate() == Pred;
106 "Expected same predicate across bundle.");
110 case Instruction::Opcode::Select: {
114 case Instruction::Opcode::FNeg: {
115 auto *UOp0 = cast<UnaryOperator>(Bndl[0]);
116 auto OpC = UOp0->getOpcode();
118 WhereIt, Ctx,
"Vec");
120 case Instruction::Opcode::Add:
121 case Instruction::Opcode::FAdd:
122 case Instruction::Opcode::Sub:
123 case Instruction::Opcode::FSub:
124 case Instruction::Opcode::Mul:
125 case Instruction::Opcode::FMul:
126 case Instruction::Opcode::UDiv:
127 case Instruction::Opcode::SDiv:
128 case Instruction::Opcode::FDiv:
129 case Instruction::Opcode::URem:
130 case Instruction::Opcode::SRem:
131 case Instruction::Opcode::FRem:
132 case Instruction::Opcode::Shl:
133 case Instruction::Opcode::LShr:
134 case Instruction::Opcode::AShr:
135 case Instruction::Opcode::And:
136 case Instruction::Opcode::Or:
137 case Instruction::Opcode::Xor: {
138 auto *BinOp0 = cast<BinaryOperator>(Bndl[0]);
142 BinOp0->getOpcode(),
LHS,
RHS, BinOp0, WhereIt, Ctx,
"Vec");
144 case Instruction::Opcode::Load: {
145 auto *Ld0 = cast<LoadInst>(Bndl[0]);
146 Value *
Ptr = Ld0->getPointerOperand();
150 case Instruction::Opcode::Store: {
151 auto Align = cast<StoreInst>(Bndl[0])->getAlign();
156 case Instruction::Opcode::Br:
157 case Instruction::Opcode::Ret:
158 case Instruction::Opcode::PHI:
159 case Instruction::Opcode::AddrSpaceCast:
160 case Instruction::Opcode::Call:
161 case Instruction::Opcode::GetElementPtr:
172 auto *VecI = CreateVectorInstr(Bndl,
Operands);
173 if (VecI !=
nullptr) {
175 IMaps->registerVector(Bndl, VecI);
180void BottomUpVec::tryEraseDeadInstrs() {
181 DenseMap<BasicBlock *, SmallVector<Instruction *>> SortedDeadInstrCandidates;
183 for (
auto *DeadI : DeadInstrCandidates)
184 SortedDeadInstrCandidates[DeadI->getParent()].push_back(DeadI);
185 for (
auto &Pair : SortedDeadInstrCandidates)
187 [](Instruction *I1, Instruction *I2) { return I1->comesBefore(I2); });
188 for (
const auto &Pair : SortedDeadInstrCandidates) {
189 for (Instruction *
I :
reverse(Pair.second)) {
192 I->eraseFromParent();
195 DeadInstrCandidates.clear();
198Value *BottomUpVec::createShuffle(Value *VecOp,
const ShuffleMask &Mask,
199 BasicBlock *UserBB) {
202 VecOp->getContext(),
"VShuf");
205Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack, BasicBlock *UserBB) {
215 Context &Ctx = ToPack[0]->getContext();
217 unsigned InsertIdx = 0;
218 for (Value *Elm : ToPack) {
221 if (Elm->getType()->isVectorTy()) {
223 cast<FixedVectorType>(Elm->getType())->getNumElements();
224 for (
auto ExtrLane : seq<int>(0, NumElms)) {
231 if (!isa<Constant>(ExtrI))
232 WhereIt = std::next(cast<Instruction>(ExtrI)->getIterator());
237 LastInsert, ExtrI, InsertLaneC, WhereIt, Ctx,
"VPack");
238 if (!isa<Constant>(InsertI)) {
239 LastInsert = InsertI;
240 WhereIt = std::next(cast<Instruction>(LastInsert)->getIterator());
249 WhereIt, Ctx,
"Pack");
250 if (
auto *NewI = dyn_cast<Instruction>(LastInsert))
251 WhereIt = std::next(NewI->getIterator());
257void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) {
258 for (Value *V : Bndl)
259 DeadInstrCandidates.insert(cast<Instruction>(V));
261 auto Opcode = cast<Instruction>(Bndl[0])->getOpcode();
263 case Instruction::Opcode::Load: {
267 DeadInstrCandidates.insert(
Ptr);
270 case Instruction::Opcode::Store: {
274 DeadInstrCandidates.insert(
Ptr);
282Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl,
283 ArrayRef<Value *> UserBndl,
unsigned Depth) {
284 Value *NewVec =
nullptr;
285 auto *UserBB = !UserBndl.empty()
286 ? cast<Instruction>(UserBndl.front())->getParent()
287 : cast<Instruction>(Bndl[0])->getParent();
288 const auto &LegalityRes = Legality->canVectorize(Bndl);
289 switch (LegalityRes.getSubclassID()) {
291 auto *
I = cast<Instruction>(Bndl[0]);
292 SmallVector<Value *, 2> VecOperands;
293 switch (
I->getOpcode()) {
294 case Instruction::Opcode::Load:
298 case Instruction::Opcode::Store: {
301 VecOperands.push_back(VecOp);
307 for (
auto OpIdx : seq<unsigned>(
I->getNumOperands())) {
309 VecOperands.push_back(VecOp);
313 NewVec = createVectorInstr(Bndl, VecOperands);
317 if (NewVec !=
nullptr)
318 collectPotentiallyDeadInstrs(Bndl);
322 NewVec = cast<DiamondReuse>(LegalityRes).getVector();
326 auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector();
327 const ShuffleMask &
Mask =
328 cast<DiamondReuseWithShuffle>(LegalityRes).getMask();
329 NewVec = createShuffle(VecOp, Mask, UserBB);
334 cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr();
338 SmallVector<Value *, 4> DescrInstrs;
339 for (
const auto &ElmDescr : Descr.getDescrs()) {
340 if (
auto *
I = dyn_cast<Instruction>(ElmDescr.getValue()))
341 DescrInstrs.push_back(
I);
347 for (
auto [Lane, ElmDescr] :
enumerate(Descr.getDescrs())) {
348 Value *VecOp = ElmDescr.getValue();
349 Context &Ctx = VecOp->getContext();
350 Value *ValueToInsert;
351 if (ElmDescr.needsExtract()) {
355 VecOp->getContext(),
"VExt");
357 ValueToInsert = VecOp;
361 WhereIt, Ctx,
"VIns");
371 NewVec = createPack(Bndl, UserBB);
379 Instruction *I0 = isa<Instruction>(Bndl[0])
380 ? cast<Instruction>(Bndl[0])
381 :
cast<Instruction>(UserBndl[0]);
389bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
390 DeadInstrCandidates.clear();
392 vectorizeRec(Bndl, {}, 0);
393 tryEraseDeadInstrs();
398 IMaps = std::make_unique<InstrMaps>(
F.getContext());
399 Legality = std::make_unique<LegalityAnalysis>(
400 A.getAA(),
A.getScalarEvolution(),
F.getParent()->getDataLayout(),
401 F.getContext(), *IMaps);
403 const auto &
DL =
F.getParent()->getDataLayout();
404 unsigned VecRegBits =
414 for (
SeedBundle &Seeds : SC.getStoreSeeds()) {
417 Seeds[Seeds.getFirstUnusedElementIdx()])),
420 auto DivideBy2 = [](
unsigned Num) {
428 for (
unsigned SliceElms = std::min(VecRegBits / ElmBits,
429 Seeds.getNumUnusedBits() / ElmBits);
430 SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
435 for (
unsigned Offset = Seeds.getFirstUnusedElementIdx(),
446 if (SeedSlice.empty())
449 assert(SeedSlice.size() >= 2 &&
"Should have been rejected!");
457 Change |= tryVectorize(SeedSliceVals);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
LLVM Value Representation.
Iterator for Instructions in a BasicBlock.
Contains a list of sandboxir::Instruction's.
static Value * createWithCopiedFlags(Instruction::Opcode Op, Value *LHS, Value *RHS, Value *CopyFrom, InsertPosition Pos, Context &Ctx, const Twine &Name="")
BottomUpVec(StringRef Pipeline)
bool runOnFunction(Function &F, const Analyses &A) final
Returns true if it modifies F.
static Value * create(Type *DestTy, Opcode Op, Value *Operand, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static Value * create(Predicate Pred, Value *S1, Value *S2, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static ConstantInt * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static Value * create(Value *Vec, Value *Idx, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
A pass that runs on a sandbox::Function.
static Value * create(Value *Vec, Value *NewElt, Value *Idx, InsertPosition Pos, Context &Ctx, const Twine &Name="")
BBIterator getIterator() const
Returns a BasicBlock::iterator for this Instruction.
static LoadInst * create(Type *Ty, Value *Ptr, MaybeAlign Align, InsertPosition Pos, bool IsVolatile, Context &Ctx, const Twine &Name="")
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of candidate Instructions for vectorizing together.
static Value * create(Value *Cond, Value *True, Value *False, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static Value * create(Value *V1, Value *V2, Value *Mask, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static StoreInst * create(Value *V, Value *Ptr, MaybeAlign Align, InsertPosition Pos, bool IsVolatile, Context &Ctx)
static Type * getInt32Ty(Context &Ctx)
static Value * createWithCopiedFlags(Instruction::Opcode Op, Value *OpV, Value *CopyFrom, InsertPosition Pos, Context &Ctx, const Twine &Name="")
static unsigned getNumBits(Type *Ty, const DataLayout &DL)
Returns the number of bits of Ty.
static Type * getExpectedType(const Value *V)
Returns the expected type of Value V.
static bool verifyFunction(const Function *F, raw_ostream &OS)
Equivalent to llvm::verifyFunction().
A SandboxIR Value has users. This is the base class.
static Instruction * getLowest(ArrayRef< Instruction * > Instrs)
Returns the instruction in Instrs that is lowest in the BB.
static Type * getCommonScalarType(ArrayRef< Value * > Bndl)
Similar to tryGetCommonScalarType() but will assert that there is a common type.
static Instruction * getLastPHIOrSelf(Instruction *I)
If I is not a PHI it returns it.
static unsigned getNumLanes(Type *Ty)
Returns the number of vector lanes of Ty, or 1 if Ty is not a vector.
static Type * getWideType(Type *ElemTy, unsigned NumElts)
Returns <NumElts x ElemTy>.
static Type * getElementType(Type *Ty)
Returns Ty if scalar or its element type if vector.
static unsigned getFloorPowerOf2(unsigned Num)
Returns the largest integer power of 2 that is <= Num.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
initializer< Ty > init(const Ty &Val)
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
@ DiamondReuse
Don't generate new code, reuse existing vector.
@ DiamondReuseWithShuffle
Reuse the existing vector but add a shuffle.
@ Widen
Vectorize by combining scalars to a vector.
@ DiamondReuseMultiInput
Reuse more than one vector.
@ Pack
Collect scalar values.
static BasicBlock::iterator getInsertPointAfterInstrs(ArrayRef< Value * > Vals, BasicBlock *BB)
Returns the BB iterator after the lowest instruction in Vals, or the top of BB if no instruction found in Vals.
static SmallVector< Value *, 4 > getOperand(ArrayRef< Value * > Bndl, unsigned OpIdx)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< unsigned > OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden, cl::desc("Override the vector register size in bits, " "which is otherwise found by querying TTI."))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< bool > AlwaysVerify("sbvec-always-verify", cl::init(false), cl::Hidden, cl::desc("Helps find bugs by verifying the IR whenever we " "emit new instructions (*very* expensive)."))
static cl::opt< bool > AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden, cl::desc("Allow non-power-of-2 vectorization."))