36 #define DEBUG_TYPE "vplan-slp"
44 CompletelySLP =
false;
50 return cast<VPInstruction>(V)->getUnderlyingInstr();
52 unsigned BundleSize = 0;
54 Type *
T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
55 assert(!
T->isVectorTy() &&
"Only scalar types supported for now");
56 BundleSize +=
T->getScalarSizeInBits();
58 WidestBundleBits =
std::max(WidestBundleBits, BundleSize);
61 auto Res = BundleToCombined.try_emplace(to_vector<4>(
Operands), New);
63 "Already created a combined instruction for the operand bundle");
70 return Op && isa<VPInstruction>(
Op) &&
71 cast<VPInstruction>(
Op)->getUnderlyingInstr();
73 LLVM_DEBUG(
dbgs() <<
"VPSLP: not all operands are VPInstructions\n");
82 cast<VPInstruction>(
Operands[0])->getUnderlyingInstr();
83 unsigned Opcode = OriginalInstr->
getOpcode();
86 const Instruction *
I = cast<VPInstruction>(
Op)->getUnderlyingInstr();
87 return I->getOpcode() == Opcode &&
88 I->getType()->getPrimitiveSizeInBits() ==
Width;
96 return cast<VPInstruction>(
Op)->getParent() != &this->BB;
103 [](
VPValue *
Op) {
return Op->hasMoreThanOneUniqueUser(); })) {
104 LLVM_DEBUG(
dbgs() <<
"VPSLP: Some operands have multiple users.\n");
113 unsigned LoadsSeen = 0;
115 for (
auto &
I : *Parent) {
116 auto *VPI = dyn_cast<VPInstruction>(&
I);
125 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
127 dbgs() <<
"VPSLP: instruction modifying memory between loads\n");
133 return cast<LoadInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
136 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple loads are supported.\n");
143 return cast<StoreInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
146 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple stores are supported.\n");
154 unsigned OperandIndex) {
158 auto *U = cast<VPInstruction>(V);
159 Operands.push_back(U->getOperand(OperandIndex));
166 cast<VPInstruction>(Values[0])->
getOpcode());
172 auto *VPI = cast<VPInstruction>(Values[0]);
174 switch (VPI->getOpcode()) {
181 for (
unsigned I = 0, NumOps = VPI->getNumOperands();
I < NumOps; ++
I)
191 unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
193 return cast<VPInstruction>(V)->getOpcode() != Opcode;
203 if (A->getOpcode() !=
B->getOpcode())
212 return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(
B);
219 auto *
I1 = dyn_cast<VPInstruction>(V1);
220 auto *I2 = dyn_cast<VPInstruction>(
V2);
229 for (
unsigned I = 0, EV1 =
I1->getNumOperands();
I < EV1; ++
I)
230 for (
unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
232 getLAScore(
I1->getOperand(
I), I2->getOperand(J), MaxLevel - 1, IAI);
236 std::pair<VPlanSlp::OpMode, VPValue *>
241 "Currently we only handle load and commutative opcodes");
246 << *cast<VPInstruction>(Last)->getUnderlyingInstr() <<
" ");
247 for (
auto *Candidate : Candidates) {
248 auto *LastI = cast<VPInstruction>(Last);
249 auto *CandidateI = cast<VPInstruction>(Candidate);
251 LLVM_DEBUG(
dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
253 BestCandidates.push_back(Candidate);
258 if (BestCandidates.empty())
259 return {OpMode::Failed,
nullptr};
261 if (BestCandidates.size() == 1)
262 return {
Mode, BestCandidates[0]};
265 unsigned BestScore = 0;
267 unsigned PrevScore = ~0u;
271 for (
auto *Candidate : BestCandidates) {
273 if (PrevScore == ~0u)
275 if (PrevScore != Score)
279 if (Score > BestScore) {
288 << *cast<VPInstruction>(Best)->getUnderlyingInstr()
290 Candidates.erase(Best);
298 FinalOrder.
reserve(MultiNodeOps.size());
299 Mode.reserve(MultiNodeOps.size());
303 for (
auto &
Operands : MultiNodeOps) {
305 if (cast<VPInstruction>(
Operands.second[0])->getOpcode() ==
307 Mode.push_back(OpMode::Load);
309 Mode.push_back(OpMode::Opcode);
312 for (
unsigned Lane = 1,
E = MultiNodeOps[0].second.size(); Lane <
E; ++Lane) {
313 LLVM_DEBUG(
dbgs() <<
" Finding best value for lane " << Lane <<
"\n");
316 for (
auto Ops : MultiNodeOps) {
318 dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
320 Candidates.
insert(Ops.second[Lane]);
324 for (
unsigned Op = 0,
E = MultiNodeOps.size();
Op <
E; ++
Op) {
326 if (
Mode[
Op] == OpMode::Failed)
330 std::pair<OpMode, VPValue *> Res =
331 getBest(
Mode[
Op], Last, Candidates, IAI);
333 FinalOrder[
Op].second.push_back(Res.second);
336 FinalOrder[
Op].second.push_back(markFailed());
343 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
346 for (
auto Op : Values) {
347 if (
auto *VPInstr = cast_or_null<VPInstruction>(
Op))
348 if (
auto *Instr = VPInstr->getUnderlyingInstr()) {
349 dbgs() << *Instr <<
" | ";
352 dbgs() <<
" nullptr | ";
362 auto I = BundleToCombined.find(to_vector<4>(Values));
363 if (
I != BundleToCombined.end()) {
368 for (
auto *V : Values) {
370 auto *FirstUser = *UI++;
372 assert(*UI == FirstUser &&
"Currently we only support SLP trees.");
382 dbgs() <<
"buildGraph: ";
386 if (!areVectorizable(Values))
390 unsigned ValuesOpcode = *
getOpcode(Values);
394 bool MultiNodeRoot = !MultiNodeActive;
395 MultiNodeActive =
true;
398 dbgs() <<
" Visiting Commutative";
403 if (OperandsOpcode && OperandsOpcode ==
getOpcode(Values)) {
411 CombinedOperands.push_back(
Op);
418 MultiNodeActive =
false;
420 auto FinalOrder = reorderMultiNodeOps();
422 MultiNodeOps.clear();
423 for (
auto &Ops : FinalOrder) {
425 Ops.first->replaceAllUsesWith(NewOp);
426 for (
unsigned i = 0;
i < CombinedOperands.size();
i++)
427 if (CombinedOperands[
i] == Ops.first)
428 CombinedOperands[
i] = NewOp;
438 CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
445 switch (ValuesOpcode) {
453 Opcode = ValuesOpcode;
460 assert(CombinedOperands.size() > 0 &&
"Need more some operands");
461 auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
462 auto *VPI =
new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
463 VPI->setUnderlyingInstr(Inst);
466 << *cast<VPInstruction>(Values[0]) <<
"\n");
467 addCombined(Values, VPI);