LLVM 23.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
20#include "llvm/IR/Dominators.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
// NOTE(review): this fragment's signature line is missing from this view
// (doxygen line 33/34 was dropped by the scrape). From the body it takes an
// instruction `I` and a set `Set` and returns true only when every operand of
// `I` is an Instruction contained in `Set`.
35 for (const Use &Use : I->operands())
// dyn_cast yields nullptr for non-instruction operands (constants,
// arguments); the set cannot contain nullptr entries that were inserted as
// real instructions, so such operands make the check fail as well.
36 if (!Set.count(dyn_cast<Instruction>(Use)))
37 return false;
38 return true;
39}
40
// NOTE(review): the function signature (doxygen line 41) is missing from this
// view; the body classifies a RecurKind, returning true for the integer
// recurrence kinds listed below and false otherwise. Several case lines
// (doxygen lines 45, 56, 57, 59) were dropped by the scrape — confirm the
// full case list against the upstream file.
42 switch (Kind) {
43 default:
44 break;
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
58 // TODO: Make type-agnostic.
60 return true;
61 }
62 return false;
63}
64
68
69/// Determines if Phi may have been type-promoted. If Phi has a single user
70/// that ANDs the Phi with a type mask, return the user. RT is updated to
71/// account for the narrower bit width represented by the mask, and the AND
72/// instruction is added to CI.
// NOTE(review): the signature lines (doxygen 73-75) are missing from this
// view; the parameters referenced below are Phi, RT, Visited and CI.
// Only a single-use phi can be looked through: with more users the AND would
// not be the unique consumer and the narrowing would be unsound.
76 if (!Phi->hasOneUse())
77 return Phi;
78
79 const APInt *M = nullptr;
80 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
81
82 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
83 // with a new integer type of the corresponding bit width.
84 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
// exactLogBase2 returns a negative value unless (M + 1) is a power of two,
// i.e. unless M is a low-bit mask of the form 2^x-1.
85 int32_t Bits = (*M + 1).exactLogBase2();
86 if (Bits > 0) {
87 RT = IntegerType::get(Phi->getContext(), Bits);
88 Visited.insert(Phi);
89 CI.insert(J);
90 return J;
91 }
92 }
// No mask pattern found: report the phi itself as the chain start.
93 return Phi;
94}
95
96/// Compute the minimal bit width needed to represent a reduction whose exit
97/// instruction is given by Exit.
98static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
99 DemandedBits *DB,
100 AssumptionCache *AC,
101 DominatorTree *DT) {
102 bool IsSigned = false;
103 const DataLayout &DL = Exit->getDataLayout();
104 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
105
106 if (DB) {
107 // Use the demanded bits analysis to determine the bits that are live out
108 // of the exit instruction, rounding up to the nearest power of two. If the
109 // use of demanded bits results in a smaller bit width, we know the value
110 // must be positive (i.e., IsSigned = false), because if this were not the
111 // case, the sign bit would have been demanded.
112 auto Mask = DB->getDemandedBits(Exit);
113 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
114 }
115
116 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
117 // If demanded bits wasn't able to limit the bit width, we can try to use
118 // value tracking instead. This can be the case, for example, if the value
119 // may be negative.
120 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
121 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
122 MaxBitWidth = NumTypeBits - NumSignBits;
123 KnownBits Bits = computeKnownBits(Exit, DL);
124 if (!Bits.isNonNegative()) {
125 // If the value is not known to be non-negative, we set IsSigned to true,
126 // meaning that we will use sext instructions instead of zext
127 // instructions to restore the original type.
128 IsSigned = true;
129 // Make sure at least one sign bit is included in the result, so it
130 // will get properly sign-extended.
131 ++MaxBitWidth;
132 }
133 }
134 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
135
136 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
137 IsSigned);
138}
139
140/// Collect cast instructions that can be ignored in the vectorizer's cost
141/// model, given a reduction exit value and the minimal type in which the
142// reduction can be represented. Also search casts to the recurrence type
143// to find the minimum width used by the recurrence.
// NOTE(review): one parameter line (doxygen 146, the set `Casts` filled
// below) and the local worklist/visited declarations (doxygen 149-150) are
// missing from this view — confirm against the upstream file.
144static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
145 Type *RecurrenceType,
147 unsigned &MinWidthCastToRecurTy) {
148
151 Worklist.push_back(Exit);
// Start at "no cast found": -1U so any real cast width is smaller.
152 MinWidthCastToRecurTy = -1U;
153
// Depth-first walk of the operand chains feeding the exit value, restricted
// to instructions inside the loop.
154 while (!Worklist.empty()) {
155 Instruction *Val = Worklist.pop_back_val();
156 Visited.insert(Val);
157 if (auto *Cast = dyn_cast<CastInst>(Val)) {
158 if (Cast->getSrcTy() == RecurrenceType) {
159 // If the source type of a cast instruction is equal to the recurrence
160 // type, it will be eliminated, and should be ignored in the vectorizer
161 // cost model.
162 Casts.insert(Cast);
163 continue;
164 }
165 if (Cast->getDestTy() == RecurrenceType) {
166 // The minimum width used by the recurrence is found by checking for
167 // casts on its operands. The minimum width is used by the vectorizer
168 // when finding the widest type for in-loop reductions without any
169 // loads/stores.
170 MinWidthCastToRecurTy = std::min<unsigned>(
171 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
172 continue;
173 }
174 }
175 // Add all operands to the work list if they are loop-varying values that
176 // we haven't yet visited.
177 for (Value *O : cast<User>(Val)->operands())
178 if (auto *I = dyn_cast<Instruction>(O))
179 if (TheLoop->contains(I) && !Visited.count(I))
180 Worklist.push_back(I);
181 }
182}
183
184// Check if a given Phi node can be recognized as an ordered reduction for
185// vectorizing floating point operations without unsafe math.
// Returns true only for strictly-ordered FAdd / FMulAdd reductions where the
// exit instruction feeds the phi directly and has no extra in-loop users.
186static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
187 Instruction *Exit, PHINode *Phi) {
188 // Currently only FAdd and FMulAdd are supported.
189 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
190 return false;
191
192 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
193 return false;
194
// NOTE(review): the second operand of this condition (doxygen line 196,
// presumably a check that Exit is the fmuladd intrinsic) is missing from
// this view — confirm against the upstream file.
195 if (Kind == RecurKind::FMulAdd &&
197 return false;
198
199 // Ensure the exit instruction has only one user other than the reduction PHI
200 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
201 return false;
202
203 // The only pattern accepted is the one in which the reduction PHI
204 // is used as one of the operands of the exit instruction
205 auto *Op0 = Exit->getOperand(0);
206 auto *Op1 = Exit->getOperand(1);
207 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
208 return false;
// For fmuladd the phi must be the addend (third) operand.
209 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
210 return false;
211
212 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
213 << ", ExitInst: " << *Exit << "\n");
214
215 return true;
216}
217
218// Collect FMF from a value and its associated fcmp in select patterns
// NOTE(review): the signature line (doxygen 219) is missing from this view;
// the body takes a Value *V that must be an FPMathOperator and returns the
// union of its fast-math flags with those of the fcmp condition when V is a
// select-based min/max idiom.
220 FastMathFlags FMF = cast<FPMathOperator>(V)->getFastMathFlags();
221 if (auto *Sel = dyn_cast<SelectInst>(V)) {
222 // Accept FMF from either fcmp or select in a min/max idiom.
223 // TODO: Remove this when FMF propagation is fixed or we standardize on
224 // intrinsics.
225 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
226 FMF |= FCmp->getFastMathFlags();
227 }
228 return FMF;
229}
230
// Check whether an FP min/max recurrence carries the fast-math flags needed
// for vectorization; returns the collected flags on success, std::nullopt
// otherwise.
// NOTE(review): the line carrying the function name and leading parameters
// (doxygen 232) is missing from this view, as are several case bodies
// (doxygen 240-243, 247, 249, 252, 254) — confirm the per-kind conditions
// against the upstream file.
231static std::optional<FastMathFlags>
233 FastMathFlags FuncFMF) {
// nnan+nsz may come either from function-level FMF or from the operation
// itself; either source suffices.
234 bool HasRequiredFMF = (FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
235 (FPOp && FPOp->hasNoNaNs() && FPOp->hasNoSignedZeros());
236 if (HasRequiredFMF)
237 return collectMinMaxFMF(FPOp);
238
239 switch (RK) {
244 break;
245
246 case RecurKind::FMax:
248 return std::nullopt;
250 break;
251 case RecurKind::FMin:
253 return std::nullopt;
255 break;
256 default:
257 return std::nullopt;
258 }
259 return collectMinMaxFMF(FPOp);
260}
261
// Recognize a min/max recurrence rooted at a header phi, walking the def-use
// chain from the latch value and validating that every operation in the
// chain is the same min/max kind. Returns a RecurrenceDescriptor-style value
// on success and an empty value ({}) otherwise.
// NOTE(review): the first signature line (doxygen 262, carrying the function
// name and the leading Phi/Loop parameters) is missing from this view, as
// are several other lines (doxygen 298, 300, 323, 325, 359, 362, 370, 416) —
// confirm against the upstream file.
263 FastMathFlags FuncFMF,
264 ScalarEvolution *SE) {
265 Type *Ty = Phi->getType();
266 BasicBlock *Latch = TheLoop->getLoopLatch();
// Only simple header phis over int/FP types with a unique latch qualify.
267 if (Phi->getNumIncomingValues() != 2 ||
268 Phi->getParent() != TheLoop->getHeader() ||
269 (!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || !Latch)
270 return {};
271
// Classify a value as one of the supported min/max forms, binding its two
// operands to A and B; RecurKind::None means "not a min/max".
272 auto GetMinMaxRK = [](Value *V, Value *&A, Value *&B) -> RecurKind {
273 if (match(V, m_UMin(m_Value(A), m_Value(B))))
274 return RecurKind::UMin;
275 if (match(V, m_UMax(m_Value(A), m_Value(B))))
276 return RecurKind::UMax;
277 if (match(V, m_SMax(m_Value(A), m_Value(B))))
278 return RecurKind::SMax;
279 if (match(V, m_SMin(m_Value(A), m_Value(B))))
280 return RecurKind::SMin;
281 if (match(V, m_OrdOrUnordFMin(m_Value(A), m_Value(B))) ||
283 return RecurKind::FMin;
284 if (match(V, m_OrdOrUnordFMax(m_Value(A), m_Value(B))) ||
286 return RecurKind::FMax;
287 if (match(V, m_FMinimum(m_Value(A), m_Value(B))))
288 return RecurKind::FMinimum;
289 if (match(V, m_FMaximum(m_Value(A), m_Value(B))))
290 return RecurKind::FMaximum;
295 return RecurKind::None;
296 };
297
299 Value *BackedgeValue = Phi->getIncomingValueForBlock(Latch);
301 // Walk def-use chains upwards from BackedgeValue to identify min/max
302 // recurrences.
303 SmallVector<Value *> WorkList({BackedgeValue});
304 SmallPtrSet<Value *, 8> Chain({Phi});
305 while (!WorkList.empty()) {
306 Value *Cur = WorkList.pop_back_val();
// Already in the chain: nothing new to analyze.
307 if (!Chain.insert(Cur).second)
308 continue;
309 auto *I = dyn_cast<Instruction>(Cur);
310 if (!I || !TheLoop->contains(I))
311 return {};
// Intermediate phis are transparent: analyze all their inputs.
312 if (auto *PN = dyn_cast<PHINode>(I)) {
313 append_range(WorkList, PN->operands());
314 continue;
315 }
316 Value *A, *B;
317 RecurKind CurRK = GetMinMaxRK(Cur, A, B);
// Every non-phi link in the chain must be a min/max of one consistent kind.
318 if (CurRK == RecurKind::None || (RK != RecurKind::None && CurRK != RK))
319 return {};
320
321 RK = CurRK;
322 // Check required fast-math flags for FP recurrences.
324 auto CurFMF =
326 if (!CurFMF)
327 return {};
328 FMF &= *CurFMF;
329 }
330
// Treat the select's condition as part of the chain so its cmp use does not
// count as a foreign user later.
331 if (auto *SI = dyn_cast<SelectInst>(I))
332 Chain.insert(SI->getCondition());
333
334 if (A == Phi || B == Phi)
335 continue;
336
337 // Add operand to worklist if it matches the pattern (exactly one must
338 // match)
339 Value *X, *Y;
340 auto *IA = dyn_cast<Instruction>(A);
341 auto *IB = dyn_cast<Instruction>(B);
342 bool AMatches = IA && TheLoop->contains(IA) && GetMinMaxRK(A, X, Y) == RK;
343 bool BMatches = IB && TheLoop->contains(IB) && GetMinMaxRK(B, X, Y) == RK;
344 if (AMatches == BMatches) // Both or neither match
345 return {};
346 WorkList.push_back(AMatches ? A : B);
347 }
348
349 // Handle argmin/argmax pattern: PHI has uses outside the reduction chain
350 // that are not intermediate min/max operations (which are handled below).
351 // Requires integer min/max, and single-use BackedgeValue (so vectorizer can
352 // handle both PHIs together).
353 bool PhiHasInvalidUses = any_of(Phi->users(), [&](User *U) {
354 Value *A, *B;
355 return !Chain.contains(U) && TheLoop->contains(cast<Instruction>(U)) &&
356 GetMinMaxRK(U, A, B) == RecurKind::None;
357 });
358 if (PhiHasInvalidUses) {
360 !BackedgeValue->hasOneUse())
361 return {};
// Multi-use descriptor: exit and store are intentionally left null here.
363 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
364 /*Exit=*/nullptr, /*Store=*/nullptr, RK, FastMathFlags(),
365 /*ExactFP=*/nullptr, Phi->getType(), /*IsMultiUse=*/true);
366 }
367
368 // Validate chain entries and collect stores from chain entries and
369 // intermediate ops.
371 unsigned OutOfLoopUses = 0;
372 for (Value *V : Chain) {
373 for (User *U : V->users()) {
374 if (Chain.contains(U))
375 continue;
376 auto *I = dyn_cast<Instruction>(U);
// At most one out-of-loop user is allowed, and only of the backedge value.
377 if (!I || (!TheLoop->contains(I) &&
378 (V != BackedgeValue || ++OutOfLoopUses > 1)))
379 return {};
380 if (!TheLoop->contains(I))
381 continue;
382 if (auto *SI = dyn_cast<StoreInst>(I)) {
383 Stores.push_back(SI);
384 continue;
385 }
386 // Must be intermediate min/max of the same kind.
387 Value *A, *B;
388 if (GetMinMaxRK(I, A, B) != RK)
389 return {};
390 for (User *IU : I->users()) {
391 if (auto *SI = dyn_cast<StoreInst>(IU))
392 Stores.push_back(SI);
393 else if (!Chain.contains(IU))
394 return {};
395 }
396 }
397 }
398
399 // Validate all stores go to same invariant address and are in the same block.
400 StoreInst *IntermediateStore = nullptr;
401 const SCEV *StorePtrSCEV = nullptr;
402 for (StoreInst *SI : Stores) {
403 const SCEV *Ptr = SE->getSCEV(SI->getPointerOperand());
404 if (!SE->isLoopInvariant(Ptr, TheLoop) ||
405 (StorePtrSCEV && StorePtrSCEV != Ptr))
406 return {};
407 StorePtrSCEV = Ptr;
// Keep the last store in program order as the intermediate store.
408 if (!IntermediateStore)
409 IntermediateStore = SI;
410 else if (IntermediateStore->getParent() != SI->getParent())
411 return {};
412 else if (IntermediateStore->comesBefore(SI))
413 IntermediateStore = SI;
414 }
415
417 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
418 cast<Instruction>(BackedgeValue), IntermediateStore, RK, FMF, nullptr,
419 Phi->getType());
420}
421
422// This matches a phi that selects between the original value (HeaderPhi) and an
423// arbitrary non-reduction value.
424static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi,
425 SmallPtrSetImpl<Instruction *> &ReductionInstrs) {
426 unsigned NumNonReduxInputs = 0;
427 for (const Value *Op : Phi->operands()) {
428 if (!ReductionInstrs.contains(dyn_cast<Instruction>(Op))) {
429 if (++NumNonReduxInputs > 1)
430 return false;
431 } else if (Op != HeaderPhi) {
432 // TODO: Remove this restriction once chained phis are supported.
433 return false;
434 }
435 }
436 return NumNonReduxInputs == 1;
437}
438
// Main reduction-recognition routine: scans the use-def cycle rooted at a
// header phi, validating that every user is a legal reduction operation, a
// phi, an intermediate store to an invariant address, or a single out-of-loop
// exit user, and fills in the resulting RecurrenceDescriptor on success.
// NOTE(review): the line carrying the function name (doxygen 439, presumably
// RecurrenceDescriptor::AddReductionVar) and a number of other lines are
// missing from this doxygen-scraped view (e.g. 441-442, 496-497, 507, 523,
// 584, 668-671) — confirm against the upstream file before editing code.
440 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
443 if (Phi->getNumIncomingValues() != 2)
444 return false;
445
446 // Reduction variables are only found in the loop header block.
447 if (Phi->getParent() != TheLoop->getHeader())
448 return false;
449
450 // Obtain the reduction start value from the value that comes from the loop
451 // preheader.
452 if (!TheLoop->getLoopPreheader())
453 return false;
454
455 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
456 // ExitInstruction is the single value which is used outside the loop.
457 // We only allow for a single reduction value to be used outside the loop.
458 // This includes users of the reduction, variables (which form a cycle
459 // which ends in the phi node).
460 Instruction *ExitInstruction = nullptr;
461
462 // Variable to keep last visited store instruction. By the end of the
463 // algorithm this variable will be either empty or having intermediate
464 // reduction value stored in invariant address.
465 StoreInst *IntermediateStore = nullptr;
466
467 // Indicates that we found a reduction operation in our scan.
468 bool FoundReduxOp = false;
469
470 // We start with the PHI node and scan for all of the users of this
471 // instruction. All users must be instructions that can be used as reduction
472 // variables (such as ADD). We must have a single out-of-block user. The cycle
473 // must include the original PHI.
474 bool FoundStartPHI = false;
475
476 // To recognize AnyOf patterns formed by a icmp select sequence, we store
477 // the number of instruction we saw to make sure we only see one.
478 unsigned NumCmpSelectPatternInst = 0;
479 InstDesc ReduxDesc(false, nullptr);
480
481 // To recognize find-lasts of conditional operations (such as loads or
482 // divides), that need masking, we track non-phi users and if we've found a
483 // "find-last-like" phi (see isFindLastLikePhi). We currently only support
484 // find-last reduction chains with a single "find-last-like" phi and do not
485 // allow any other operations.
486 [[maybe_unused]] unsigned NumNonPHIUsers = 0;
487 bool FoundFindLastLikePhi = false;
488
489 // Data used for determining if the recurrence has been type-promoted.
490 Type *RecurrenceType = Phi->getType();
492 unsigned MinWidthCastToRecurrenceType;
493 Instruction *Start = Phi;
494 bool IsSigned = false;
495
498
499 // Return early if the recurrence kind does not match the type of Phi. If the
500 // recurrence kind is arithmetic, we attempt to look through AND operations
501 // resulting from the type promotion performed by InstCombine. Vector
502 // operations are not limited to the legal integer widths, so we may be able
503 // to evaluate the reduction in the narrower width.
504 // Check the scalar type to handle both scalar and vector types.
505 Type *ScalarTy = RecurrenceType->getScalarType();
506 if (ScalarTy->isFloatingPointTy()) {
508 return false;
509 } else if (ScalarTy->isIntegerTy()) {
510 if (!isIntegerRecurrenceKind(Kind))
511 return false;
512 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
513 } else {
514 // Pointer min/max may exist, but it is not supported as a reduction op.
515 return false;
516 }
517
518 Worklist.push_back(Start);
519 VisitedInsts.insert(Start);
520
521 // Start with all flags set because we will intersect this with the reduction
522 // flags from all the reduction operations.
524
525 // The first instruction in the use-def chain of the Phi node that requires
526 // exact floating point operations.
527 Instruction *ExactFPMathInst = nullptr;
528
529 // A value in the reduction can be used:
530 // - By the reduction:
531 // - Reduction operation:
532 // - One use of reduction value (safe).
533 // - Multiple use of reduction value (not safe).
534 // - PHI:
535 // - All uses of the PHI must be the reduction (safe).
536 // - Otherwise, not safe.
537 // - By instructions outside of the loop (safe).
538 // * One value may have several outside users, but all outside
539 // uses must be of the same value.
540 // - By store instructions with a loop invariant address (safe with
541 // the following restrictions):
542 // * If there are several stores, all must have the same address.
543 // * Final value should be stored in that loop invariant address.
544 // - By an instruction that is not part of the reduction (not safe).
545 // This is either:
546 // * An instruction type other than PHI or the reduction operation.
547 // * A PHI in the header other than the initial PHI.
548 while (!Worklist.empty()) {
549 Instruction *Cur = Worklist.pop_back_val();
550
551 // Store instructions are allowed iff it is the store of the reduction
552 // value to the same loop invariant memory location.
553 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
554 if (!SE) {
555 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
556 << "Scalar Evolution Analysis\n");
557 return false;
558 }
559
560 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
561 // Check it is the same address as previous stores
562 if (IntermediateStore) {
563 const SCEV *OtherScev =
564 SE->getSCEV(IntermediateStore->getPointerOperand());
565
566 if (OtherScev != PtrScev) {
567 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
568 << "inside the loop: " << *SI->getPointerOperand()
569 << " and "
570 << *IntermediateStore->getPointerOperand() << '\n');
571 return false;
572 }
573 }
574
575 // Check the pointer is loop invariant
576 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
577 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
578 << "inside the loop: " << *SI->getPointerOperand()
579 << '\n');
580 return false;
581 }
582
583 // IntermediateStore is always the last store in the loop.
585 continue;
586 }
587
588 // No Users.
589 // If the instruction has no users then this is a broken chain and can't be
590 // a reduction variable.
591 if (Cur->use_empty())
592 return false;
593
594 bool IsAPhi = isa<PHINode>(Cur);
595 if (!IsAPhi)
596 ++NumNonPHIUsers;
597
598 // A header PHI use other than the original PHI.
599 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
600 return false;
601
602 // Reductions of instructions such as Div, and Sub is only possible if the
603 // LHS is the reduction variable.
604 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
605 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
606 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
607 return false;
608
609 // Any reduction instruction must be of one of the allowed kinds. We ignore
610 // the starting value (the Phi or an AND instruction if the Phi has been
611 // type-promoted).
612 if (Cur != Start) {
613 ReduxDesc =
614 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
615 ExactFPMathInst = ExactFPMathInst == nullptr
616 ? ReduxDesc.getExactFPMathInst()
617 : ExactFPMathInst;
618 if (!ReduxDesc.isRecurrence())
619 return false;
620 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
621 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
622 FMF &= collectMinMaxFMF(ReduxDesc.getPatternInst());
623 // Update this reduction kind if we matched a new instruction.
624 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
625 // state accurate while processing the worklist?
626 if (ReduxDesc.getRecKind() != RecurKind::None)
627 Kind = ReduxDesc.getRecKind();
628 }
629
630 bool IsASelect = isa<SelectInst>(Cur);
631
632 // A conditional reduction operation must only have 2 or less uses in
633 // VisitedInsts.
634 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
635 hasMultipleUsesOf(Cur, VisitedInsts, 2))
636 return false;
637
638 // A reduction operation must only have one use of the reduction value.
639 if (!IsAPhi && !IsASelect && !isAnyOfRecurrenceKind(Kind) &&
640 hasMultipleUsesOf(Cur, VisitedInsts, 1))
641 return false;
642
643 // All inputs to a PHI node must be a reduction value, unless the phi is a
644 // "FindLast-like" phi (described below).
645 if (IsAPhi && Cur != Phi) {
646 if (!areAllUsesIn(Cur, VisitedInsts)) {
647 // A "FindLast-like" phi acts like a conditional select between the
648 // previous reduction value, and an arbitrary value. Note: Multiple
649 // "FindLast-like" phis are not supported see:
650 // IVDescriptorsTest.UnsupportedFindLastPhi.
651 FoundFindLastLikePhi =
652 Kind == RecurKind::FindLast && !FoundFindLastLikePhi &&
653 isFindLastLikePhi(cast<PHINode>(Cur), Phi, VisitedInsts);
654 if (!FoundFindLastLikePhi)
655 return false;
656 }
657 }
658
659 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
660 ++NumCmpSelectPatternInst;
661
662 // Check whether we found a reduction operator.
663 FoundReduxOp |= (!IsAPhi || FoundFindLastLikePhi) && Cur != Start;
664
665 // Process users of current instruction. Push non-PHI nodes after PHI nodes
666 // onto the stack. This way we are going to have seen all inputs to PHI
667 // nodes once we get to them.
670 for (User *U : Cur->users()) {
672
673 // If the user is a call to llvm.fmuladd then the instruction can only be
674 // the final operand.
675 if (isFMulAddIntrinsic(UI))
676 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
677 return false;
678
679 // Check if we found the exit user.
680 BasicBlock *Parent = UI->getParent();
681 if (!TheLoop->contains(Parent)) {
682 // If we already know this instruction is used externally, move on to
683 // the next user.
684 if (ExitInstruction == Cur)
685 continue;
686
687 // Exit if you find multiple values used outside or if the header phi
688 // node is being used. In this case the user uses the value of the
689 // previous iteration, in which case we would loose "VF-1" iterations of
690 // the reduction operation if we vectorize.
691 if (ExitInstruction != nullptr || Cur == Phi)
692 return false;
693
694 // The instruction used by an outside user must be the last instruction
695 // before we feed back to the reduction phi. Otherwise, we loose VF-1
696 // operations on the value.
697 if (!is_contained(Phi->operands(), Cur))
698 return false;
699
700 ExitInstruction = Cur;
701 continue;
702 }
703
704 // Process instructions only once (termination). Each reduction cycle
705 // value must only be used once, except by phi nodes and conditional
706 // reductions which are represented as a cmp followed by a select.
707 InstDesc IgnoredVal(false, nullptr);
708 if (VisitedInsts.insert(UI).second) {
709 if (isa<PHINode>(UI)) {
710 PHIs.push_back(UI);
711 } else {
713 if (SI && SI->getPointerOperand() == Cur) {
714 // Reduction variable chain can only be stored somewhere but it
715 // can't be used as an address.
716 return false;
717 }
718 NonPHIs.push_back(UI);
719 }
720 } else if (!isa<PHINode>(UI) &&
721 ((!isConditionalRdxPattern(UI).isRecurrence() &&
722 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
723 .isRecurrence())))
724 return false;
725
726 // Remember that we completed the cycle.
727 if (UI == Phi)
728 FoundStartPHI = true;
729 }
730 Worklist.append(PHIs.begin(), PHIs.end());
731 Worklist.append(NonPHIs.begin(), NonPHIs.end());
732 }
733
734 // We only expect to match a single "find-last-like" phi per find-last
735 // reduction, with no non-phi operations in the reduction use chain.
736 assert((!FoundFindLastLikePhi ||
737 (Kind == RecurKind::FindLast && NumNonPHIUsers == 0)) &&
738 "Unexpectedly matched a 'find-last-like' phi");
739
740 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
741 return false;
742
743 if (IntermediateStore) {
744 // Check that stored value goes to the phi node again. This way we make sure
745 // that the value stored in IntermediateStore is indeed the final reduction
746 // value.
747 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
748 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
749 << *IntermediateStore << '\n');
750 return false;
751 }
752
753 // If there is an exit instruction it's value should be stored in
754 // IntermediateStore
755 if (ExitInstruction &&
756 IntermediateStore->getValueOperand() != ExitInstruction) {
757 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
758 "store last calculated value of the reduction: "
759 << *IntermediateStore << '\n');
760 return false;
761 }
762
763 // If all uses are inside the loop (intermediate stores), then the
764 // reduction value after the loop will be the one used in the last store.
765 if (!ExitInstruction)
766 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
767 }
768
769 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
770 return false;
771
772 const bool IsOrdered =
773 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
774
775 if (Start != Phi) {
776 // If the starting value is not the same as the phi node, we speculatively
777 // looked through an 'and' instruction when evaluating a potential
778 // arithmetic reduction to determine if it may have been type-promoted.
779 //
780 // We now compute the minimal bit width that is required to represent the
781 // reduction. If this is the same width that was indicated by the 'and', we
782 // can represent the reduction in the smaller type. The 'and' instruction
783 // will be eliminated since it will essentially be a cast instruction that
784 // can be ignore in the cost model. If we compute a different type than we
785 // did when evaluating the 'and', the 'and' will not be eliminated, and we
786 // will end up with different kinds of operations in the recurrence
787 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
788 // the case.
789 //
790 // The vectorizer relies on InstCombine to perform the actual
791 // type-shrinking. It does this by inserting instructions to truncate the
792 // exit value of the reduction to the width indicated by RecurrenceType and
793 // then extend this value back to the original width. If IsSigned is false,
794 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
795 // used.
796 //
797 // TODO: We should not rely on InstCombine to rewrite the reduction in the
798 // smaller type. We should just generate a correctly typed expression
799 // to begin with.
800 Type *ComputedType;
801 std::tie(ComputedType, IsSigned) =
802 computeRecurrenceType(ExitInstruction, DB, AC, DT);
803 if (ComputedType != RecurrenceType)
804 return false;
805 }
806
807 // Collect cast instructions and the minimum width used by the recurrence.
808 // If the starting value is not the same as the phi node and the computed
809 // recurrence type is equal to the recurrence type, the recurrence expression
810 // will be represented in a narrower or wider type. If there are any cast
811 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
812 // Note that the 'and' instruction was already included in this list.
813 //
814 // TODO: A better way to represent this may be to tag in some way all the
815 // instructions that are a part of the reduction. The vectorizer cost
816 // model could then apply the recurrence type to these instructions,
817 // without needing a white list of instructions to ignore.
818 // This may also be useful for the inloop reductions, if it can be
819 // kept simple enough.
820 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
821 MinWidthCastToRecurrenceType);
822
823 // We found a reduction var if we have reached the original phi node and we
824 // only have a single instruction with out-of-loop users.
825
826 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
827 // is saved as part of the RecurrenceDescriptor.
828
829 // Save the description of this reduction variable.
830 RedDes =
831 RecurrenceDescriptor(RdxStart, ExitInstruction, IntermediateStore, Kind,
832 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
833 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
834 return true;
835}
836
837// We are looking for loops that do something like this:
838// int r = 0;
839// for (int i = 0; i < n; i++) {
840// if (src[i] > 3)
841// r = 3;
842// }
843// where the reduction value (r) only has two states, in this example 0 or 3.
844// The generated LLVM IR for this type of loop will be like this:
845// for.body:
846// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
847// ...
848// %cmp = icmp sgt i32 %5, 3
849// %spec.select = select i1 %cmp, i32 3, i32 %r
850// ...
851// In general we can support vectorization of loops where 'r' flips between
852// any two non-constants, provided they are loop invariant. The only thing
853// we actually care about at the end of the loop is whether or not any lane
854// in the selected vector is different from the start value. The final
855// across-vector reduction after the loop simply involves choosing the start
856// value if nothing changed (0 in the example above) or the other selected
857// value (3 in the example above).
// NOTE(review): the signature lines (doxygen 858-859, presumably
// RecurrenceDescriptor::isAnyOfPattern with Loop/OrigPhi parameters) are
// missing from this view — confirm against the upstream file.
860 Instruction *I, InstDesc &Prev) {
861 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
862 // the select.
863 if (match(I, m_OneUse(m_Cmp()))) {
864 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
865 return InstDesc(Select, Prev.getRecKind());
866 }
867
// Only a select whose condition is a compare can form the AnyOf pattern.
868 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
869 return InstDesc(false, I);
870
872 Value *NonPhi = nullptr;
873
// Exactly one select arm must be the reduction phi; the other arm is the
// candidate "flipped-to" value.
874 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
875 NonPhi = SI->getFalseValue();
876 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
877 NonPhi = SI->getTrueValue();
878 else
879 return InstDesc(false, I);
880
881 // We are looking for selects of the form:
882 // select(cmp(), phi, loop_invariant) or
883 // select(cmp(), loop_invariant, phi)
884 if (!Loop->isLoopInvariant(NonPhi))
885 return InstDesc(false, I);
886
887 return InstDesc(I, RecurKind::AnyOf);
888}
889
890// We are looking for loops that do something like this:
891// int r = 0;
892// for (int i = 0; i < n; i++) {
893// if (src[i] > 3)
894// r = i;
895// }
896// or like this:
897// int r = 0;
898// for (int i = 0; i < n; i++) {
899// if (src[i] > 3)
900// r = <loop-varying value>;
901// }
902// The reduction value (r) is derived from either the values of an induction
903// variable (i) sequence, an arbitrary loop-varying value, or from the start
904// value (0). The LLVM IR generated for such loops would be as follows:
905// for.body:
906// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
907// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
908// ...
909// %cmp = icmp sgt i32 %5, 3
910// %spec.select = select i1 %cmp, i32 %i, i32 %r
911// %inc = add nsw i32 %i, 1
912// ...
913//
914// When searching for an arbitrary loop-varying value, the reduction value will
915// either be the initial value (0) if the condition was never met, or the value
916// of the loop-varying value in the most recent loop iteration where the
917// condition was met.
// NOTE(review): the signature lines (doxygen 918-920) and the final return
// statement's body (doxygen 939) are missing from this view — confirm
// against the upstream file.
921 // TODO: Support the vectorization of FindLastIV when the reduction phi is
922 // used by more than one select instruction. This vectorization is only
923 // performed when the SCEV of each increasing induction variable used by the
924 // select instructions is identical.
925 if (!OrigPhi->hasOneUse())
926 return InstDesc(false, I);
927
928 // We are looking for selects of the form:
929 // select(cmp(), phi, value) or
930 // select(cmp(), value, phi)
931 // TODO: Match selects with multi-use cmp conditions.
932 Value *NonRdxPhi = nullptr;
933 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
934 m_Specific(OrigPhi)),
935 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
936 m_Value(NonRdxPhi)))))
937 return InstDesc(false, I);
938
940}
941
942/// Returns true if the select instruction has users in the compare-and-add
943/// reduction pattern below. The select instruction argument is the last one
944/// in the sequence.
945///
946/// %sum.1 = phi ...
947/// ...
948/// %cmp = fcmp pred %0, %CFP
949/// %add = fadd %0, %sum.1
950/// %sum.2 = select %cmp, %add, %sum.1
// NOTE(review): source lines 951-952 (this function's signature — the index
// lists it as "InstDesc isConditionalRdxPattern(Instruction *I)") are
// missing from this extraction; confirm against the upstream file.
953 Value *TrueVal, *FalseVal;
954 // Only handle single use cases for now.
955 if (!match(I,
956 m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
957 return InstDesc(false, I);
958
959 // Handle only when either of operands of select instruction is a PHI
960 // node for now.
961 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
962 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
963 return InstDesc(false, I);
964
// I1 is the non-phi select arm: the binary op that performs the update.
965 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
966 : dyn_cast<Instruction>(TrueVal);
967 if (!I1 || !I1->isBinaryOp())
968 return InstDesc(false, I);
969
// The update must be add/sub/mul (FP forms additionally require fast-math).
970 Value *Op1, *Op2;
971 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
972 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
973 I1->isFast()) ||
974 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
975 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
976 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
977 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
978 return InstDesc(false, I);
979
// NOTE(review): source lines 980-981 (presumably the declaration of IPhi,
// picking whichever of Op1/Op2 is the phi) are missing from this extraction.
982 if (!IPhi || IPhi != FalseVal)
983 return InstDesc(false, I);
984
985 return InstDesc(true, I);
986}
987
// Dispatches on the opcode of I and reports whether it continues a
// recurrence of the given Kind; Prev carries state from the previous
// instruction in the chain.
// NOTE(review): source line 988 (the start of this function's signature,
// presumably "RecurrenceDescriptor::InstDesc
// RecurrenceDescriptor::isRecurrenceInstr(") is missing from this extraction.
989 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
990 FastMathFlags FuncFMF, ScalarEvolution *SE) {
991 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
992 switch (I->getOpcode()) {
993 default:
994 return InstDesc(false, I);
995 case Instruction::PHI:
996 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
997 case Instruction::Sub:
998 return InstDesc(
999 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
1000 case Instruction::Add:
1001 return InstDesc(
1002 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
1003 case Instruction::Mul:
1004 return InstDesc(Kind == RecurKind::Mul, I);
1005 case Instruction::And:
1006 return InstDesc(Kind == RecurKind::And, I);
1007 case Instruction::Or:
1008 return InstDesc(Kind == RecurKind::Or, I);
1009 case Instruction::Xor:
1010 return InstDesc(Kind == RecurKind::Xor, I);
// For FP ops, the third InstDesc argument records the instruction blocking
// reassociation (null when 'reassoc' fast-math is present).
1011 case Instruction::FDiv:
1012 case Instruction::FMul:
1013 return InstDesc(Kind == RecurKind::FMul, I,
1014 I->hasAllowReassoc() ? nullptr : I);
1015 case Instruction::FSub:
1016 case Instruction::FAdd:
1017 return InstDesc(Kind == RecurKind::FAdd, I,
1018 I->hasAllowReassoc() ? nullptr : I);
1019 case Instruction::Select:
1020 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
1021 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
// NOTE(review): source line 1022 (the rest of this condition, presumably
// covering Sub/AddChainWithSubs kinds) is missing from this extraction.
1023 return isConditionalRdxPattern(I);
1024 if (isFindRecurrenceKind(Kind) && SE)
1025 return isFindPattern(L, OrigPhi, I, *SE);
1026 [[fallthrough]];
1027 case Instruction::FCmp:
1028 case Instruction::ICmp:
1029 case Instruction::Call:
1030 if (isAnyOfRecurrenceKind(Kind))
1031 return isAnyOfPattern(L, OrigPhi, I, Prev);
1032 if (isFMulAddIntrinsic(I))
1033 return InstDesc(Kind == RecurKind::FMulAdd, I,
1034 I->hasAllowReassoc() ? nullptr : I);
1035 return InstDesc(false, I);
1036 }
1037}
1038
1041 unsigned MaxNumUses) {
1042 unsigned NumUses = 0;
1043 for (const Use &U : I->operands()) {
1044 if (Insts.count(dyn_cast<Instruction>(U)))
1045 ++NumUses;
1046 if (NumUses > MaxNumUses)
1047 return true;
1048 }
1049
1050 return false;
1051}
1052
// Probes each supported recurrence kind in turn via AddReductionVar (and the
// dedicated min/max helper); the first kind that matches fills RedDes.
// NOTE(review): source lines 1053 and 1055 (the start of this function's
// signature — the header lists it as "isReductionPHI(PHINode *Phi, Loop
// *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache
// *AC, DominatorTree *DT, ScalarEvolution *SE)") are missing from this
// extraction; confirm against the upstream file.
1054 RecurrenceDescriptor &RedDes,
1056 DominatorTree *DT,
1057 ScalarEvolution *SE) {
1058 BasicBlock *Header = TheLoop->getHeader();
1059 Function &F = *Header->getParent();
// Seed fast-math assumptions from the function-level attributes.
1060 FastMathFlags FMF;
1061 FMF.setNoNaNs(
1062 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
1063 FMF.setNoSignedZeros(
1064 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
1065
1066 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
1067 SE)) {
1068 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1069 return true;
1070 }
1071 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT,
1072 SE)) {
1073 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1074 return true;
1075 }
1076 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, FMF, RedDes,
1077 DB, AC, DT, SE)) {
1078 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1079 << "\n");
1080 return true;
1081 }
1082 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
1083 SE)) {
1084 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1085 return true;
1086 }
1087 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
1088 SE)) {
1089 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1090 return true;
1091 }
1092 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
1093 SE)) {
1094 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1095 return true;
1096 }
1097 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
1098 SE)) {
1099 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1100 return true;
1101 }
// Min/max kinds go through a dedicated helper that returns a full
// descriptor rather than a boolean.
1102 auto RD = getMinMaxRecurrence(Phi, TheLoop, FMF, SE);
1103 if (RD.getRecurrenceKind() != RecurKind::None) {
1104 assert(
1105 RecurrenceDescriptor::isMinMaxRecurrenceKind(RD.getRecurrenceKind()) &&
1106 "Expected a min/max recurrence kind");
1107 LLVM_DEBUG(dbgs() << "Found a min/max reduction PHI." << *Phi << "\n");
1108 RedDes = RD;
1109 return true;
1110 }
1111 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
1112 SE)) {
1113 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1114 << "\n");
1115 return true;
1116 }
1117 if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1118 DT, SE)) {
1119 LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
1120 return true;
1121 }
1122 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
1123 SE)) {
1124 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1125 return true;
1126 }
1127 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1128 SE)) {
1129 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1130 return true;
1131 }
1132 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1133 SE)) {
1134 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1135 return true;
1136 }
1137
1138 // Not a reduction of known type.
1139 return false;
1140}
1141
// Determines whether Phi is a fixed-order recurrence: a header phi whose
// value from the previous iteration is used, such that all users can be
// sunk after the previous (non-phi) value.
// NOTE(review): source line 1142 (the start of this function's signature —
// the header lists it as "isFixedOrderRecurrence(PHINode *Phi, Loop
// *TheLoop, DominatorTree *DT)") is missing from this extraction.
1143 DominatorTree *DT) {
1144
1145 // Ensure the phi node is in the loop header and has two incoming values.
1146 if (Phi->getParent() != TheLoop->getHeader() ||
1147 Phi->getNumIncomingValues() != 2)
1148 return false;
1149
1150 // Ensure the loop has a preheader and a single latch block. The loop
1151 // vectorizer will need the latch to set up the next iteration of the loop.
1152 auto *Preheader = TheLoop->getLoopPreheader();
1153 auto *Latch = TheLoop->getLoopLatch();
1154 if (!Preheader || !Latch)
1155 return false;
1156
1157 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1158 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1159 Phi->getBasicBlockIndex(Latch) < 0)
1160 return false;
1161
1162 // Get the previous value. The previous value comes from the latch edge while
1163 // the initial value comes from the preheader edge.
1164 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch))
1165
1166 // If Previous is a phi in the header, go through incoming values from the
1167 // latch until we find a non-phi value. Use this as the new Previous, all uses
1168 // in the header will be dominated by the original phi, but need to be moved
1169 // after the non-phi previous value.
// NOTE(review): source line 1170 (presumably the declaration of SeenPhis,
// e.g. a SmallPtrSet of PHINode*) is missing from this extraction.
1171 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1172 if (PrevPhi->getParent() != Phi->getParent())
1173 return false;
// SeenPhis guards against cycles of header phis feeding each other.
1174 if (!SeenPhis.insert(PrevPhi).second)
1175 return false;
1176 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1177 }
1178
1179 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1180 return false;
1181
1182 // Ensure every user of the phi node (recursively) is dominated by the
1183 // previous value. The dominance requirement ensures the loop vectorizer will
1184 // not need to vectorize the initial value prior to the first iteration of the
1185 // loop.
1186 // TODO: Consider extending this sinking to handle memory instructions.
1187
// NOTE(review): source lines 1188 and 1190 (presumably the declarations of
// Seen and WorkList used below) are missing from this extraction.
1189 BasicBlock *PhiBB = Phi->getParent();
1191 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1192 // Cyclic dependence.
1193 if (Previous == SinkCandidate)
1194 return false;
1195
1196 if (!Seen.insert(SinkCandidate).second)
1197 return true;
1198 if (DT->dominates(Previous,
1199 SinkCandidate)) // We already are good w/o sinking.
1200 return true;
1201
1202 if (SinkCandidate->getParent() != PhiBB ||
1203 SinkCandidate->mayHaveSideEffects() ||
1204 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1205 return false;
1206
1207 // If we reach a PHI node that is not dominated by Previous, we reached a
1208 // header PHI. No need for sinking.
1209 if (isa<PHINode>(SinkCandidate))
1210 return true;
1211
1212 // Sink User tentatively and check its users
1213 WorkList.push_back(SinkCandidate);
1214 return true;
1215 };
1216
1217 WorkList.push_back(Phi);
1218 // Try to recursively sink instructions and their users after Previous.
1219 while (!WorkList.empty()) {
1220 Instruction *Current = WorkList.pop_back_val();
1221 for (User *User : Current->users()) {
1222 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1223 return false;
1224 }
1225 }
1226
1227 return true;
1228}
1229
// Maps a RecurKind to the IR opcode that performs the reduction step
// (min/max kinds map to their compare opcode).
// NOTE(review): source line 1230 (this function's signature, presumably
// "unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {") is missing
// from this extraction.
1231 switch (Kind) {
1232 case RecurKind::Sub:
1233 return Instruction::Sub;
// NOTE(review): source line 1234 (an additional case label, presumably
// RecurKind::AddChainWithSubs falling through to Add) is missing from this
// extraction.
1235 case RecurKind::Add:
1236 return Instruction::Add;
1237 case RecurKind::Mul:
1238 return Instruction::Mul;
1239 case RecurKind::Or:
1240 return Instruction::Or;
1241 case RecurKind::And:
1242 return Instruction::And;
1243 case RecurKind::Xor:
1244 return Instruction::Xor;
1245 case RecurKind::FMul:
1246 return Instruction::FMul;
1247 case RecurKind::FMulAdd:
1248 case RecurKind::FAdd:
1249 return Instruction::FAdd;
1250 case RecurKind::SMax:
1251 case RecurKind::SMin:
1252 case RecurKind::UMax:
1253 case RecurKind::UMin:
1254 return Instruction::ICmp;
1255 case RecurKind::FMax:
1256 case RecurKind::FMin:
// NOTE(review): source lines 1257-1260 (additional FP min/max case labels
// sharing this return) are missing from this extraction.
1261 return Instruction::FCmp;
// NOTE(review): source line 1262 (presumably another case label grouped
// with AnyOf/FindIV below) is missing from this extraction.
1263 case RecurKind::AnyOf:
1264 case RecurKind::FindIV:
1265 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1266 // are supported.
1267 default:
1268 llvm_unreachable("Unknown recurrence operation");
1269 }
1270}
1271
// Collects the chain of instructions forming an in-loop reduction from Phi
// down to the loop exit instruction; returns an empty vector if the chain
// does not match the expected single-use shape.
// NOTE(review): source lines 1272-1273 (this function's signature) are
// missing from this extraction; confirm against the upstream file.
1274 SmallVector<Instruction *, 4> ReductionOperations;
1275 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1276
1277 // Search down from the Phi to the LoopExitInstr, looking for instructions
1278 // with a single user of the correct type for the reduction.
1279
1280 // Note that we check that the type of the operand is correct for each item in
1281 // the chain, including the last (the loop exit value). This can come up from
1282 // sub, which would otherwise be treated as an add reduction. MinMax also need
1283 // to check for a pair of icmp/select, for which we use getNextInstruction and
1284 // isCorrectOpcode functions to step the right number of instruction, and
1285 // check the icmp/select pair.
1286 // FIXME: We also do not attempt to look through Select's yet, which might
1287 // be part of the reduction chain, or attempt to looks through And's to find a
1288 // smaller bitwidth. Subs are also currently not allowed (which are usually
1289 // treated as part of a add reduction) as they are expected to generally be
1290 // more expensive than out-of-loop reductions, and need to be costed more
1291 // carefully.
1292 unsigned ExpectedUses = 1;
1293 if (IsMinMax)
1294 ExpectedUses = 2;
1295
1296 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1297 for (auto *User : Cur->users()) {
// NOTE(review): source line 1298 (presumably casting User to an
// Instruction* named UI) is missing from this extraction.
1299 if (isa<PHINode>(UI))
1300 continue;
1301 if (IsMinMax) {
1302 // We are expecting a icmp/select pair, which we go to the next select
1303 // instruction if we can. We already know that Cur has 2 uses.
1304 if (isa<SelectInst>(UI))
1305 return UI;
1306 continue;
1307 }
1308 return UI;
1309 }
1310 return nullptr;
1311 };
1312 auto isCorrectOpcode = [&](Instruction *Cur) {
1313 if (IsMinMax) {
1314 Value *LHS, *RHS;
// NOTE(review): source line 1315 (the start of this return, presumably
// checking that the matched select-pattern flavor is a min/max) is missing
// from this extraction.
1316 matchSelectPattern(Cur, LHS, RHS).Flavor);
1317 }
1318 // Recognize a call to the llvm.fmuladd intrinsic.
1319 if (isFMulAddIntrinsic(Cur))
1320 return true;
1321
1322 if (Cur->getOpcode() == Instruction::Sub &&
// NOTE(review): source line 1323 (the rest of this condition, presumably
// restricting Sub to the add-chain-with-subs kinds) is missing from this
// extraction.
1324 return true;
1325
1326 return Cur->getOpcode() == getOpcode();
1327 };
1328
1329 // Attempt to look through Phis which are part of the reduction chain
1330 unsigned ExtraPhiUses = 0;
1331 Instruction *RdxInstr = LoopExitInstr;
1332 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1333 if (ExitPhi->getNumIncomingValues() != 2)
1334 return {};
1335
1336 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1337 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1338
// Exactly one incoming value must be the reduction phi; the other is the
// in-loop chain we keep following.
1339 Instruction *Chain = nullptr;
1340 if (Inc0 == Phi)
1341 Chain = Inc1;
1342 else if (Inc1 == Phi)
1343 Chain = Inc0;
1344 else
1345 return {};
1346
1347 RdxInstr = Chain;
1348 ExtraPhiUses = 1;
1349 }
1350
1351 // The loop exit instruction we check first (as a quick test) but add last. We
1352 // check the opcode is correct (and dont allow them to be Subs) and that they
1353 // have expected to have the expected number of uses. They will have one use
1354 // from the phi and one from a LCSSA value, no matter the type.
1355 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1356 return {};
1357
1358 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1359 // for conditional reductions.
1360 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1361 return {};
1362
1363 Instruction *Cur = getNextInstruction(Phi);
1364
1365 // Each other instruction in the chain should have the expected number of uses
1366 // and be the correct opcode.
1367 while (Cur != RdxInstr) {
1368 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1369 return {};
1370
1371 ReductionOperations.push_back(Cur);
1372 Cur = getNextInstruction(Cur);
1373 }
1374
1375 ReductionOperations.push_back(Cur);
1376 return ReductionOperations;
1377}
1378
// Constructor: records the start value, induction kind, step SCEV and the
// update binary operator, then asserts the kind-specific invariants.
1379InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1380 const SCEV *Step, BinaryOperator *BOp,
// NOTE(review): source line 1381 (the final signature parameter, presumably
// "SmallVectorImpl<Instruction *> *Casts" used below) is missing from this
// extraction.
1382 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1383 assert(IK != IK_NoInduction && "Not an induction");
1384
1385 // Start value type should match the induction kind and the value
1386 // itself should not be null.
1387 assert(StartValue && "StartValue is null");
1388 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1389 "StartValue is not a pointer for pointer induction");
1390 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1391 "StartValue is not an integer for integer induction");
1392
1393 // Check the Step Value. It should be non-zero integer value.
1394 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1395 "Step value is zero");
1396
1397 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1398 "StepValue is not an integer");
1399
1400 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1401 "StepValue is not FP for FpInduction");
1402 assert((IK != IK_FpInduction ||
1403 (InductionBinOp &&
1404 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1405 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1406 "Binary opcode should be specified for FP induction");
1407
// Copy any redundant casts supplied by the caller into the descriptor.
1408 if (Casts)
1409 llvm::append_range(RedundantCasts, *Casts);
1410}
1411
1413 if (isa<SCEVConstant>(Step))
1414 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1415 return nullptr;
1416}
1417
// Recognizes a floating-point induction phi: a header phi updated by a
// loop-invariant FAdd/FSub addend along the backedge; fills D on success.
// NOTE(review): source lines 1418 and 1420 (the start and end of this
// function's signature — the header lists it as "isFPInductionPHI(PHINode
// *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)") are
// missing from this extraction.
1419 ScalarEvolution *SE,
1421
1422 // Here we only handle FP induction variables.
1423 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1424
1425 if (TheLoop->getHeader() != Phi->getParent())
1426 return false;
1427
1428 // The loop may have multiple entrances or multiple exits; we can analyze
1429 // this phi if it has a unique entry value and a unique backedge value.
1430 if (Phi->getNumIncomingValues() != 2)
1431 return false;
1432 Value *BEValue = nullptr, *StartValue = nullptr;
1433 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1434 BEValue = Phi->getIncomingValue(0);
1435 StartValue = Phi->getIncomingValue(1);
1436 } else {
1437 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1438 "Unexpected Phi node in the loop");
1439 BEValue = Phi->getIncomingValue(1);
1440 StartValue = Phi->getIncomingValue(0);
1441 }
1442
// NOTE(review): source line 1443 (presumably the declaration of BOp from
// BEValue, e.g. a dyn_cast to BinaryOperator) is missing from this
// extraction.
1444 if (!BOp)
1445 return false;
1446
// For FAdd the phi may be either operand; for FSub only phi - addend is an
// induction, so the phi must be the first operand.
1447 Value *Addend = nullptr;
1448 if (BOp->getOpcode() == Instruction::FAdd) {
1449 if (BOp->getOperand(0) == Phi)
1450 Addend = BOp->getOperand(1);
1451 else if (BOp->getOperand(1) == Phi)
1452 Addend = BOp->getOperand(0);
1453 } else if (BOp->getOpcode() == Instruction::FSub)
1454 if (BOp->getOperand(0) == Phi)
1455 Addend = BOp->getOperand(1);
1456
1457 if (!Addend)
1458 return false;
1459
1460 // The addend should be loop invariant
1461 if (auto *I = dyn_cast<Instruction>(Addend))
1462 if (TheLoop->contains(I))
1463 return false;
1464
1465 // FP Step has unknown SCEV
1466 const SCEV *Step = SE->getUnknown(Addend);
1467 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1468 return true;
1469}
1470
1471/// This function is called when we suspect that the update-chain of a phi node
1472/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
1473/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1474/// predicate P under which the SCEV expression for the phi can be the
1475/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1476/// cast instructions that are involved in the update-chain of this induction.
1477/// A caller that adds the required runtime predicate can be free to drop these
1478/// cast instructions, and compute the phi using \p AR (instead of some scev
1479/// expression with casts).
1480///
1481/// For example, without a predicate the scev expression can take the following
1482/// form:
1483/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1484///
1485/// It corresponds to the following IR sequence:
1486/// %for.body:
1487/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1488/// %casted_phi = "ExtTrunc i64 %x"
1489/// %add = add i64 %casted_phi, %step
1490///
1491/// where %x is given in \p PN,
1492/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1493/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1494/// several forms, for example, such as:
1495/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1496/// or:
1497/// ExtTrunc2: %t = shl %x, m
1498/// %casted_phi = ashr %t, m
1499///
1500/// If we are able to find such sequence, we return the instructions
1501/// we found, namely %casted_phi and the instructions on its use-def chain up
1502/// to the phi (not including the phi).
// NOTE(review): source line 1503 (the start of this function's signature —
// the index lists it as "static bool getCastsForInductionPHI(
// PredicatedScalarEvolution &PSE,") is missing from this extraction.
1504 const SCEVUnknown *PhiScev,
1505 const SCEVAddRecExpr *AR,
1506 SmallVectorImpl<Instruction *> &CastInsts) {
1507
1508 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1509 auto *PN = cast<PHINode>(PhiScev->getValue());
1510 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1511 const Loop *L = AR->getLoop();
1512
1513 // Find any cast instructions that participate in the def-use chain of
1514 // PhiScev in the loop.
1515 // FORNOW/TODO: We currently expect the def-use chain to include only
1516 // two-operand instructions, where one of the operands is an invariant.
1517 // createAddRecFromPHIWithCasts() currently does not support anything more
1518 // involved than that, so we keep the search simple. This can be
1519 // extended/generalized as needed.
1520
// Returns the non-invariant operand of a two-operand instruction (the
// next link towards the phi), or null if Val is not such an instruction.
1521 auto getDef = [&](const Value *Val) -> Value * {
1522 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1523 if (!BinOp)
1524 return nullptr;
1525 Value *Op0 = BinOp->getOperand(0);
1526 Value *Op1 = BinOp->getOperand(1);
1527 Value *Def = nullptr;
1528 if (L->isLoopInvariant(Op0))
1529 Def = Op1;
1530 else if (L->isLoopInvariant(Op1))
1531 Def = Op0;
1532 return Def;
1533 };
1534
1535 // Look for the instruction that defines the induction via the
1536 // loop backedge.
1537 BasicBlock *Latch = L->getLoopLatch();
1538 if (!Latch)
1539 return false;
1540 Value *Val = PN->getIncomingValueForBlock(Latch);
1541 if (!Val)
1542 return false;
1543
1544 // Follow the def-use chain until the induction phi is reached.
1545 // If on the way we encounter a Value that has the same SCEV Expr as the
1546 // phi node, we can consider the instructions we visit from that point
1547 // as part of the cast-sequence that can be ignored.
1548 bool InCastSequence = false;
1549 auto *Inst = dyn_cast<Instruction>(Val);
1550 while (Val != PN) {
1551 // If we encountered a phi node other than PN, or if we left the loop,
1552 // we bail out.
1553 if (!Inst || !L->contains(Inst)) {
1554 return false;
1555 }
1556 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1557 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1558 InCastSequence = true;
1559 if (InCastSequence) {
1560 // Only the last instruction in the cast sequence is expected to have
1561 // uses outside the induction def-use chain.
1562 if (!CastInsts.empty())
1563 if (!Inst->hasOneUse())
1564 return false;
1565 CastInsts.push_back(Inst);
1566 }
1567 Val = getDef(Val);
1568 if (!Val)
1569 return false;
1570 Inst = dyn_cast<Instruction>(Val);
1571 }
1572
1573 return InCastSequence;
1574}
1575
// Predicated entry point: classifies Phi as an induction using PSE, with
// optional predicate-based AddRec construction when Assume is set; forwards
// to the ScalarEvolution-based overload once an AddRec is available.
// NOTE(review): source lines 1576-1577 (the start of this function's
// signature, through "PredicatedScalarEvolution &PSE,") are missing from
// this extraction.
1578 InductionDescriptor &D, bool Assume) {
1579 Type *PhiTy = Phi->getType();
1580
1581 // Handle integer and pointer inductions variables.
1582 // Now we handle also FP induction but not trying to make a
1583 // recurrent expression from the PHI node in-place.
1584
1585 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1586 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1587 return false;
1588
1589 if (PhiTy->isFloatingPointTy())
1590 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1591
1592 const SCEV *PhiScev = PSE.getSCEV(Phi);
1593 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1594
1595 // We need this expression to be an AddRecExpr.
1596 if (Assume && !AR)
1597 AR = PSE.getAsAddRec(Phi);
1598
1599 if (!AR) {
1600 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1601 return false;
1602 }
1603
1604 // Record any Cast instructions that participate in the induction update
1605 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1606 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1607 // only after enabling Assume with PSCEV, this means we may have encountered
1608 // cast instructions that required adding a runtime check in order to
1609 // guarantee the correctness of the AddRecurrence representation of the
1610 // induction.
1611 if (PhiScev != AR && SymbolicPhi) {
// NOTE(review): source line 1612 (presumably the declaration of the Casts
// vector populated below) is missing from this extraction.
1613 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1614 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1615 }
1616
1617 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1618}
1619
// Classifies Phi as an integer or pointer induction with an affine AddRec
// step in TheLoop, filling D on success.
// NOTE(review): source line 1620 (the first line of this function's
// signature, presumably "bool InductionDescriptor::isInductionPHI(") is
// missing from this extraction.
1621 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1622 InductionDescriptor &D, const SCEV *Expr,
1623 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1624 Type *PhiTy = Phi->getType();
1625 // isSCEVable returns true for integer and pointer types.
1626 if (!SE->isSCEVable(PhiTy))
1627 return false;
1628
1629 // Check that the PHI is consecutive.
1630 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1631 const SCEV *Step;
1632
1633 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1634 // match, we should treat it as a uniform. Unfortunately, we don't currently
1635 // know how to handle uniform PHIs.
1636 if (!match(PhiScev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
1637 m_SpecificLoop(TheLoop)))) {
1638 LLVM_DEBUG(
1639 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1640 return false;
1641 }
1642
1643 // This function assumes that InductionPhi is called only on Phi nodes
1644 // present inside loop headers. Check for the same, and throw an assert if
1645 // the current Phi is not present inside the loop header.
1646 assert(Phi->getParent() == TheLoop->getHeader() &&
1647 "Invalid Phi node, not present in loop header");
1648
1649 if (!TheLoop->getLoopPreheader())
1650 return false;
1651
1652 Value *StartValue =
1653 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
1654
1655 BasicBlock *Latch = TheLoop->getLoopLatch();
1656 if (!Latch)
1657 return false;
1658
1659 if (PhiTy->isIntegerTy()) {
1660 BinaryOperator *BOp =
1661 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1662 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1663 CastsToIgnore);
1664 return true;
1665 }
1666
1667 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1668
1669 // This allows induction variables w/non-constant steps.
1670 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1671 return true;
1672}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static RecurrenceDescriptor getMinMaxRecurrence(PHINode *Phi, Loop *TheLoop, FastMathFlags FuncFMF, ScalarEvolution *SE)
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi, SmallPtrSetImpl< Instruction * > &ReductionInstrs)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::optional< FastMathFlags > hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK, FastMathFlags FuncFMF)
static FastMathFlags collectMinMaxFMF(Value *V)
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
Definition APInt.h:78
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This is the shared class of boolean and integer constants.
Definition Constants.h:87
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
bool hasNoSignedZeros() const
Test if this operation can ignore the sign of zero.
Definition Operator.h:312
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool noSignedZeros() const
Definition FMF.h:70
void setNoNaNs(bool B=true)
Definition FMF.h:81
bool noNaNs() const
Definition FMF.h:68
static FastMathFlags getFast()
Definition FMF.h:53
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:66
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static bool isFindRecurrenceKind(RecurKind Kind)
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static LLVM_ABI InstDesc isFindPattern(Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
bool use_empty() const
Definition Value.h:346
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
class_match< const SCEV > m_SCEV()
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?