LLVM 23.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
20#include "llvm/IR/Dominators.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
35 for (const Use &Use : I->operands())
36 if (!Set.count(dyn_cast<Instruction>(Use)))
37 return false;
38 return true;
39}
40
42 switch (Kind) {
43 default:
44 break;
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
61 // TODO: Make type-agnostic.
63 return true;
64 }
65 return false;
66}
67
71
72/// Determines if Phi may have been type-promoted. If Phi has a single user
73/// that ANDs the Phi with a type mask, return the user. RT is updated to
74/// account for the narrower bit width represented by the mask, and the AND
75/// instruction is added to CI.
79 if (!Phi->hasOneUse())
80 return Phi;
81
82 const APInt *M = nullptr;
83 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
84
85 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
86 // with a new integer type of the corresponding bit width.
87 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
88 int32_t Bits = (*M + 1).exactLogBase2();
89 if (Bits > 0) {
90 RT = IntegerType::get(Phi->getContext(), Bits);
91 Visited.insert(Phi);
92 CI.insert(J);
93 return J;
94 }
95 }
96 return Phi;
97}
98
99/// Compute the minimal bit width needed to represent a reduction whose exit
100/// instruction is given by Exit.
101static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
102 DemandedBits *DB,
103 AssumptionCache *AC,
104 DominatorTree *DT) {
105 bool IsSigned = false;
106 const DataLayout &DL = Exit->getDataLayout();
107 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
108
109 if (DB) {
110 // Use the demanded bits analysis to determine the bits that are live out
111 // of the exit instruction, rounding up to the nearest power of two. If the
112 // use of demanded bits results in a smaller bit width, we know the value
113 // must be positive (i.e., IsSigned = false), because if this were not the
114 // case, the sign bit would have been demanded.
115 auto Mask = DB->getDemandedBits(Exit);
116 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
117 }
118
119 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
120 // If demanded bits wasn't able to limit the bit width, we can try to use
121 // value tracking instead. This can be the case, for example, if the value
122 // may be negative.
123 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
124 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
125 MaxBitWidth = NumTypeBits - NumSignBits;
126 KnownBits Bits = computeKnownBits(Exit, DL);
127 if (!Bits.isNonNegative()) {
128 // If the value is not known to be non-negative, we set IsSigned to true,
129 // meaning that we will use sext instructions instead of zext
130 // instructions to restore the original type.
131 IsSigned = true;
132 // Make sure at least one sign bit is included in the result, so it
133 // will get properly sign-extended.
134 ++MaxBitWidth;
135 }
136 }
137 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
138
139 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
140 IsSigned);
141}
142
143/// Collect cast instructions that can be ignored in the vectorizer's cost
144/// model, given a reduction exit value and the minimal type in which the
145// reduction can be represented. Also search casts to the recurrence type
146// to find the minimum width used by the recurrence.
147static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
148 Type *RecurrenceType,
150 unsigned &MinWidthCastToRecurTy) {
151
154 Worklist.push_back(Exit);
155 MinWidthCastToRecurTy = -1U;
156
157 while (!Worklist.empty()) {
158 Instruction *Val = Worklist.pop_back_val();
159 Visited.insert(Val);
160 if (auto *Cast = dyn_cast<CastInst>(Val)) {
161 if (Cast->getSrcTy() == RecurrenceType) {
162 // If the source type of a cast instruction is equal to the recurrence
163 // type, it will be eliminated, and should be ignored in the vectorizer
164 // cost model.
165 Casts.insert(Cast);
166 continue;
167 }
168 if (Cast->getDestTy() == RecurrenceType) {
169 // The minimum width used by the recurrence is found by checking for
170 // casts on its operands. The minimum width is used by the vectorizer
171 // when finding the widest type for in-loop reductions without any
172 // loads/stores.
173 MinWidthCastToRecurTy = std::min<unsigned>(
174 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
175 continue;
176 }
177 }
178 // Add all operands to the work list if they are loop-varying values that
179 // we haven't yet visited.
180 for (Value *O : cast<User>(Val)->operands())
181 if (auto *I = dyn_cast<Instruction>(O))
182 if (TheLoop->contains(I) && !Visited.count(I))
183 Worklist.push_back(I);
184 }
185}
186
187// Check if a given Phi node can be recognized as an ordered reduction for
188// vectorizing floating point operations without unsafe math.
189static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
190 Instruction *Exit, PHINode *Phi) {
191 // Currently only FAdd and FMulAdd are supported.
192 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
193 return false;
194
195 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
196 return false;
197
198 if (Kind == RecurKind::FMulAdd &&
200 return false;
201
202 // Ensure the exit instruction has only one user other than the reduction PHI
203 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
204 return false;
205
206 // The only pattern accepted is the one in which the reduction PHI
207 // is used as one of the operands of the exit instruction
208 auto *Op0 = Exit->getOperand(0);
209 auto *Op1 = Exit->getOperand(1);
210 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
211 return false;
212 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
213 return false;
214
215 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
216 << ", ExitInst: " << *Exit << "\n");
217
218 return true;
219}
220
221/// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi
222/// is used outside the reduction chain. This is common for loops selecting the
223/// index of a minimum/maximum value (argmin/argmax).
225 PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) {
226 BasicBlock *Latch = TheLoop->getLoopLatch();
227 if (!Latch)
228 return false;
229
230 assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values");
231 Value *Inc = Phi->getIncomingValueForBlock(Latch);
232 if (Phi->hasOneUse() || !Inc->hasOneUse() ||
234 return false;
235
236 Value *A, *B;
237 bool IsMinMax = [&]() {
238 switch (Kind) {
239 case RecurKind::UMax:
240 return match(Inc, m_UMax(m_Value(A), m_Value(B)));
241 case RecurKind::UMin:
242 return match(Inc, m_UMin(m_Value(A), m_Value(B)));
243 case RecurKind::SMax:
244 return match(Inc, m_SMax(m_Value(A), m_Value(B)));
245 case RecurKind::SMin:
246 return match(Inc, m_SMin(m_Value(A), m_Value(B)));
247 default:
248 llvm_unreachable("all min/max kinds must be handled");
249 }
250 }();
251 if (!IsMinMax)
252 return false;
253
254 if (A == B || (A != Phi && B != Phi))
255 return false;
256
258 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
259 RedDes =
260 RecurrenceDescriptor(RdxStart, /*Exit=*/nullptr, /*Store=*/nullptr, Kind,
261 FastMathFlags(), /*ExactFP=*/nullptr, Phi->getType(),
262 /*Signed=*/false, /*Ordered=*/false, CastInsts,
263 /*MinWidthCastToRecurTy=*/-1U, /*PhiMultiUse=*/true);
264 return true;
265}
266
268 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
271 if (Phi->getNumIncomingValues() != 2)
272 return false;
273
274 // Reduction variables are only found in the loop header block.
275 if (Phi->getParent() != TheLoop->getHeader())
276 return false;
277
278 // Check for min/max reduction variables that feed other users in the loop.
280 RedDes))
281 return true;
282
283 // Obtain the reduction start value from the value that comes from the loop
284 // preheader.
285 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
286
287 // ExitInstruction is the single value which is used outside the loop.
288 // We only allow for a single reduction value to be used outside the loop.
289 // This includes users of the reduction, variables (which form a cycle
290 // which ends in the phi node).
291 Instruction *ExitInstruction = nullptr;
292
293 // Variable to keep last visited store instruction. By the end of the
294 // algorithm this variable will be either empty or having intermediate
295 // reduction value stored in invariant address.
296 StoreInst *IntermediateStore = nullptr;
297
298 // Indicates that we found a reduction operation in our scan.
299 bool FoundReduxOp = false;
300
301 // We start with the PHI node and scan for all of the users of this
302 // instruction. All users must be instructions that can be used as reduction
303 // variables (such as ADD). We must have a single out-of-block user. The cycle
304 // must include the original PHI.
305 bool FoundStartPHI = false;
306
307 // To recognize min/max patterns formed by a icmp select sequence, we store
308 // the number of instruction we saw from the recognized min/max pattern,
309 // to make sure we only see exactly the two instructions.
310 unsigned NumCmpSelectPatternInst = 0;
311 InstDesc ReduxDesc(false, nullptr);
312
313 // Data used for determining if the recurrence has been type-promoted.
314 Type *RecurrenceType = Phi->getType();
316 unsigned MinWidthCastToRecurrenceType;
317 Instruction *Start = Phi;
318 bool IsSigned = false;
319
322
323 // Return early if the recurrence kind does not match the type of Phi. If the
324 // recurrence kind is arithmetic, we attempt to look through AND operations
325 // resulting from the type promotion performed by InstCombine. Vector
326 // operations are not limited to the legal integer widths, so we may be able
327 // to evaluate the reduction in the narrower width.
328 // Check the scalar type to handle both scalar and vector types.
329 Type *ScalarTy = RecurrenceType->getScalarType();
330 if (ScalarTy->isFloatingPointTy()) {
332 return false;
333 } else if (ScalarTy->isIntegerTy()) {
334 if (!isIntegerRecurrenceKind(Kind))
335 return false;
336 if (!isMinMaxRecurrenceKind(Kind))
337 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
338 } else {
339 // Pointer min/max may exist, but it is not supported as a reduction op.
340 return false;
341 }
342
343 Worklist.push_back(Start);
344 VisitedInsts.insert(Start);
345
346 // Start with all flags set because we will intersect this with the reduction
347 // flags from all the reduction operations.
349
350 // The first instruction in the use-def chain of the Phi node that requires
351 // exact floating point operations.
352 Instruction *ExactFPMathInst = nullptr;
353
354 // A value in the reduction can be used:
355 // - By the reduction:
356 // - Reduction operation:
357 // - One use of reduction value (safe).
358 // - Multiple use of reduction value (not safe).
359 // - PHI:
360 // - All uses of the PHI must be the reduction (safe).
361 // - Otherwise, not safe.
362 // - By instructions outside of the loop (safe).
363 // * One value may have several outside users, but all outside
364 // uses must be of the same value.
365 // - By store instructions with a loop invariant address (safe with
366 // the following restrictions):
367 // * If there are several stores, all must have the same address.
368 // * Final value should be stored in that loop invariant address.
369 // - By an instruction that is not part of the reduction (not safe).
370 // This is either:
371 // * An instruction type other than PHI or the reduction operation.
372 // * A PHI in the header other than the initial PHI.
373 while (!Worklist.empty()) {
374 Instruction *Cur = Worklist.pop_back_val();
375
376 // Store instructions are allowed iff it is the store of the reduction
377 // value to the same loop invariant memory location.
378 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
379 if (!SE) {
380 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
381 << "Scalar Evolution Analysis\n");
382 return false;
383 }
384
385 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
386 // Check it is the same address as previous stores
387 if (IntermediateStore) {
388 const SCEV *OtherScev =
389 SE->getSCEV(IntermediateStore->getPointerOperand());
390
391 if (OtherScev != PtrScev) {
392 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
393 << "inside the loop: " << *SI->getPointerOperand()
394 << " and "
395 << *IntermediateStore->getPointerOperand() << '\n');
396 return false;
397 }
398 }
399
400 // Check the pointer is loop invariant
401 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
402 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
403 << "inside the loop: " << *SI->getPointerOperand()
404 << '\n');
405 return false;
406 }
407
408 // IntermediateStore is always the last store in the loop.
410 continue;
411 }
412
413 // No Users.
414 // If the instruction has no users then this is a broken chain and can't be
415 // a reduction variable.
416 if (Cur->use_empty())
417 return false;
418
419 bool IsAPhi = isa<PHINode>(Cur);
420
421 // A header PHI use other than the original PHI.
422 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
423 return false;
424
425 // Reductions of instructions such as Div, and Sub is only possible if the
426 // LHS is the reduction variable.
427 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
428 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
429 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
430 return false;
431
432 // Any reduction instruction must be of one of the allowed kinds. We ignore
433 // the starting value (the Phi or an AND instruction if the Phi has been
434 // type-promoted).
435 if (Cur != Start) {
436 ReduxDesc =
437 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
438 ExactFPMathInst = ExactFPMathInst == nullptr
439 ? ReduxDesc.getExactFPMathInst()
440 : ExactFPMathInst;
441 if (!ReduxDesc.isRecurrence())
442 return false;
443 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
444 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
445 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
446 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
447 // Accept FMF on either fcmp or select of a min/max idiom.
448 // TODO: This is a hack to work-around the fact that FMF may not be
449 // assigned/propagated correctly. If that problem is fixed or we
450 // standardize on fmin/fmax via intrinsics, this can be removed.
451 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
452 CurFMF |= FCmp->getFastMathFlags();
453 }
454 FMF &= CurFMF;
455 }
456 // Update this reduction kind if we matched a new instruction.
457 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
458 // state accurate while processing the worklist?
459 if (ReduxDesc.getRecKind() != RecurKind::None)
460 Kind = ReduxDesc.getRecKind();
461 }
462
463 bool IsASelect = isa<SelectInst>(Cur);
464
465 // A conditional reduction operation must only have 2 or less uses in
466 // VisitedInsts.
467 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
468 hasMultipleUsesOf(Cur, VisitedInsts, 2))
469 return false;
470
471 // A reduction operation must only have one use of the reduction value.
472 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
473 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))
474 return false;
475
476 // All inputs to a PHI node must be a reduction value.
477 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
478 return false;
479
480 if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Cur) || IsASelect))
481 ++NumCmpSelectPatternInst;
482 if (isFPMinMaxRecurrenceKind(Kind) && (isa<FCmpInst>(Cur) || IsASelect))
483 ++NumCmpSelectPatternInst;
484 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
485 ++NumCmpSelectPatternInst;
486
487 // Check whether we found a reduction operator.
488 FoundReduxOp |= !IsAPhi && Cur != Start;
489
490 // Process users of current instruction. Push non-PHI nodes after PHI nodes
491 // onto the stack. This way we are going to have seen all inputs to PHI
492 // nodes once we get to them.
495 for (User *U : Cur->users()) {
497
498 // If the user is a call to llvm.fmuladd then the instruction can only be
499 // the final operand.
500 if (isFMulAddIntrinsic(UI))
501 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
502 return false;
503
504 // Check if we found the exit user.
505 BasicBlock *Parent = UI->getParent();
506 if (!TheLoop->contains(Parent)) {
507 // If we already know this instruction is used externally, move on to
508 // the next user.
509 if (ExitInstruction == Cur)
510 continue;
511
512 // Exit if you find multiple values used outside or if the header phi
513 // node is being used. In this case the user uses the value of the
514 // previous iteration, in which case we would loose "VF-1" iterations of
515 // the reduction operation if we vectorize.
516 if (ExitInstruction != nullptr || Cur == Phi)
517 return false;
518
519 // The instruction used by an outside user must be the last instruction
520 // before we feed back to the reduction phi. Otherwise, we loose VF-1
521 // operations on the value.
522 if (!is_contained(Phi->operands(), Cur))
523 return false;
524
525 ExitInstruction = Cur;
526 continue;
527 }
528
529 // Process instructions only once (termination). Each reduction cycle
530 // value must only be used once, except by phi nodes and min/max
531 // reductions which are represented as a cmp followed by a select.
532 InstDesc IgnoredVal(false, nullptr);
533 if (VisitedInsts.insert(UI).second) {
534 if (isa<PHINode>(UI)) {
535 PHIs.push_back(UI);
536 } else {
538 if (SI && SI->getPointerOperand() == Cur) {
539 // Reduction variable chain can only be stored somewhere but it
540 // can't be used as an address.
541 return false;
542 }
543 NonPHIs.push_back(UI);
544 }
545 } else if (!isa<PHINode>(UI) &&
546 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
547 !isa<SelectInst>(UI)) ||
548 (!isConditionalRdxPattern(UI).isRecurrence() &&
549 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
550 .isRecurrence() &&
551 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
552 return false;
553
554 // Remember that we completed the cycle.
555 if (UI == Phi)
556 FoundStartPHI = true;
557 }
558 Worklist.append(PHIs.begin(), PHIs.end());
559 Worklist.append(NonPHIs.begin(), NonPHIs.end());
560 }
561
562 // This means we have seen one but not the other instruction of the
563 // pattern or more than just a select and cmp. Zero implies that we saw a
564 // llvm.min/max intrinsic, which is always OK.
565 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
566 NumCmpSelectPatternInst != 0)
567 return false;
568
569 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
570 return false;
571
572 if (IntermediateStore) {
573 // Check that stored value goes to the phi node again. This way we make sure
574 // that the value stored in IntermediateStore is indeed the final reduction
575 // value.
576 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
577 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
578 << *IntermediateStore << '\n');
579 return false;
580 }
581
582 // If there is an exit instruction it's value should be stored in
583 // IntermediateStore
584 if (ExitInstruction &&
585 IntermediateStore->getValueOperand() != ExitInstruction) {
586 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
587 "store last calculated value of the reduction: "
588 << *IntermediateStore << '\n');
589 return false;
590 }
591
592 // If all uses are inside the loop (intermediate stores), then the
593 // reduction value after the loop will be the one used in the last store.
594 if (!ExitInstruction)
595 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
596 }
597
598 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
599 return false;
600
601 const bool IsOrdered =
602 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
603
604 if (Start != Phi) {
605 // If the starting value is not the same as the phi node, we speculatively
606 // looked through an 'and' instruction when evaluating a potential
607 // arithmetic reduction to determine if it may have been type-promoted.
608 //
609 // We now compute the minimal bit width that is required to represent the
610 // reduction. If this is the same width that was indicated by the 'and', we
611 // can represent the reduction in the smaller type. The 'and' instruction
612 // will be eliminated since it will essentially be a cast instruction that
613 // can be ignore in the cost model. If we compute a different type than we
614 // did when evaluating the 'and', the 'and' will not be eliminated, and we
615 // will end up with different kinds of operations in the recurrence
616 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
617 // the case.
618 //
619 // The vectorizer relies on InstCombine to perform the actual
620 // type-shrinking. It does this by inserting instructions to truncate the
621 // exit value of the reduction to the width indicated by RecurrenceType and
622 // then extend this value back to the original width. If IsSigned is false,
623 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
624 // used.
625 //
626 // TODO: We should not rely on InstCombine to rewrite the reduction in the
627 // smaller type. We should just generate a correctly typed expression
628 // to begin with.
629 Type *ComputedType;
630 std::tie(ComputedType, IsSigned) =
631 computeRecurrenceType(ExitInstruction, DB, AC, DT);
632 if (ComputedType != RecurrenceType)
633 return false;
634 }
635
636 // Collect cast instructions and the minimum width used by the recurrence.
637 // If the starting value is not the same as the phi node and the computed
638 // recurrence type is equal to the recurrence type, the recurrence expression
639 // will be represented in a narrower or wider type. If there are any cast
640 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
641 // Note that the 'and' instruction was already included in this list.
642 //
643 // TODO: A better way to represent this may be to tag in some way all the
644 // instructions that are a part of the reduction. The vectorizer cost
645 // model could then apply the recurrence type to these instructions,
646 // without needing a white list of instructions to ignore.
647 // This may also be useful for the inloop reductions, if it can be
648 // kept simple enough.
649 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
650 MinWidthCastToRecurrenceType);
651
652 // We found a reduction var if we have reached the original phi node and we
653 // only have a single instruction with out-of-loop users.
654
655 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
656 // is saved as part of the RecurrenceDescriptor.
657
658 // Save the description of this reduction variable.
659 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
660 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
661 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
662 RedDes = RD;
663
664 return true;
665}
666
667// We are looking for loops that do something like this:
668// int r = 0;
669// for (int i = 0; i < n; i++) {
670// if (src[i] > 3)
671// r = 3;
672// }
673// where the reduction value (r) only has two states, in this example 0 or 3.
674// The generated LLVM IR for this type of loop will be like this:
675// for.body:
676// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
677// ...
678// %cmp = icmp sgt i32 %5, 3
679// %spec.select = select i1 %cmp, i32 3, i32 %r
680// ...
681// In general we can support vectorization of loops where 'r' flips between
682// any two non-constants, provided they are loop invariant. The only thing
683// we actually care about at the end of the loop is whether or not any lane
684// in the selected vector is different from the start value. The final
685// across-vector reduction after the loop simply involves choosing the start
686// value if nothing changed (0 in the example above) or the other selected
687// value (3 in the example above).
690 Instruction *I, InstDesc &Prev) {
691 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
692 // the select.
693 if (match(I, m_OneUse(m_Cmp()))) {
694 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
695 return InstDesc(Select, Prev.getRecKind());
696 }
697
698 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
699 return InstDesc(false, I);
700
702 Value *NonPhi = nullptr;
703
704 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
705 NonPhi = SI->getFalseValue();
706 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
707 NonPhi = SI->getTrueValue();
708 else
709 return InstDesc(false, I);
710
711 // We are looking for selects of the form:
712 // select(cmp(), phi, loop_invariant) or
713 // select(cmp(), loop_invariant, phi)
714 if (!Loop->isLoopInvariant(NonPhi))
715 return InstDesc(false, I);
716
717 return InstDesc(I, RecurKind::AnyOf);
718}
719
720// We are looking for loops that do something like this:
721// int r = 0;
722// for (int i = 0; i < n; i++) {
723// if (src[i] > 3)
724// r = i;
725// }
726// or like this:
727// int r = 0;
728// for (int i = 0; i < n; i++) {
729// if (src[i] > 3)
730// r = <loop-varying value>;
731// }
732// The reduction value (r) is derived from either the values of an induction
733// variable (i) sequence, an arbitrary loop-varying value, or from the start
734// value (0). The LLVM IR generated for such loops would be as follows:
735// for.body:
736// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
737// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
738// ...
739// %cmp = icmp sgt i32 %5, 3
740// %spec.select = select i1 %cmp, i32 %i, i32 %r
741// %inc = add nsw i32 %i, 1
742// ...
743// When searching for an induction variable (i), the reduction value after the
744// loop will be the maximum (increasing induction) or minimum (decreasing
745// induction) value of 'i' that the condition (src[i] > 3) is satisfied, or the
746// start value (0 in the example above). When the start value of the induction
747// variable 'i' is greater than the minimum (increasing induction) or maximum
748// (decreasing induction) value of the data type, we can use the minimum
749// (increasing induction) or maximum (decreasing induction) value of the data
750// type as a sentinel value to replace the start value. This allows us to
751// perform a single reduction max (increasing induction) or min (decreasing
752// induction) operation to obtain the final reduction result.
753// TODO: It is possible to solve the case where the start value is the minimum
754// value of the data type or a non-constant value by using mask and multiple
755// reduction operations.
756//
757// When searching for an arbitrary loop-varying value, the reduction value will
758// either be the initial value (0) if the condition was never met, or the value
759// of the loop-varying value in the most recent loop iteration where the
760// condition was met.
764 // TODO: Support the vectorization of FindLastIV when the reduction phi is
765 // used by more than one select instruction. This vectorization is only
766 // performed when the SCEV of each increasing induction variable used by the
767 // select instructions is identical.
768 if (!OrigPhi->hasOneUse())
769 return InstDesc(false, I);
770
771 // We are looking for selects of the form:
772 // select(cmp(), phi, value) or
773 // select(cmp(), value, phi)
774 // where 'value' must be a loop induction variable
775 // (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
776 // TODO: Match selects with multi-use cmp conditions.
777 Value *NonRdxPhi = nullptr;
778 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
779 m_Specific(OrigPhi)),
780 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
781 m_Value(NonRdxPhi)))))
782 return InstDesc(false, I);
783
784 // Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
785 // std::nullopt.
786 auto GetFindFirstLastIVRecurKind = [&](Value *V) -> std::optional<RecurKind> {
787 Type *Ty = V->getType();
788 if (!SE.isSCEVable(Ty))
789 return std::nullopt;
790
791 auto *AR = SE.getSCEV(V);
792 const SCEV *Step;
793 if (!match(AR, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
794 m_SpecificLoop(TheLoop))))
795 return std::nullopt;
796
797 // We must have a known positive or negative step for FindIV
798 const bool PositiveStep = SE.isKnownPositive(Step);
799 if (!SE.isKnownNonZero(Step))
800 return std::nullopt;
801
802 // Check if the minimum (FindLast) or maximum (FindFirst) value of the
803 // recurrence type can be used as a sentinel value. The maximum acceptable
804 // range for the induction variable, called the valid range will exclude
805 // <sentinel value>, where <sentinel value> is
806 // [Signed|Unsigned]Min(<recurrence type>) for FindLastIV or
807 // [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
808 // TODO: This range restriction can be lifted by adding an additional
809 // virtual OR reduction.
810 auto CheckRange = [&](bool IsSigned) {
811 const ConstantRange IVRange =
812 IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
813 unsigned NumBits = Ty->getIntegerBitWidth();
815 if (PositiveStep) {
816 Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
817 : APInt::getMinValue(NumBits);
818 } else {
819 Sentinel = IsSigned ? APInt::getSignedMaxValue(NumBits)
820 : APInt::getMaxValue(NumBits);
821 }
823
825 dbgs() << "LV: " << (PositiveStep ? "FindLastIV" : "FindFirstIV")
826 << " valid range is " << ValidRange << ", and the range of "
827 << *AR << " is " << IVRange << "\n");
828
829 // Ensure the induction variable does not wrap around by verifying that
830 // its range is fully contained within the valid range.
831 return ValidRange.contains(IVRange);
832 };
833 if (PositiveStep) {
834 if (CheckRange(true))
836 if (CheckRange(false))
838 return std::nullopt;
839 }
840
841 if (CheckRange(true))
843 if (CheckRange(false))
845 return std::nullopt;
846 };
847
848 if (auto RK = GetFindFirstLastIVRecurKind(NonRdxPhi))
849 return InstDesc(I, *RK);
850
851 // If the recurrence is not specific to an IV, return a generic FindLast.
853}
854
857 const InstDesc &Prev) {
859 "Expected a cmp or select or call instruction");
860 if (!isMinMaxRecurrenceKind(Kind))
861 return InstDesc(false, I);
862
863 // We must handle the select(cmp()) as a single instruction. Advance to the
864 // select.
865 if (match(I, m_OneUse(m_Cmp()))) {
866 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
867 return InstDesc(Select, Prev.getRecKind());
868 }
869
870 // Only match select with single use cmp condition, or a min/max intrinsic.
871 if (!isa<IntrinsicInst>(I) &&
873 return InstDesc(false, I);
874
875 // Look for a min/max pattern.
876 if (match(I, m_UMin(m_Value(), m_Value())))
877 return InstDesc(Kind == RecurKind::UMin, I);
878 if (match(I, m_UMax(m_Value(), m_Value())))
879 return InstDesc(Kind == RecurKind::UMax, I);
880 if (match(I, m_SMax(m_Value(), m_Value())))
881 return InstDesc(Kind == RecurKind::SMax, I);
882 if (match(I, m_SMin(m_Value(), m_Value())))
883 return InstDesc(Kind == RecurKind::SMin, I);
885 return InstDesc(Kind == RecurKind::FMin, I);
887 return InstDesc(Kind == RecurKind::FMax, I);
888 if (match(I, m_FMinNum(m_Value(), m_Value())))
889 return InstDesc(Kind == RecurKind::FMin, I);
890 if (match(I, m_FMaxNum(m_Value(), m_Value())))
891 return InstDesc(Kind == RecurKind::FMax, I);
892 if (match(I, m_FMinimumNum(m_Value(), m_Value())))
893 return InstDesc(Kind == RecurKind::FMinimumNum, I);
894 if (match(I, m_FMaximumNum(m_Value(), m_Value())))
895 return InstDesc(Kind == RecurKind::FMaximumNum, I);
896 if (match(I, m_FMinimum(m_Value(), m_Value())))
897 return InstDesc(Kind == RecurKind::FMinimum, I);
898 if (match(I, m_FMaximum(m_Value(), m_Value())))
899 return InstDesc(Kind == RecurKind::FMaximum, I);
900
901 return InstDesc(false, I);
902}
903
904/// Returns true if the select instruction has users in the compare-and-add
905/// reduction pattern below. The select instruction argument is the last one
906/// in the sequence.
907///
908/// %sum.1 = phi ...
909/// ...
910/// %cmp = fcmp pred %0, %CFP
911/// %add = fadd %0, %sum.1
912/// %sum.2 = select %cmp, %add, %sum.1
915 Value *TrueVal, *FalseVal;
916 // Only handle single use cases for now.
917 if (!match(I,
918 m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
919 return InstDesc(false, I);
920
921 // Handle only when either of operands of select instruction is a PHI
922 // node for now.
923 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
924 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
925 return InstDesc(false, I);
926
927 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
928 : dyn_cast<Instruction>(TrueVal);
929 if (!I1 || !I1->isBinaryOp())
930 return InstDesc(false, I);
931
932 Value *Op1, *Op2;
933 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
934 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
935 I1->isFast()) ||
936 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
937 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
938 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
939 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
940 return InstDesc(false, I);
941
944 if (!IPhi || IPhi != FalseVal)
945 return InstDesc(false, I);
946
947 return InstDesc(true, I);
948}
949
951 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
952 FastMathFlags FuncFMF, ScalarEvolution *SE) {
953 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
954 switch (I->getOpcode()) {
955 default:
956 return InstDesc(false, I);
957 case Instruction::PHI:
958 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
959 case Instruction::Sub:
960 return InstDesc(
961 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
962 case Instruction::Add:
963 return InstDesc(
964 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
965 case Instruction::Mul:
966 return InstDesc(Kind == RecurKind::Mul, I);
967 case Instruction::And:
968 return InstDesc(Kind == RecurKind::And, I);
969 case Instruction::Or:
970 return InstDesc(Kind == RecurKind::Or, I);
971 case Instruction::Xor:
972 return InstDesc(Kind == RecurKind::Xor, I);
973 case Instruction::FDiv:
974 case Instruction::FMul:
975 return InstDesc(Kind == RecurKind::FMul, I,
976 I->hasAllowReassoc() ? nullptr : I);
977 case Instruction::FSub:
978 case Instruction::FAdd:
979 return InstDesc(Kind == RecurKind::FAdd, I,
980 I->hasAllowReassoc() ? nullptr : I);
981 case Instruction::Select:
982 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
983 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
986 if (isFindRecurrenceKind(Kind) && SE)
987 return isFindPattern(L, OrigPhi, I, *SE);
988 [[fallthrough]];
989 case Instruction::FCmp:
990 case Instruction::ICmp:
991 case Instruction::Call:
992 if (isAnyOfRecurrenceKind(Kind))
993 return isAnyOfPattern(L, OrigPhi, I, Prev);
994 auto HasRequiredFMF = [&]() {
995 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
996 return true;
997 if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
998 return true;
999 // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan
1000 // flags since NaN and signed zeroes are propagated in the intrinsic
1001 // implementation.
1004 match(I,
1007 };
1008 if (isIntMinMaxRecurrenceKind(Kind))
1009 return isMinMaxPattern(I, Kind, Prev);
1010 if (isFPMinMaxRecurrenceKind(Kind)) {
1011 InstDesc Res = isMinMaxPattern(I, Kind, Prev);
1012 if (!Res.isRecurrence())
1013 return InstDesc(false, I);
1014 if (HasRequiredFMF())
1015 return Res;
1016 // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
1017 // intrinsics with extra checks ensuring the vector loop handles only
1018 // non-NaN inputs.
1020 assert(Kind == RecurKind::FMax &&
1021 "unexpected recurrence kind for maxnum");
1022 return InstDesc(I, RecurKind::FMaxNum);
1023 }
1025 assert(Kind == RecurKind::FMin &&
1026 "unexpected recurrence kind for minnum");
1027 return InstDesc(I, RecurKind::FMinNum);
1028 }
1029 return InstDesc(false, I);
1030 }
1031 if (isFMulAddIntrinsic(I))
1032 return InstDesc(Kind == RecurKind::FMulAdd, I,
1033 I->hasAllowReassoc() ? nullptr : I);
1034 return InstDesc(false, I);
1035 }
1036}
1037
1040 unsigned MaxNumUses) {
1041 unsigned NumUses = 0;
1042 for (const Use &U : I->operands()) {
1043 if (Insts.count(dyn_cast<Instruction>(U)))
1044 ++NumUses;
1045 if (NumUses > MaxNumUses)
1046 return true;
1047 }
1048
1049 return false;
1050}
1051
1053 RecurrenceDescriptor &RedDes,
1055 DominatorTree *DT,
1056 ScalarEvolution *SE) {
1057 BasicBlock *Header = TheLoop->getHeader();
1058 Function &F = *Header->getParent();
1059 FastMathFlags FMF;
1060 FMF.setNoNaNs(
1061 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
1062 FMF.setNoSignedZeros(
1063 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
1064
1065 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
1066 SE)) {
1067 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1068 return true;
1069 }
1070 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT,
1071 SE)) {
1072 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1073 return true;
1074 }
1075 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, FMF, RedDes,
1076 DB, AC, DT, SE)) {
1077 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1078 << "\n");
1079 return true;
1080 }
1081 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
1082 SE)) {
1083 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1084 return true;
1085 }
1086 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
1087 SE)) {
1088 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1089 return true;
1090 }
1091 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
1092 SE)) {
1093 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1094 return true;
1095 }
1096 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
1097 SE)) {
1098 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1099 return true;
1100 }
1101 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
1102 SE)) {
1103 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
1104 return true;
1105 }
1106 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
1107 SE)) {
1108 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
1109 return true;
1110 }
1111 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
1112 SE)) {
1113 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
1114 return true;
1115 }
1116 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
1117 SE)) {
1118 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
1119 return true;
1120 }
1121 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
1122 SE)) {
1123 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1124 << "\n");
1125 return true;
1126 }
1127 if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1128 DT, SE)) {
1129 LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
1130 return true;
1131 }
1132 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
1133 SE)) {
1134 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1135 return true;
1136 }
1137 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1138 SE)) {
1139 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1140 return true;
1141 }
1142 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
1143 SE)) {
1144 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
1145 return true;
1146 }
1147 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
1148 SE)) {
1149 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
1150 return true;
1151 }
1152 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1153 SE)) {
1154 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1155 return true;
1156 }
1157 if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
1158 SE)) {
1159 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
1160 return true;
1161 }
1162 if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
1163 SE)) {
1164 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
1165 return true;
1166 }
1167 if (AddReductionVar(Phi, RecurKind::FMaximumNum, TheLoop, FMF, RedDes, DB, AC,
1168 DT, SE)) {
1169 LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi
1170 << "\n");
1171 return true;
1172 }
1173 if (AddReductionVar(Phi, RecurKind::FMinimumNum, TheLoop, FMF, RedDes, DB, AC,
1174 DT, SE)) {
1175 LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi
1176 << "\n");
1177 return true;
1178 }
1179 // Not a reduction of known type.
1180 return false;
1181}
1182
1184 DominatorTree *DT) {
1185
1186 // Ensure the phi node is in the loop header and has two incoming values.
1187 if (Phi->getParent() != TheLoop->getHeader() ||
1188 Phi->getNumIncomingValues() != 2)
1189 return false;
1190
1191 // Ensure the loop has a preheader and a single latch block. The loop
1192 // vectorizer will need the latch to set up the next iteration of the loop.
1193 auto *Preheader = TheLoop->getLoopPreheader();
1194 auto *Latch = TheLoop->getLoopLatch();
1195 if (!Preheader || !Latch)
1196 return false;
1197
1198 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1199 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1200 Phi->getBasicBlockIndex(Latch) < 0)
1201 return false;
1202
1203 // Get the previous value. The previous value comes from the latch edge while
1204 // the initial value comes from the preheader edge.
1205 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1206
1207 // If Previous is a phi in the header, go through incoming values from the
1208 // latch until we find a non-phi value. Use this as the new Previous, all uses
1209 // in the header will be dominated by the original phi, but need to be moved
1210 // after the non-phi previous value.
1212 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1213 if (PrevPhi->getParent() != Phi->getParent())
1214 return false;
1215 if (!SeenPhis.insert(PrevPhi).second)
1216 return false;
1217 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1218 }
1219
1220 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1221 return false;
1222
1223 // Ensure every user of the phi node (recursively) is dominated by the
1224 // previous value. The dominance requirement ensures the loop vectorizer will
1225 // not need to vectorize the initial value prior to the first iteration of the
1226 // loop.
1227 // TODO: Consider extending this sinking to handle memory instructions.
1228
1230 BasicBlock *PhiBB = Phi->getParent();
1232 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1233 // Cyclic dependence.
1234 if (Previous == SinkCandidate)
1235 return false;
1236
1237 if (!Seen.insert(SinkCandidate).second)
1238 return true;
1239 if (DT->dominates(Previous,
1240 SinkCandidate)) // We already are good w/o sinking.
1241 return true;
1242
1243 if (SinkCandidate->getParent() != PhiBB ||
1244 SinkCandidate->mayHaveSideEffects() ||
1245 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1246 return false;
1247
1248 // If we reach a PHI node that is not dominated by Previous, we reached a
1249 // header PHI. No need for sinking.
1250 if (isa<PHINode>(SinkCandidate))
1251 return true;
1252
1253 // Sink User tentatively and check its users
1254 WorkList.push_back(SinkCandidate);
1255 return true;
1256 };
1257
1258 WorkList.push_back(Phi);
1259 // Try to recursively sink instructions and their users after Previous.
1260 while (!WorkList.empty()) {
1261 Instruction *Current = WorkList.pop_back_val();
1262 for (User *User : Current->users()) {
1263 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1264 return false;
1265 }
1266 }
1267
1268 return true;
1269}
1270
1272 switch (Kind) {
1273 case RecurKind::Sub:
1274 return Instruction::Sub;
1276 case RecurKind::Add:
1277 return Instruction::Add;
1278 case RecurKind::Mul:
1279 return Instruction::Mul;
1280 case RecurKind::Or:
1281 return Instruction::Or;
1282 case RecurKind::And:
1283 return Instruction::And;
1284 case RecurKind::Xor:
1285 return Instruction::Xor;
1286 case RecurKind::FMul:
1287 return Instruction::FMul;
1288 case RecurKind::FMulAdd:
1289 case RecurKind::FAdd:
1290 return Instruction::FAdd;
1291 case RecurKind::SMax:
1292 case RecurKind::SMin:
1293 case RecurKind::UMax:
1294 case RecurKind::UMin:
1295 return Instruction::ICmp;
1296 case RecurKind::FMax:
1297 case RecurKind::FMin:
1302 return Instruction::FCmp;
1304 case RecurKind::AnyOf:
1309 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1310 // are supported.
1311 default:
1312 llvm_unreachable("Unknown recurrence operation");
1313 }
1314}
1315
1318 SmallVector<Instruction *, 4> ReductionOperations;
1319 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1320
1321 // Search down from the Phi to the LoopExitInstr, looking for instructions
1322 // with a single user of the correct type for the reduction.
1323
1324 // Note that we check that the type of the operand is correct for each item in
1325 // the chain, including the last (the loop exit value). This can come up from
1326 // sub, which would otherwise be treated as an add reduction. MinMax also need
1327 // to check for a pair of icmp/select, for which we use getNextInstruction and
1328 // isCorrectOpcode functions to step the right number of instruction, and
1329 // check the icmp/select pair.
1330 // FIXME: We also do not attempt to look through Select's yet, which might
1331 // be part of the reduction chain, or attempt to looks through And's to find a
1332 // smaller bitwidth. Subs are also currently not allowed (which are usually
1333 // treated as part of a add reduction) as they are expected to generally be
1334 // more expensive than out-of-loop reductions, and need to be costed more
1335 // carefully.
1336 unsigned ExpectedUses = 1;
1337 if (IsMinMax)
1338 ExpectedUses = 2;
1339
1340 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1341 for (auto *User : Cur->users()) {
1343 if (isa<PHINode>(UI))
1344 continue;
1345 if (IsMinMax) {
1346 // We are expecting a icmp/select pair, which we go to the next select
1347 // instruction if we can. We already know that Cur has 2 uses.
1348 if (isa<SelectInst>(UI))
1349 return UI;
1350 continue;
1351 }
1352 return UI;
1353 }
1354 return nullptr;
1355 };
1356 auto isCorrectOpcode = [&](Instruction *Cur) {
1357 if (IsMinMax) {
1358 Value *LHS, *RHS;
1360 matchSelectPattern(Cur, LHS, RHS).Flavor);
1361 }
1362 // Recognize a call to the llvm.fmuladd intrinsic.
1363 if (isFMulAddIntrinsic(Cur))
1364 return true;
1365
1366 if (Cur->getOpcode() == Instruction::Sub &&
1368 return true;
1369
1370 return Cur->getOpcode() == getOpcode();
1371 };
1372
1373 // Attempt to look through Phis which are part of the reduction chain
1374 unsigned ExtraPhiUses = 0;
1375 Instruction *RdxInstr = LoopExitInstr;
1376 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1377 if (ExitPhi->getNumIncomingValues() != 2)
1378 return {};
1379
1380 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1381 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1382
1383 Instruction *Chain = nullptr;
1384 if (Inc0 == Phi)
1385 Chain = Inc1;
1386 else if (Inc1 == Phi)
1387 Chain = Inc0;
1388 else
1389 return {};
1390
1391 RdxInstr = Chain;
1392 ExtraPhiUses = 1;
1393 }
1394
1395 // The loop exit instruction we check first (as a quick test) but add last. We
1396 // check the opcode is correct (and dont allow them to be Subs) and that they
1397 // have expected to have the expected number of uses. They will have one use
1398 // from the phi and one from a LCSSA value, no matter the type.
1399 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1400 return {};
1401
1402 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1403 // for conditional reductions.
1404 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1405 return {};
1406
1407 Instruction *Cur = getNextInstruction(Phi);
1408
1409 // Each other instruction in the chain should have the expected number of uses
1410 // and be the correct opcode.
1411 while (Cur != RdxInstr) {
1412 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1413 return {};
1414
1415 ReductionOperations.push_back(Cur);
1416 Cur = getNextInstruction(Cur);
1417 }
1418
1419 ReductionOperations.push_back(Cur);
1420 return ReductionOperations;
1421}
1422
1423InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1424 const SCEV *Step, BinaryOperator *BOp,
1426 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1427 assert(IK != IK_NoInduction && "Not an induction");
1428
1429 // Start value type should match the induction kind and the value
1430 // itself should not be null.
1431 assert(StartValue && "StartValue is null");
1432 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1433 "StartValue is not a pointer for pointer induction");
1434 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1435 "StartValue is not an integer for integer induction");
1436
1437 // Check the Step Value. It should be non-zero integer value.
1438 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1439 "Step value is zero");
1440
1441 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1442 "StepValue is not an integer");
1443
1444 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1445 "StepValue is not FP for FpInduction");
1446 assert((IK != IK_FpInduction ||
1447 (InductionBinOp &&
1448 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1449 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1450 "Binary opcode should be specified for FP induction");
1451
1452 if (Casts)
1453 llvm::append_range(RedundantCasts, *Casts);
1454}
1455
1457 if (isa<SCEVConstant>(Step))
1458 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1459 return nullptr;
1460}
1461
1463 ScalarEvolution *SE,
1465
1466 // Here we only handle FP induction variables.
1467 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1468
1469 if (TheLoop->getHeader() != Phi->getParent())
1470 return false;
1471
1472 // The loop may have multiple entrances or multiple exits; we can analyze
1473 // this phi if it has a unique entry value and a unique backedge value.
1474 if (Phi->getNumIncomingValues() != 2)
1475 return false;
1476 Value *BEValue = nullptr, *StartValue = nullptr;
1477 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1478 BEValue = Phi->getIncomingValue(0);
1479 StartValue = Phi->getIncomingValue(1);
1480 } else {
1481 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1482 "Unexpected Phi node in the loop");
1483 BEValue = Phi->getIncomingValue(1);
1484 StartValue = Phi->getIncomingValue(0);
1485 }
1486
1488 if (!BOp)
1489 return false;
1490
1491 Value *Addend = nullptr;
1492 if (BOp->getOpcode() == Instruction::FAdd) {
1493 if (BOp->getOperand(0) == Phi)
1494 Addend = BOp->getOperand(1);
1495 else if (BOp->getOperand(1) == Phi)
1496 Addend = BOp->getOperand(0);
1497 } else if (BOp->getOpcode() == Instruction::FSub)
1498 if (BOp->getOperand(0) == Phi)
1499 Addend = BOp->getOperand(1);
1500
1501 if (!Addend)
1502 return false;
1503
1504 // The addend should be loop invariant
1505 if (auto *I = dyn_cast<Instruction>(Addend))
1506 if (TheLoop->contains(I))
1507 return false;
1508
1509 // FP Step has unknown SCEV
1510 const SCEV *Step = SE->getUnknown(Addend);
1511 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1512 return true;
1513}
1514
1515/// This function is called when we suspect that the update-chain of a phi node
1516/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1517/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1518/// predicate P under which the SCEV expression for the phi can be the
1519/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1520/// cast instructions that are involved in the update-chain of this induction.
1521/// A caller that adds the required runtime predicate can be free to drop these
1522/// cast instructions, and compute the phi using \p AR (instead of some scev
1523/// expression with casts).
1524///
1525/// For example, without a predicate the scev expression can take the following
1526/// form:
1527/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1528///
1529/// It corresponds to the following IR sequence:
1530/// %for.body:
1531/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1532/// %casted_phi = "ExtTrunc i64 %x"
1533/// %add = add i64 %casted_phi, %step
1534///
1535/// where %x is given in \p PN,
1536/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1537/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1538/// several forms, for example, such as:
1539/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1540/// or:
1541/// ExtTrunc2: %t = shl %x, m
1542/// %casted_phi = ashr %t, m
1543///
1544/// If we are able to find such sequence, we return the instructions
1545/// we found, namely %casted_phi and the instructions on its use-def chain up
1546/// to the phi (not including the phi).
1548 const SCEVUnknown *PhiScev,
1549 const SCEVAddRecExpr *AR,
1550 SmallVectorImpl<Instruction *> &CastInsts) {
1551
1552 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1553 auto *PN = cast<PHINode>(PhiScev->getValue());
1554 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1555 const Loop *L = AR->getLoop();
1556
1557 // Find any cast instructions that participate in the def-use chain of
1558 // PhiScev in the loop.
1559 // FORNOW/TODO: We currently expect the def-use chain to include only
1560 // two-operand instructions, where one of the operands is an invariant.
1561 // createAddRecFromPHIWithCasts() currently does not support anything more
1562 // involved than that, so we keep the search simple. This can be
1563 // extended/generalized as needed.
1564
1565 auto getDef = [&](const Value *Val) -> Value * {
1566 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1567 if (!BinOp)
1568 return nullptr;
1569 Value *Op0 = BinOp->getOperand(0);
1570 Value *Op1 = BinOp->getOperand(1);
1571 Value *Def = nullptr;
1572 if (L->isLoopInvariant(Op0))
1573 Def = Op1;
1574 else if (L->isLoopInvariant(Op1))
1575 Def = Op0;
1576 return Def;
1577 };
1578
1579 // Look for the instruction that defines the induction via the
1580 // loop backedge.
1581 BasicBlock *Latch = L->getLoopLatch();
1582 if (!Latch)
1583 return false;
1584 Value *Val = PN->getIncomingValueForBlock(Latch);
1585 if (!Val)
1586 return false;
1587
1588 // Follow the def-use chain until the induction phi is reached.
1589 // If on the way we encounter a Value that has the same SCEV Expr as the
1590 // phi node, we can consider the instructions we visit from that point
1591 // as part of the cast-sequence that can be ignored.
1592 bool InCastSequence = false;
1593 auto *Inst = dyn_cast<Instruction>(Val);
1594 while (Val != PN) {
1595 // If we encountered a phi node other than PN, or if we left the loop,
1596 // we bail out.
1597 if (!Inst || !L->contains(Inst)) {
1598 return false;
1599 }
1600 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1601 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1602 InCastSequence = true;
1603 if (InCastSequence) {
1604 // Only the last instruction in the cast sequence is expected to have
1605 // uses outside the induction def-use chain.
1606 if (!CastInsts.empty())
1607 if (!Inst->hasOneUse())
1608 return false;
1609 CastInsts.push_back(Inst);
1610 }
1611 Val = getDef(Val);
1612 if (!Val)
1613 return false;
1614 Inst = dyn_cast<Instruction>(Val);
1615 }
1616
1617 return InCastSequence;
1618}
1619
1622 InductionDescriptor &D, bool Assume) {
1623 Type *PhiTy = Phi->getType();
1624
1625 // Handle integer and pointer inductions variables.
1626 // Now we handle also FP induction but not trying to make a
1627 // recurrent expression from the PHI node in-place.
1628
1629 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1630 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1631 return false;
1632
1633 if (PhiTy->isFloatingPointTy())
1634 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1635
1636 const SCEV *PhiScev = PSE.getSCEV(Phi);
1637 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1638
1639 // We need this expression to be an AddRecExpr.
1640 if (Assume && !AR)
1641 AR = PSE.getAsAddRec(Phi);
1642
1643 if (!AR) {
1644 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1645 return false;
1646 }
1647
1648 // Record any Cast instructions that participate in the induction update
1649 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1650 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1651 // only after enabling Assume with PSCEV, this means we may have encountered
1652 // cast instructions that required adding a runtime check in order to
1653 // guarantee the correctness of the AddRecurrence respresentation of the
1654 // induction.
1655 if (PhiScev != AR && SymbolicPhi) {
1657 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1658 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1659 }
1660
1661 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1662}
1663
1665 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1666 InductionDescriptor &D, const SCEV *Expr,
1667 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1668 Type *PhiTy = Phi->getType();
1669 // isSCEVable returns true for integer and pointer types.
1670 if (!SE->isSCEVable(PhiTy))
1671 return false;
1672
1673 // Check that the PHI is consecutive.
1674 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1675 const SCEV *Step;
1676
1677 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1678 // match, we should treat it as a uniform. Unfortunately, we don't currently
1679 // know how to handled uniform PHIs.
1680 if (!match(PhiScev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
1681 m_SpecificLoop(TheLoop)))) {
1682 LLVM_DEBUG(
1683 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1684 return false;
1685 }
1686
1687 // This function assumes that InductionPhi is called only on Phi nodes
1688 // present inside loop headers. Check for the same, and throw an assert if
1689 // the current Phi is not present inside the loop header.
1690 assert(Phi->getParent() == TheLoop->getHeader() &&
1691 "Invalid Phi node, not present in loop header");
1692
1693 Value *StartValue =
1694 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
1695
1696 BasicBlock *Latch = TheLoop->getLoopLatch();
1697 if (!Latch)
1698 return false;
1699
1700 if (PhiTy->isIntegerTy()) {
1701 BinaryOperator *BOp =
1702 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1703 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1704 CastsToIgnore);
1705 return true;
1706 }
1707
1708 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1709
1710 // This allows induction variables w/non-constant steps.
1711 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1712 return true;
1713}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static bool isMinMaxReductionPhiWithUsersOutsideReductionChain(PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes)
Returns true if Phi is a min/max reduction matching Kind where Phi is used outside the reduction chai...
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define LLVM_DEBUG(...)
Definition Debug.h:114
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This is the shared class of boolean and integer constants.
Definition Constants.h:87
This class represents a range of values.
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noSignedZeros() const
Definition FMF.h:67
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
static FastMathFlags getFast()
Definition FMF.h:50
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static bool isFindRecurrenceKind(RecurKind Kind)
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static LLVM_ABI InstDesc isFindPattern(Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev)
Returns a struct describing if the instruction is a llvm.
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
bool use_empty() const
Definition Value.h:346
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaxNum(const Opnd0 &Op0, const Opnd1 &Op1)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximumNum(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimumNum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinNum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
class_match< const SCEV > m_SCEV()
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindLastIVUMax
FindLast reduction with select(cmp(),x,y) where one of (x,y) is increasing loop induction,...
@ FindFirstIVUMin
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ FindLastIVSMax
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y are an integer type, one is the current recur...
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?