LLVM 22.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
20#include "llvm/IR/Dominators.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
35 for (const Use &Use : I->operands())
36 if (!Set.count(dyn_cast<Instruction>(Use)))
37 return false;
38 return true;
39}
40
42 switch (Kind) {
43 default:
44 break;
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
61 return true;
62 }
63 return false;
64}
65
69
70/// Determines if Phi may have been type-promoted. If Phi has a single user
71/// that ANDs the Phi with a type mask, return the user. RT is updated to
72/// account for the narrower bit width represented by the mask, and the AND
73/// instruction is added to CI.
77 if (!Phi->hasOneUse())
78 return Phi;
79
80 const APInt *M = nullptr;
81 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
82
83 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
84 // with a new integer type of the corresponding bit width.
85 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
86 int32_t Bits = (*M + 1).exactLogBase2();
87 if (Bits > 0) {
88 RT = IntegerType::get(Phi->getContext(), Bits);
89 Visited.insert(Phi);
90 CI.insert(J);
91 return J;
92 }
93 }
94 return Phi;
95}
96
97/// Compute the minimal bit width needed to represent a reduction whose exit
98/// instruction is given by Exit.
99static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
100 DemandedBits *DB,
101 AssumptionCache *AC,
102 DominatorTree *DT) {
103 bool IsSigned = false;
104 const DataLayout &DL = Exit->getDataLayout();
105 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
106
107 if (DB) {
108 // Use the demanded bits analysis to determine the bits that are live out
109 // of the exit instruction, rounding up to the nearest power of two. If the
110 // use of demanded bits results in a smaller bit width, we know the value
111 // must be positive (i.e., IsSigned = false), because if this were not the
112 // case, the sign bit would have been demanded.
113 auto Mask = DB->getDemandedBits(Exit);
114 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
115 }
116
117 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
118 // If demanded bits wasn't able to limit the bit width, we can try to use
119 // value tracking instead. This can be the case, for example, if the value
120 // may be negative.
121 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
122 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
123 MaxBitWidth = NumTypeBits - NumSignBits;
124 KnownBits Bits = computeKnownBits(Exit, DL);
125 if (!Bits.isNonNegative()) {
126 // If the value is not known to be non-negative, we set IsSigned to true,
127 // meaning that we will use sext instructions instead of zext
128 // instructions to restore the original type.
129 IsSigned = true;
130 // Make sure at least one sign bit is included in the result, so it
131 // will get properly sign-extended.
132 ++MaxBitWidth;
133 }
134 }
135 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
136
137 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
138 IsSigned);
139}
140
141/// Collect cast instructions that can be ignored in the vectorizer's cost
142/// model, given a reduction exit value and the minimal type in which the
143// reduction can be represented. Also search casts to the recurrence type
144// to find the minimum width used by the recurrence.
145static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
146 Type *RecurrenceType,
148 unsigned &MinWidthCastToRecurTy) {
149
152 Worklist.push_back(Exit);
153 MinWidthCastToRecurTy = -1U;
154
155 while (!Worklist.empty()) {
156 Instruction *Val = Worklist.pop_back_val();
157 Visited.insert(Val);
158 if (auto *Cast = dyn_cast<CastInst>(Val)) {
159 if (Cast->getSrcTy() == RecurrenceType) {
160 // If the source type of a cast instruction is equal to the recurrence
161 // type, it will be eliminated, and should be ignored in the vectorizer
162 // cost model.
163 Casts.insert(Cast);
164 continue;
165 }
166 if (Cast->getDestTy() == RecurrenceType) {
167 // The minimum width used by the recurrence is found by checking for
168 // casts on its operands. The minimum width is used by the vectorizer
169 // when finding the widest type for in-loop reductions without any
170 // loads/stores.
171 MinWidthCastToRecurTy = std::min<unsigned>(
172 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
173 continue;
174 }
175 }
176 // Add all operands to the work list if they are loop-varying values that
177 // we haven't yet visited.
178 for (Value *O : cast<User>(Val)->operands())
179 if (auto *I = dyn_cast<Instruction>(O))
180 if (TheLoop->contains(I) && !Visited.count(I))
181 Worklist.push_back(I);
182 }
183}
184
185// Check if a given Phi node can be recognized as an ordered reduction for
186// vectorizing floating point operations without unsafe math.
187static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
188 Instruction *Exit, PHINode *Phi) {
189 // Currently only FAdd and FMulAdd are supported.
190 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
191 return false;
192
193 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
194 return false;
195
196 if (Kind == RecurKind::FMulAdd &&
198 return false;
199
200 // Ensure the exit instruction has only one user other than the reduction PHI
201 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
202 return false;
203
204 // The only pattern accepted is the one in which the reduction PHI
205 // is used as one of the operands of the exit instruction
206 auto *Op0 = Exit->getOperand(0);
207 auto *Op1 = Exit->getOperand(1);
208 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
209 return false;
210 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
211 return false;
212
213 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
214 << ", ExitInst: " << *Exit << "\n");
215
216 return true;
217}
218
219/// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi
220/// is used outside the reduction chain. This is common for loops selecting the
221/// index of a minimum/maximum value (argmin/argmax).
223 PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) {
224 BasicBlock *Latch = TheLoop->getLoopLatch();
225 if (!Latch)
226 return false;
227
228 assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values");
229 Value *Inc = Phi->getIncomingValueForBlock(Latch);
230 if (Phi->hasOneUse() || !Inc->hasOneUse() ||
232 return false;
233
234 Value *A, *B;
235 bool IsMinMax = [&]() {
236 switch (Kind) {
237 case RecurKind::UMax:
238 return match(Inc, m_UMax(m_Value(A), m_Value(B)));
239 case RecurKind::UMin:
240 return match(Inc, m_UMin(m_Value(A), m_Value(B)));
241 case RecurKind::SMax:
242 return match(Inc, m_SMax(m_Value(A), m_Value(B)));
243 case RecurKind::SMin:
244 return match(Inc, m_SMin(m_Value(A), m_Value(B)));
245 default:
246 llvm_unreachable("all min/max kinds must be handled");
247 }
248 }();
249 if (!IsMinMax)
250 return false;
251
252 if (A == B || (A != Phi && B != Phi))
253 return false;
254
256 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
257 RedDes =
258 RecurrenceDescriptor(RdxStart, /*Exit=*/nullptr, /*Store=*/nullptr, Kind,
259 FastMathFlags(), /*ExactFP=*/nullptr, Phi->getType(),
260 /*Signed=*/false, /*Ordered=*/false, CastInsts,
261 /*MinWidthCastToRecurTy=*/-1U, /*PhiMultiUse=*/true);
262 return true;
263}
264
266 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
269 if (Phi->getNumIncomingValues() != 2)
270 return false;
271
272 // Reduction variables are only found in the loop header block.
273 if (Phi->getParent() != TheLoop->getHeader())
274 return false;
275
276 // Check for min/max reduction variables that feed other users in the loop.
278 RedDes))
279 return true;
280
281 // Obtain the reduction start value from the value that comes from the loop
282 // preheader.
283 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
284
285 // ExitInstruction is the single value which is used outside the loop.
286 // We only allow for a single reduction value to be used outside the loop.
287 // This includes users of the reduction, variables (which form a cycle
288 // which ends in the phi node).
289 Instruction *ExitInstruction = nullptr;
290
291 // Variable to keep last visited store instruction. By the end of the
292 // algorithm this variable will be either empty or having intermediate
293 // reduction value stored in invariant address.
294 StoreInst *IntermediateStore = nullptr;
295
296 // Indicates that we found a reduction operation in our scan.
297 bool FoundReduxOp = false;
298
299 // We start with the PHI node and scan for all of the users of this
300 // instruction. All users must be instructions that can be used as reduction
301 // variables (such as ADD). We must have a single out-of-block user. The cycle
302 // must include the original PHI.
303 bool FoundStartPHI = false;
304
305 // To recognize min/max patterns formed by a icmp select sequence, we store
306 // the number of instruction we saw from the recognized min/max pattern,
307 // to make sure we only see exactly the two instructions.
308 unsigned NumCmpSelectPatternInst = 0;
309 InstDesc ReduxDesc(false, nullptr);
310
311 // Data used for determining if the recurrence has been type-promoted.
312 Type *RecurrenceType = Phi->getType();
314 unsigned MinWidthCastToRecurrenceType;
315 Instruction *Start = Phi;
316 bool IsSigned = false;
317
320
321 // Return early if the recurrence kind does not match the type of Phi. If the
322 // recurrence kind is arithmetic, we attempt to look through AND operations
323 // resulting from the type promotion performed by InstCombine. Vector
324 // operations are not limited to the legal integer widths, so we may be able
325 // to evaluate the reduction in the narrower width.
326 // Check the scalar type to handle both scalar and vector types.
327 Type *ScalarTy = RecurrenceType->getScalarType();
328 if (ScalarTy->isFloatingPointTy()) {
330 return false;
331 } else if (ScalarTy->isIntegerTy()) {
332 if (!isIntegerRecurrenceKind(Kind))
333 return false;
334 if (!isMinMaxRecurrenceKind(Kind))
335 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
336 } else {
337 // Pointer min/max may exist, but it is not supported as a reduction op.
338 return false;
339 }
340
341 Worklist.push_back(Start);
342 VisitedInsts.insert(Start);
343
344 // Start with all flags set because we will intersect this with the reduction
345 // flags from all the reduction operations.
347
348 // The first instruction in the use-def chain of the Phi node that requires
349 // exact floating point operations.
350 Instruction *ExactFPMathInst = nullptr;
351
352 // A value in the reduction can be used:
353 // - By the reduction:
354 // - Reduction operation:
355 // - One use of reduction value (safe).
356 // - Multiple use of reduction value (not safe).
357 // - PHI:
358 // - All uses of the PHI must be the reduction (safe).
359 // - Otherwise, not safe.
360 // - By instructions outside of the loop (safe).
361 // * One value may have several outside users, but all outside
362 // uses must be of the same value.
363 // - By store instructions with a loop invariant address (safe with
364 // the following restrictions):
365 // * If there are several stores, all must have the same address.
366 // * Final value should be stored in that loop invariant address.
367 // - By an instruction that is not part of the reduction (not safe).
368 // This is either:
369 // * An instruction type other than PHI or the reduction operation.
370 // * A PHI in the header other than the initial PHI.
371 while (!Worklist.empty()) {
372 Instruction *Cur = Worklist.pop_back_val();
373
374 // Store instructions are allowed iff it is the store of the reduction
375 // value to the same loop invariant memory location.
376 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
377 if (!SE) {
378 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
379 << "Scalar Evolution Analysis\n");
380 return false;
381 }
382
383 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
384 // Check it is the same address as previous stores
385 if (IntermediateStore) {
386 const SCEV *OtherScev =
387 SE->getSCEV(IntermediateStore->getPointerOperand());
388
389 if (OtherScev != PtrScev) {
390 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
391 << "inside the loop: " << *SI->getPointerOperand()
392 << " and "
393 << *IntermediateStore->getPointerOperand() << '\n');
394 return false;
395 }
396 }
397
398 // Check the pointer is loop invariant
399 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
400 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
401 << "inside the loop: " << *SI->getPointerOperand()
402 << '\n');
403 return false;
404 }
405
406 // IntermediateStore is always the last store in the loop.
408 continue;
409 }
410
411 // No Users.
412 // If the instruction has no users then this is a broken chain and can't be
413 // a reduction variable.
414 if (Cur->use_empty())
415 return false;
416
417 bool IsAPhi = isa<PHINode>(Cur);
418
419 // A header PHI use other than the original PHI.
420 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
421 return false;
422
423 // Reductions of instructions such as Div, and Sub is only possible if the
424 // LHS is the reduction variable.
425 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
426 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
427 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
428 return false;
429
430 // Any reduction instruction must be of one of the allowed kinds. We ignore
431 // the starting value (the Phi or an AND instruction if the Phi has been
432 // type-promoted).
433 if (Cur != Start) {
434 ReduxDesc =
435 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
436 ExactFPMathInst = ExactFPMathInst == nullptr
437 ? ReduxDesc.getExactFPMathInst()
438 : ExactFPMathInst;
439 if (!ReduxDesc.isRecurrence())
440 return false;
441 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
442 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
443 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
444 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
445 // Accept FMF on either fcmp or select of a min/max idiom.
446 // TODO: This is a hack to work-around the fact that FMF may not be
447 // assigned/propagated correctly. If that problem is fixed or we
448 // standardize on fmin/fmax via intrinsics, this can be removed.
449 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
450 CurFMF |= FCmp->getFastMathFlags();
451 }
452 FMF &= CurFMF;
453 }
454 // Update this reduction kind if we matched a new instruction.
455 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
456 // state accurate while processing the worklist?
457 if (ReduxDesc.getRecKind() != RecurKind::None)
458 Kind = ReduxDesc.getRecKind();
459 }
460
461 bool IsASelect = isa<SelectInst>(Cur);
462
463 // A conditional reduction operation must only have 2 or less uses in
464 // VisitedInsts.
465 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
466 hasMultipleUsesOf(Cur, VisitedInsts, 2))
467 return false;
468
469 // A reduction operation must only have one use of the reduction value.
470 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
471 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))
472 return false;
473
474 // All inputs to a PHI node must be a reduction value.
475 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
476 return false;
477
478 if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Cur) || IsASelect))
479 ++NumCmpSelectPatternInst;
480 if (isFPMinMaxRecurrenceKind(Kind) && (isa<FCmpInst>(Cur) || IsASelect))
481 ++NumCmpSelectPatternInst;
482 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
483 ++NumCmpSelectPatternInst;
484
485 // Check whether we found a reduction operator.
486 FoundReduxOp |= !IsAPhi && Cur != Start;
487
488 // Process users of current instruction. Push non-PHI nodes after PHI nodes
489 // onto the stack. This way we are going to have seen all inputs to PHI
490 // nodes once we get to them.
493 for (User *U : Cur->users()) {
495
496 // If the user is a call to llvm.fmuladd then the instruction can only be
497 // the final operand.
498 if (isFMulAddIntrinsic(UI))
499 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
500 return false;
501
502 // Check if we found the exit user.
503 BasicBlock *Parent = UI->getParent();
504 if (!TheLoop->contains(Parent)) {
505 // If we already know this instruction is used externally, move on to
506 // the next user.
507 if (ExitInstruction == Cur)
508 continue;
509
510 // Exit if you find multiple values used outside or if the header phi
511 // node is being used. In this case the user uses the value of the
512 // previous iteration, in which case we would loose "VF-1" iterations of
513 // the reduction operation if we vectorize.
514 if (ExitInstruction != nullptr || Cur == Phi)
515 return false;
516
517 // The instruction used by an outside user must be the last instruction
518 // before we feed back to the reduction phi. Otherwise, we loose VF-1
519 // operations on the value.
520 if (!is_contained(Phi->operands(), Cur))
521 return false;
522
523 ExitInstruction = Cur;
524 continue;
525 }
526
527 // Process instructions only once (termination). Each reduction cycle
528 // value must only be used once, except by phi nodes and min/max
529 // reductions which are represented as a cmp followed by a select.
530 InstDesc IgnoredVal(false, nullptr);
531 if (VisitedInsts.insert(UI).second) {
532 if (isa<PHINode>(UI)) {
533 PHIs.push_back(UI);
534 } else {
536 if (SI && SI->getPointerOperand() == Cur) {
537 // Reduction variable chain can only be stored somewhere but it
538 // can't be used as an address.
539 return false;
540 }
541 NonPHIs.push_back(UI);
542 }
543 } else if (!isa<PHINode>(UI) &&
544 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
545 !isa<SelectInst>(UI)) ||
546 (!isConditionalRdxPattern(UI).isRecurrence() &&
547 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
548 .isRecurrence() &&
549 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
550 return false;
551
552 // Remember that we completed the cycle.
553 if (UI == Phi)
554 FoundStartPHI = true;
555 }
556 Worklist.append(PHIs.begin(), PHIs.end());
557 Worklist.append(NonPHIs.begin(), NonPHIs.end());
558 }
559
560 // This means we have seen one but not the other instruction of the
561 // pattern or more than just a select and cmp. Zero implies that we saw a
562 // llvm.min/max intrinsic, which is always OK.
563 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
564 NumCmpSelectPatternInst != 0)
565 return false;
566
567 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
568 return false;
569
570 if (IntermediateStore) {
571 // Check that stored value goes to the phi node again. This way we make sure
572 // that the value stored in IntermediateStore is indeed the final reduction
573 // value.
574 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
575 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
576 << *IntermediateStore << '\n');
577 return false;
578 }
579
580 // If there is an exit instruction it's value should be stored in
581 // IntermediateStore
582 if (ExitInstruction &&
583 IntermediateStore->getValueOperand() != ExitInstruction) {
584 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
585 "store last calculated value of the reduction: "
586 << *IntermediateStore << '\n');
587 return false;
588 }
589
590 // If all uses are inside the loop (intermediate stores), then the
591 // reduction value after the loop will be the one used in the last store.
592 if (!ExitInstruction)
593 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
594 }
595
596 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
597 return false;
598
599 const bool IsOrdered =
600 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
601
602 if (Start != Phi) {
603 // If the starting value is not the same as the phi node, we speculatively
604 // looked through an 'and' instruction when evaluating a potential
605 // arithmetic reduction to determine if it may have been type-promoted.
606 //
607 // We now compute the minimal bit width that is required to represent the
608 // reduction. If this is the same width that was indicated by the 'and', we
609 // can represent the reduction in the smaller type. The 'and' instruction
610 // will be eliminated since it will essentially be a cast instruction that
611 // can be ignore in the cost model. If we compute a different type than we
612 // did when evaluating the 'and', the 'and' will not be eliminated, and we
613 // will end up with different kinds of operations in the recurrence
614 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
615 // the case.
616 //
617 // The vectorizer relies on InstCombine to perform the actual
618 // type-shrinking. It does this by inserting instructions to truncate the
619 // exit value of the reduction to the width indicated by RecurrenceType and
620 // then extend this value back to the original width. If IsSigned is false,
621 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
622 // used.
623 //
624 // TODO: We should not rely on InstCombine to rewrite the reduction in the
625 // smaller type. We should just generate a correctly typed expression
626 // to begin with.
627 Type *ComputedType;
628 std::tie(ComputedType, IsSigned) =
629 computeRecurrenceType(ExitInstruction, DB, AC, DT);
630 if (ComputedType != RecurrenceType)
631 return false;
632 }
633
634 // Collect cast instructions and the minimum width used by the recurrence.
635 // If the starting value is not the same as the phi node and the computed
636 // recurrence type is equal to the recurrence type, the recurrence expression
637 // will be represented in a narrower or wider type. If there are any cast
638 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
639 // Note that the 'and' instruction was already included in this list.
640 //
641 // TODO: A better way to represent this may be to tag in some way all the
642 // instructions that are a part of the reduction. The vectorizer cost
643 // model could then apply the recurrence type to these instructions,
644 // without needing a white list of instructions to ignore.
645 // This may also be useful for the inloop reductions, if it can be
646 // kept simple enough.
647 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
648 MinWidthCastToRecurrenceType);
649
650 // We found a reduction var if we have reached the original phi node and we
651 // only have a single instruction with out-of-loop users.
652
653 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
654 // is saved as part of the RecurrenceDescriptor.
655
656 // Save the description of this reduction variable.
657 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
658 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
659 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
660 RedDes = RD;
661
662 return true;
663}
664
665// We are looking for loops that do something like this:
666// int r = 0;
667// for (int i = 0; i < n; i++) {
668// if (src[i] > 3)
669// r = 3;
670// }
671// where the reduction value (r) only has two states, in this example 0 or 3.
672// The generated LLVM IR for this type of loop will be like this:
673// for.body:
674// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
675// ...
676// %cmp = icmp sgt i32 %5, 3
677// %spec.select = select i1 %cmp, i32 3, i32 %r
678// ...
679// In general we can support vectorization of loops where 'r' flips between
680// any two non-constants, provided they are loop invariant. The only thing
681// we actually care about at the end of the loop is whether or not any lane
682// in the selected vector is different from the start value. The final
683// across-vector reduction after the loop simply involves choosing the start
684// value if nothing changed (0 in the example above) or the other selected
685// value (3 in the example above).
688 Instruction *I, InstDesc &Prev) {
689 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
690 // the select.
691 if (match(I, m_OneUse(m_Cmp()))) {
692 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
693 return InstDesc(Select, Prev.getRecKind());
694 }
695
696 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
697 return InstDesc(false, I);
698
700 Value *NonPhi = nullptr;
701
702 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
703 NonPhi = SI->getFalseValue();
704 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
705 NonPhi = SI->getTrueValue();
706 else
707 return InstDesc(false, I);
708
709 // We are looking for selects of the form:
710 // select(cmp(), phi, loop_invariant) or
711 // select(cmp(), loop_invariant, phi)
712 if (!Loop->isLoopInvariant(NonPhi))
713 return InstDesc(false, I);
714
715 return InstDesc(I, RecurKind::AnyOf);
716}
717
718// We are looking for loops that do something like this:
719// int r = 0;
720// for (int i = 0; i < n; i++) {
721// if (src[i] > 3)
722// r = i;
723// }
724// The reduction value (r) is derived from either the values of an induction
725// variable (i) sequence, or from the start value (0). The LLVM IR generated for
726// such loops would be as follows:
727// for.body:
728// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
729// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
730// ...
731// %cmp = icmp sgt i32 %5, 3
732// %spec.select = select i1 %cmp, i32 %i, i32 %r
733// %inc = add nsw i32 %i, 1
734// ...
735// Since 'i' is an induction variable, the reduction value after the loop will
736// be the maximum (increasing induction) or minimum (decreasing induction) value
737// of 'i' that the condition (src[i] > 3) is satisfied, or the start value (0 in
738// the example above). When the start value of the induction variable 'i' is
739// greater than the minimum (increasing induction) or maximum (decreasing
740// induction) value of the data type, we can use the minimum (increasing
741// induction) or maximum (decreasing induction) value of the data type as a
742// sentinel value to replace the start value. This allows us to perform a single
743// reduction max (increasing induction) or min (decreasing induction) operation
744// to obtain the final reduction result.
745// TODO: It is possible to solve the case where the start value is the minimum
746// value of the data type or a non-constant value by using mask and multiple
747// reduction operations.
750 PHINode *OrigPhi, Instruction *I,
751 ScalarEvolution &SE) {
752 // TODO: Support the vectorization of FindLastIV when the reduction phi is
753 // used by more than one select instruction. This vectorization is only
754 // performed when the SCEV of each increasing induction variable used by the
755 // select instructions is identical.
756 if (!OrigPhi->hasOneUse())
757 return InstDesc(false, I);
758
759 // We are looking for selects of the form:
760 // select(cmp(), phi, loop_induction) or
761 // select(cmp(), loop_induction, phi)
762 // TODO: Match selects with multi-use cmp conditions.
763 Value *NonRdxPhi = nullptr;
764 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
765 m_Specific(OrigPhi)),
766 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
767 m_Value(NonRdxPhi)))))
768 return InstDesc(false, I);
769
770 // Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
771 // std::nullopt.
772 auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
773 Type *Ty = V->getType();
774 if (!SE.isSCEVable(Ty))
775 return std::nullopt;
776
777 auto *AR = SE.getSCEV(V);
778 const SCEV *Step;
779 if (!match(AR, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
780 m_SpecificLoop(TheLoop))))
781 return std::nullopt;
782
783 if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) ||
784 (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step)))
785 return std::nullopt;
786
787 // Check if the minimum (FindLast) or maximum (FindFirst) value of the
788 // recurrence type can be used as a sentinel value. The maximum acceptable
789 // range for the induction variable, called the valid range will exclude
790 // <sentinel value>, where <sentinel value> is
791 // [Signed|Unsigned]Min(<recurrence type>) for FindLastIV or
792 // [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
793 // TODO: This range restriction can be lifted by adding an additional
794 // virtual OR reduction.
795 auto CheckRange = [&](bool IsSigned) {
796 const ConstantRange IVRange =
797 IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
798 unsigned NumBits = Ty->getIntegerBitWidth();
799 ConstantRange ValidRange = ConstantRange::getEmpty(NumBits);
800 if (isFindLastIVRecurrenceKind(Kind)) {
801 APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
802 : APInt::getMinValue(NumBits);
804 } else {
805 if (IsSigned)
806 ValidRange =
808 APInt::getSignedMaxValue(NumBits) - 1);
809 else
810 ValidRange = ConstantRange::getNonEmpty(
811 APInt::getMinValue(NumBits), APInt::getMaxValue(NumBits) - 1);
812 }
813
814 LLVM_DEBUG(dbgs() << "LV: "
815 << (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV"
816 : "FindFirstIV")
817 << " valid range is " << ValidRange
818 << ", and the range of " << *AR << " is " << IVRange
819 << "\n");
820
821 // Ensure the induction variable does not wrap around by verifying that
822 // its range is fully contained within the valid range.
823 return ValidRange.contains(IVRange);
824 };
825 if (isFindLastIVRecurrenceKind(Kind)) {
826 if (CheckRange(true))
828 if (CheckRange(false))
830 return std::nullopt;
831 }
833 "Kind must either be a FindLastIV or FindFirstIV");
834
835 if (CheckRange(true))
837 if (CheckRange(false))
839 return std::nullopt;
840 };
841
842 if (auto RK = GetRecurKind(NonRdxPhi))
843 return InstDesc(I, *RK);
844
845 return InstDesc(false, I);
846}
847
850 const InstDesc &Prev) {
852 "Expected a cmp or select or call instruction");
853 if (!isMinMaxRecurrenceKind(Kind))
854 return InstDesc(false, I);
855
856 // We must handle the select(cmp()) as a single instruction. Advance to the
857 // select.
858 if (match(I, m_OneUse(m_Cmp()))) {
859 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
860 return InstDesc(Select, Prev.getRecKind());
861 }
862
863 // Only match select with single use cmp condition, or a min/max intrinsic.
864 if (!isa<IntrinsicInst>(I) &&
866 return InstDesc(false, I);
867
868 // Look for a min/max pattern.
869 if (match(I, m_UMin(m_Value(), m_Value())))
870 return InstDesc(Kind == RecurKind::UMin, I);
871 if (match(I, m_UMax(m_Value(), m_Value())))
872 return InstDesc(Kind == RecurKind::UMax, I);
873 if (match(I, m_SMax(m_Value(), m_Value())))
874 return InstDesc(Kind == RecurKind::SMax, I);
875 if (match(I, m_SMin(m_Value(), m_Value())))
876 return InstDesc(Kind == RecurKind::SMin, I);
878 return InstDesc(Kind == RecurKind::FMin, I);
880 return InstDesc(Kind == RecurKind::FMax, I);
881 if (match(I, m_FMinNum(m_Value(), m_Value())))
882 return InstDesc(Kind == RecurKind::FMin, I);
883 if (match(I, m_FMaxNum(m_Value(), m_Value())))
884 return InstDesc(Kind == RecurKind::FMax, I);
885 if (match(I, m_FMinimumNum(m_Value(), m_Value())))
886 return InstDesc(Kind == RecurKind::FMinimumNum, I);
887 if (match(I, m_FMaximumNum(m_Value(), m_Value())))
888 return InstDesc(Kind == RecurKind::FMaximumNum, I);
889 if (match(I, m_FMinimum(m_Value(), m_Value())))
890 return InstDesc(Kind == RecurKind::FMinimum, I);
891 if (match(I, m_FMaximum(m_Value(), m_Value())))
892 return InstDesc(Kind == RecurKind::FMaximum, I);
893
894 return InstDesc(false, I);
895}
896
897/// Returns true if the select instruction has users in the compare-and-add
898/// reduction pattern below. The select instruction argument is the last one
899/// in the sequence.
900///
901/// %sum.1 = phi ...
902/// ...
903/// %cmp = fcmp pred %0, %CFP
904/// %add = fadd %0, %sum.1
905/// %sum.2 = select %cmp, %add, %sum.1
908 Value *TrueVal, *FalseVal;
909 // Only handle single use cases for now.
910 if (!match(I,
911 m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
912 return InstDesc(false, I);
913
914 // Handle only when either of operands of select instruction is a PHI
915 // node for now.
916 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
917 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
918 return InstDesc(false, I);
919
920 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
921 : dyn_cast<Instruction>(TrueVal);
922 if (!I1 || !I1->isBinaryOp())
923 return InstDesc(false, I);
924
925 Value *Op1, *Op2;
926 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
927 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
928 I1->isFast()) ||
929 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
930 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
931 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
932 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
933 return InstDesc(false, I);
934
937 if (!IPhi || IPhi != FalseVal)
938 return InstDesc(false, I);
939
940 return InstDesc(true, I);
941}
942
944 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
945 FastMathFlags FuncFMF, ScalarEvolution *SE) {
946 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
947 switch (I->getOpcode()) {
948 default:
949 return InstDesc(false, I);
950 case Instruction::PHI:
951 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
952 case Instruction::Sub:
953 return InstDesc(
954 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
955 case Instruction::Add:
956 return InstDesc(
957 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
958 case Instruction::Mul:
959 return InstDesc(Kind == RecurKind::Mul, I);
960 case Instruction::And:
961 return InstDesc(Kind == RecurKind::And, I);
962 case Instruction::Or:
963 return InstDesc(Kind == RecurKind::Or, I);
964 case Instruction::Xor:
965 return InstDesc(Kind == RecurKind::Xor, I);
966 case Instruction::FDiv:
967 case Instruction::FMul:
968 return InstDesc(Kind == RecurKind::FMul, I,
969 I->hasAllowReassoc() ? nullptr : I);
970 case Instruction::FSub:
971 case Instruction::FAdd:
972 return InstDesc(Kind == RecurKind::FAdd, I,
973 I->hasAllowReassoc() ? nullptr : I);
974 case Instruction::Select:
975 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
976 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
979 if (isFindIVRecurrenceKind(Kind) && SE)
980 return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
981 [[fallthrough]];
982 case Instruction::FCmp:
983 case Instruction::ICmp:
984 case Instruction::Call:
985 if (isAnyOfRecurrenceKind(Kind))
986 return isAnyOfPattern(L, OrigPhi, I, Prev);
987 auto HasRequiredFMF = [&]() {
988 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
989 return true;
990 if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
991 return true;
992 // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan
993 // flags since NaN and signed zeroes are propagated in the intrinsic
994 // implementation.
997 match(I,
1000 };
1001 if (isIntMinMaxRecurrenceKind(Kind))
1002 return isMinMaxPattern(I, Kind, Prev);
1003 if (isFPMinMaxRecurrenceKind(Kind)) {
1004 InstDesc Res = isMinMaxPattern(I, Kind, Prev);
1005 if (!Res.isRecurrence())
1006 return InstDesc(false, I);
1007 if (HasRequiredFMF())
1008 return Res;
1009 // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
1010 // intrinsics with extra checks ensuring the vector loop handles only
1011 // non-NaN inputs.
1013 assert(Kind == RecurKind::FMax &&
1014 "unexpected recurrence kind for maxnum");
1015 return InstDesc(I, RecurKind::FMaxNum);
1016 }
1018 assert(Kind == RecurKind::FMin &&
1019 "unexpected recurrence kind for minnum");
1020 return InstDesc(I, RecurKind::FMinNum);
1021 }
1022 return InstDesc(false, I);
1023 }
1024 if (isFMulAddIntrinsic(I))
1025 return InstDesc(Kind == RecurKind::FMulAdd, I,
1026 I->hasAllowReassoc() ? nullptr : I);
1027 return InstDesc(false, I);
1028 }
1029}
1030
1033 unsigned MaxNumUses) {
1034 unsigned NumUses = 0;
1035 for (const Use &U : I->operands()) {
1036 if (Insts.count(dyn_cast<Instruction>(U)))
1037 ++NumUses;
1038 if (NumUses > MaxNumUses)
1039 return true;
1040 }
1041
1042 return false;
1043}
1044
1046 RecurrenceDescriptor &RedDes,
1048 DominatorTree *DT,
1049 ScalarEvolution *SE) {
1050 BasicBlock *Header = TheLoop->getHeader();
1051 Function &F = *Header->getParent();
1052 FastMathFlags FMF;
1053 FMF.setNoNaNs(
1054 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
1055 FMF.setNoSignedZeros(
1056 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
1057
1058 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
1059 SE)) {
1060 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1061 return true;
1062 }
1063 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT,
1064 SE)) {
1065 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1066 return true;
1067 }
1068 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, FMF, RedDes,
1069 DB, AC, DT, SE)) {
1070 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1071 << "\n");
1072 return true;
1073 }
1074 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
1075 SE)) {
1076 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1077 return true;
1078 }
1079 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
1080 SE)) {
1081 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1082 return true;
1083 }
1084 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
1085 SE)) {
1086 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1087 return true;
1088 }
1089 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
1090 SE)) {
1091 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1092 return true;
1093 }
1094 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
1095 SE)) {
1096 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
1097 return true;
1098 }
1099 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
1100 SE)) {
1101 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
1102 return true;
1103 }
1104 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
1105 SE)) {
1106 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
1107 return true;
1108 }
1109 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
1110 SE)) {
1111 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
1112 return true;
1113 }
1114 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
1115 SE)) {
1116 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1117 << "\n");
1118 return true;
1119 }
1120 if (AddReductionVar(Phi, RecurKind::FindLastIVSMax, TheLoop, FMF, RedDes, DB,
1121 AC, DT, SE)) {
1122 LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n");
1123 return true;
1124 }
1125 if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB,
1126 AC, DT, SE)) {
1127 LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n");
1128 return true;
1129 }
1130 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
1131 SE)) {
1132 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1133 return true;
1134 }
1135 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1136 SE)) {
1137 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1138 return true;
1139 }
1140 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
1141 SE)) {
1142 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
1143 return true;
1144 }
1145 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
1146 SE)) {
1147 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
1148 return true;
1149 }
1150 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1151 SE)) {
1152 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1153 return true;
1154 }
1155 if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
1156 SE)) {
1157 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
1158 return true;
1159 }
1160 if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
1161 SE)) {
1162 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
1163 return true;
1164 }
1165 if (AddReductionVar(Phi, RecurKind::FMaximumNum, TheLoop, FMF, RedDes, DB, AC,
1166 DT, SE)) {
1167 LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi
1168 << "\n");
1169 return true;
1170 }
1171 if (AddReductionVar(Phi, RecurKind::FMinimumNum, TheLoop, FMF, RedDes, DB, AC,
1172 DT, SE)) {
1173 LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi
1174 << "\n");
1175 return true;
1176 }
1177
1178 // Not a reduction of known type.
1179 return false;
1180}
1181
1183 DominatorTree *DT) {
1184
1185 // Ensure the phi node is in the loop header and has two incoming values.
1186 if (Phi->getParent() != TheLoop->getHeader() ||
1187 Phi->getNumIncomingValues() != 2)
1188 return false;
1189
1190 // Ensure the loop has a preheader and a single latch block. The loop
1191 // vectorizer will need the latch to set up the next iteration of the loop.
1192 auto *Preheader = TheLoop->getLoopPreheader();
1193 auto *Latch = TheLoop->getLoopLatch();
1194 if (!Preheader || !Latch)
1195 return false;
1196
1197 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1198 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1199 Phi->getBasicBlockIndex(Latch) < 0)
1200 return false;
1201
1202 // Get the previous value. The previous value comes from the latch edge while
1203 // the initial value comes from the preheader edge.
1204 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1205
1206 // If Previous is a phi in the header, go through incoming values from the
1207 // latch until we find a non-phi value. Use this as the new Previous, all uses
1208 // in the header will be dominated by the original phi, but need to be moved
1209 // after the non-phi previous value.
1211 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1212 if (PrevPhi->getParent() != Phi->getParent())
1213 return false;
1214 if (!SeenPhis.insert(PrevPhi).second)
1215 return false;
1216 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1217 }
1218
1219 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1220 return false;
1221
1222 // Ensure every user of the phi node (recursively) is dominated by the
1223 // previous value. The dominance requirement ensures the loop vectorizer will
1224 // not need to vectorize the initial value prior to the first iteration of the
1225 // loop.
1226 // TODO: Consider extending this sinking to handle memory instructions.
1227
1229 BasicBlock *PhiBB = Phi->getParent();
1231 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1232 // Cyclic dependence.
1233 if (Previous == SinkCandidate)
1234 return false;
1235
1236 if (!Seen.insert(SinkCandidate).second)
1237 return true;
1238 if (DT->dominates(Previous,
1239 SinkCandidate)) // We already are good w/o sinking.
1240 return true;
1241
1242 if (SinkCandidate->getParent() != PhiBB ||
1243 SinkCandidate->mayHaveSideEffects() ||
1244 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1245 return false;
1246
1247 // If we reach a PHI node that is not dominated by Previous, we reached a
1248 // header PHI. No need for sinking.
1249 if (isa<PHINode>(SinkCandidate))
1250 return true;
1251
1252 // Sink User tentatively and check its users
1253 WorkList.push_back(SinkCandidate);
1254 return true;
1255 };
1256
1257 WorkList.push_back(Phi);
1258 // Try to recursively sink instructions and their users after Previous.
1259 while (!WorkList.empty()) {
1260 Instruction *Current = WorkList.pop_back_val();
1261 for (User *User : Current->users()) {
1262 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1263 return false;
1264 }
1265 }
1266
1267 return true;
1268}
1269
1271 switch (Kind) {
1272 case RecurKind::Sub:
1273 return Instruction::Sub;
1275 case RecurKind::Add:
1276 return Instruction::Add;
1277 case RecurKind::Mul:
1278 return Instruction::Mul;
1279 case RecurKind::Or:
1280 return Instruction::Or;
1281 case RecurKind::And:
1282 return Instruction::And;
1283 case RecurKind::Xor:
1284 return Instruction::Xor;
1285 case RecurKind::FMul:
1286 return Instruction::FMul;
1287 case RecurKind::FMulAdd:
1288 case RecurKind::FAdd:
1289 return Instruction::FAdd;
1290 case RecurKind::SMax:
1291 case RecurKind::SMin:
1292 case RecurKind::UMax:
1293 case RecurKind::UMin:
1294 return Instruction::ICmp;
1295 case RecurKind::FMax:
1296 case RecurKind::FMin:
1301 return Instruction::FCmp;
1302 case RecurKind::AnyOf:
1307 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1308 // are supported.
1309 default:
1310 llvm_unreachable("Unknown recurrence operation");
1311 }
1312}
1313
1316 SmallVector<Instruction *, 4> ReductionOperations;
1317 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1318
1319 // Search down from the Phi to the LoopExitInstr, looking for instructions
1320 // with a single user of the correct type for the reduction.
1321
1322 // Note that we check that the type of the operand is correct for each item in
1323 // the chain, including the last (the loop exit value). This can come up from
1324 // sub, which would otherwise be treated as an add reduction. MinMax also need
1325 // to check for a pair of icmp/select, for which we use getNextInstruction and
1326 // isCorrectOpcode functions to step the right number of instruction, and
1327 // check the icmp/select pair.
1328 // FIXME: We also do not attempt to look through Select's yet, which might
1329 // be part of the reduction chain, or attempt to looks through And's to find a
1330 // smaller bitwidth. Subs are also currently not allowed (which are usually
1331 // treated as part of a add reduction) as they are expected to generally be
1332 // more expensive than out-of-loop reductions, and need to be costed more
1333 // carefully.
1334 unsigned ExpectedUses = 1;
1335 if (IsMinMax)
1336 ExpectedUses = 2;
1337
1338 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1339 for (auto *User : Cur->users()) {
1341 if (isa<PHINode>(UI))
1342 continue;
1343 if (IsMinMax) {
1344 // We are expecting a icmp/select pair, which we go to the next select
1345 // instruction if we can. We already know that Cur has 2 uses.
1346 if (isa<SelectInst>(UI))
1347 return UI;
1348 continue;
1349 }
1350 return UI;
1351 }
1352 return nullptr;
1353 };
1354 auto isCorrectOpcode = [&](Instruction *Cur) {
1355 if (IsMinMax) {
1356 Value *LHS, *RHS;
1358 matchSelectPattern(Cur, LHS, RHS).Flavor);
1359 }
1360 // Recognize a call to the llvm.fmuladd intrinsic.
1361 if (isFMulAddIntrinsic(Cur))
1362 return true;
1363
1364 if (Cur->getOpcode() == Instruction::Sub &&
1366 return true;
1367
1368 return Cur->getOpcode() == getOpcode();
1369 };
1370
1371 // Attempt to look through Phis which are part of the reduction chain
1372 unsigned ExtraPhiUses = 0;
1373 Instruction *RdxInstr = LoopExitInstr;
1374 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1375 if (ExitPhi->getNumIncomingValues() != 2)
1376 return {};
1377
1378 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1379 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1380
1381 Instruction *Chain = nullptr;
1382 if (Inc0 == Phi)
1383 Chain = Inc1;
1384 else if (Inc1 == Phi)
1385 Chain = Inc0;
1386 else
1387 return {};
1388
1389 RdxInstr = Chain;
1390 ExtraPhiUses = 1;
1391 }
1392
1393 // The loop exit instruction we check first (as a quick test) but add last. We
1394 // check the opcode is correct (and dont allow them to be Subs) and that they
1395 // have expected to have the expected number of uses. They will have one use
1396 // from the phi and one from a LCSSA value, no matter the type.
1397 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1398 return {};
1399
1400 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1401 // for conditional reductions.
1402 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1403 return {};
1404
1405 Instruction *Cur = getNextInstruction(Phi);
1406
1407 // Each other instruction in the chain should have the expected number of uses
1408 // and be the correct opcode.
1409 while (Cur != RdxInstr) {
1410 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1411 return {};
1412
1413 ReductionOperations.push_back(Cur);
1414 Cur = getNextInstruction(Cur);
1415 }
1416
1417 ReductionOperations.push_back(Cur);
1418 return ReductionOperations;
1419}
1420
1421InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1422 const SCEV *Step, BinaryOperator *BOp,
1424 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1425 assert(IK != IK_NoInduction && "Not an induction");
1426
1427 // Start value type should match the induction kind and the value
1428 // itself should not be null.
1429 assert(StartValue && "StartValue is null");
1430 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1431 "StartValue is not a pointer for pointer induction");
1432 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1433 "StartValue is not an integer for integer induction");
1434
1435 // Check the Step Value. It should be non-zero integer value.
1436 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1437 "Step value is zero");
1438
1439 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1440 "StepValue is not an integer");
1441
1442 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1443 "StepValue is not FP for FpInduction");
1444 assert((IK != IK_FpInduction ||
1445 (InductionBinOp &&
1446 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1447 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1448 "Binary opcode should be specified for FP induction");
1449
1450 if (Casts)
1451 llvm::append_range(RedundantCasts, *Casts);
1452}
1453
1455 if (isa<SCEVConstant>(Step))
1456 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1457 return nullptr;
1458}
1459
1461 ScalarEvolution *SE,
1463
1464 // Here we only handle FP induction variables.
1465 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1466
1467 if (TheLoop->getHeader() != Phi->getParent())
1468 return false;
1469
1470 // The loop may have multiple entrances or multiple exits; we can analyze
1471 // this phi if it has a unique entry value and a unique backedge value.
1472 if (Phi->getNumIncomingValues() != 2)
1473 return false;
1474 Value *BEValue = nullptr, *StartValue = nullptr;
1475 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1476 BEValue = Phi->getIncomingValue(0);
1477 StartValue = Phi->getIncomingValue(1);
1478 } else {
1479 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1480 "Unexpected Phi node in the loop");
1481 BEValue = Phi->getIncomingValue(1);
1482 StartValue = Phi->getIncomingValue(0);
1483 }
1484
1486 if (!BOp)
1487 return false;
1488
1489 Value *Addend = nullptr;
1490 if (BOp->getOpcode() == Instruction::FAdd) {
1491 if (BOp->getOperand(0) == Phi)
1492 Addend = BOp->getOperand(1);
1493 else if (BOp->getOperand(1) == Phi)
1494 Addend = BOp->getOperand(0);
1495 } else if (BOp->getOpcode() == Instruction::FSub)
1496 if (BOp->getOperand(0) == Phi)
1497 Addend = BOp->getOperand(1);
1498
1499 if (!Addend)
1500 return false;
1501
1502 // The addend should be loop invariant
1503 if (auto *I = dyn_cast<Instruction>(Addend))
1504 if (TheLoop->contains(I))
1505 return false;
1506
1507 // FP Step has unknown SCEV
1508 const SCEV *Step = SE->getUnknown(Addend);
1509 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1510 return true;
1511}
1512
1513/// This function is called when we suspect that the update-chain of a phi node
1514/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1515/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1516/// predicate P under which the SCEV expression for the phi can be the
1517/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1518/// cast instructions that are involved in the update-chain of this induction.
1519/// A caller that adds the required runtime predicate can be free to drop these
1520/// cast instructions, and compute the phi using \p AR (instead of some scev
1521/// expression with casts).
1522///
1523/// For example, without a predicate the scev expression can take the following
1524/// form:
1525/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1526///
1527/// It corresponds to the following IR sequence:
1528/// %for.body:
1529/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1530/// %casted_phi = "ExtTrunc i64 %x"
1531/// %add = add i64 %casted_phi, %step
1532///
1533/// where %x is given in \p PN,
1534/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1535/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1536/// several forms, for example, such as:
1537/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1538/// or:
1539/// ExtTrunc2: %t = shl %x, m
1540/// %casted_phi = ashr %t, m
1541///
1542/// If we are able to find such sequence, we return the instructions
1543/// we found, namely %casted_phi and the instructions on its use-def chain up
1544/// to the phi (not including the phi).
1546 const SCEVUnknown *PhiScev,
1547 const SCEVAddRecExpr *AR,
1548 SmallVectorImpl<Instruction *> &CastInsts) {
1549
1550 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1551 auto *PN = cast<PHINode>(PhiScev->getValue());
1552 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1553 const Loop *L = AR->getLoop();
1554
1555 // Find any cast instructions that participate in the def-use chain of
1556 // PhiScev in the loop.
1557 // FORNOW/TODO: We currently expect the def-use chain to include only
1558 // two-operand instructions, where one of the operands is an invariant.
1559 // createAddRecFromPHIWithCasts() currently does not support anything more
1560 // involved than that, so we keep the search simple. This can be
1561 // extended/generalized as needed.
1562
1563 auto getDef = [&](const Value *Val) -> Value * {
1564 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1565 if (!BinOp)
1566 return nullptr;
1567 Value *Op0 = BinOp->getOperand(0);
1568 Value *Op1 = BinOp->getOperand(1);
1569 Value *Def = nullptr;
1570 if (L->isLoopInvariant(Op0))
1571 Def = Op1;
1572 else if (L->isLoopInvariant(Op1))
1573 Def = Op0;
1574 return Def;
1575 };
1576
1577 // Look for the instruction that defines the induction via the
1578 // loop backedge.
1579 BasicBlock *Latch = L->getLoopLatch();
1580 if (!Latch)
1581 return false;
1582 Value *Val = PN->getIncomingValueForBlock(Latch);
1583 if (!Val)
1584 return false;
1585
1586 // Follow the def-use chain until the induction phi is reached.
1587 // If on the way we encounter a Value that has the same SCEV Expr as the
1588 // phi node, we can consider the instructions we visit from that point
1589 // as part of the cast-sequence that can be ignored.
1590 bool InCastSequence = false;
1591 auto *Inst = dyn_cast<Instruction>(Val);
1592 while (Val != PN) {
1593 // If we encountered a phi node other than PN, or if we left the loop,
1594 // we bail out.
1595 if (!Inst || !L->contains(Inst)) {
1596 return false;
1597 }
1598 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1599 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1600 InCastSequence = true;
1601 if (InCastSequence) {
1602 // Only the last instruction in the cast sequence is expected to have
1603 // uses outside the induction def-use chain.
1604 if (!CastInsts.empty())
1605 if (!Inst->hasOneUse())
1606 return false;
1607 CastInsts.push_back(Inst);
1608 }
1609 Val = getDef(Val);
1610 if (!Val)
1611 return false;
1612 Inst = dyn_cast<Instruction>(Val);
1613 }
1614
1615 return InCastSequence;
1616}
1617
1620 InductionDescriptor &D, bool Assume) {
1621 Type *PhiTy = Phi->getType();
1622
1623 // Handle integer and pointer inductions variables.
1624 // Now we handle also FP induction but not trying to make a
1625 // recurrent expression from the PHI node in-place.
1626
1627 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1628 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1629 return false;
1630
1631 if (PhiTy->isFloatingPointTy())
1632 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1633
1634 const SCEV *PhiScev = PSE.getSCEV(Phi);
1635 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1636
1637 // We need this expression to be an AddRecExpr.
1638 if (Assume && !AR)
1639 AR = PSE.getAsAddRec(Phi);
1640
1641 if (!AR) {
1642 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1643 return false;
1644 }
1645
1646 // Record any Cast instructions that participate in the induction update
1647 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1648 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1649 // only after enabling Assume with PSCEV, this means we may have encountered
1650 // cast instructions that required adding a runtime check in order to
1651 // guarantee the correctness of the AddRecurrence respresentation of the
1652 // induction.
1653 if (PhiScev != AR && SymbolicPhi) {
1655 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1656 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1657 }
1658
1659 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1660}
1661
1663 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1664 InductionDescriptor &D, const SCEV *Expr,
1665 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1666 Type *PhiTy = Phi->getType();
1667 // isSCEVable returns true for integer and pointer types.
1668 if (!SE->isSCEVable(PhiTy))
1669 return false;
1670
1671 // Check that the PHI is consecutive.
1672 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1673 const SCEV *Step;
1674
1675 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1676 // match, we should treat it as a uniform. Unfortunately, we don't currently
1677 // know how to handled uniform PHIs.
1678 if (!match(PhiScev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
1679 m_SpecificLoop(TheLoop)))) {
1680 LLVM_DEBUG(
1681 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1682 return false;
1683 }
1684
1685 // This function assumes that InductionPhi is called only on Phi nodes
1686 // present inside loop headers. Check for the same, and throw an assert if
1687 // the current Phi is not present inside the loop header.
1688 assert(Phi->getParent() == TheLoop->getHeader() &&
1689 "Invalid Phi node, not present in loop header");
1690
1691 Value *StartValue =
1692 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
1693
1694 BasicBlock *Latch = TheLoop->getLoopLatch();
1695 if (!Latch)
1696 return false;
1697
1698 if (PhiTy->isIntegerTy()) {
1699 BinaryOperator *BOp =
1700 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1701 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1702 CastsToIgnore);
1703 return true;
1704 }
1705
1706 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1707
1708 // This allows induction variables w/non-constant steps.
1709 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1710 return true;
1711}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static bool isMinMaxReductionPhiWithUsersOutsideReductionChain(PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes)
Returns true if Phi is a min/max reduction matching Kind where Phi is used outside the reduction chai...
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define LLVM_DEBUG(...)
Definition Debug.h:114
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This is the shared class of boolean and integer constants.
Definition Constants.h:87
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noSignedZeros() const
Definition FMF.h:67
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
static FastMathFlags getFast()
Definition FMF.h:50
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static bool isFindFirstIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static LLVM_ABI InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev)
Returns a struct describing if the instruction is a llvm.
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
bool use_empty() const
Definition Value.h:346
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaxNum(const Opnd0 &Op0, const Opnd1 &Op1)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximumNum(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimumNum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinNum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
class_match< const SCEV > m_SCEV()
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindLastIVUMax
FindLast reduction with select(cmp(),x,y) where one of (x,y) is increasing loop induction,...
@ FindFirstIVUMin
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ FindLastIVSMax
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?