LLVM 22.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
19#include "llvm/IR/Dominators.h"
22#include "llvm/IR/ValueHandle.h"
23#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27using namespace llvm::PatternMatch;
28
29#define DEBUG_TYPE "iv-descriptors"
30
33 for (const Use &Use : I->operands())
34 if (!Set.count(dyn_cast<Instruction>(Use)))
35 return false;
36 return true;
37}
38
40 switch (Kind) {
41 default:
42 break;
44 case RecurKind::Sub:
45 case RecurKind::Add:
46 case RecurKind::Mul:
47 case RecurKind::Or:
48 case RecurKind::And:
49 case RecurKind::Xor:
50 case RecurKind::SMax:
51 case RecurKind::SMin:
52 case RecurKind::UMax:
53 case RecurKind::UMin:
59 return true;
60 }
61 return false;
62}
63
67
68/// Determines if Phi may have been type-promoted. If Phi has a single user
69/// that ANDs the Phi with a type mask, return the user. RT is updated to
70/// account for the narrower bit width represented by the mask, and the AND
71/// instruction is added to CI.
75 if (!Phi->hasOneUse())
76 return Phi;
77
78 const APInt *M = nullptr;
79 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
80
81 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
82 // with a new integer type of the corresponding bit width.
83 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
84 int32_t Bits = (*M + 1).exactLogBase2();
85 if (Bits > 0) {
86 RT = IntegerType::get(Phi->getContext(), Bits);
87 Visited.insert(Phi);
88 CI.insert(J);
89 return J;
90 }
91 }
92 return Phi;
93}
94
95/// Compute the minimal bit width needed to represent a reduction whose exit
96/// instruction is given by Exit.
97static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
98 DemandedBits *DB,
100 DominatorTree *DT) {
101 bool IsSigned = false;
102 const DataLayout &DL = Exit->getDataLayout();
103 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
104
105 if (DB) {
106 // Use the demanded bits analysis to determine the bits that are live out
107 // of the exit instruction, rounding up to the nearest power of two. If the
108 // use of demanded bits results in a smaller bit width, we know the value
109 // must be positive (i.e., IsSigned = false), because if this were not the
110 // case, the sign bit would have been demanded.
111 auto Mask = DB->getDemandedBits(Exit);
112 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
113 }
114
115 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
116 // If demanded bits wasn't able to limit the bit width, we can try to use
117 // value tracking instead. This can be the case, for example, if the value
118 // may be negative.
119 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
120 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
121 MaxBitWidth = NumTypeBits - NumSignBits;
122 KnownBits Bits = computeKnownBits(Exit, DL);
123 if (!Bits.isNonNegative()) {
124 // If the value is not known to be non-negative, we set IsSigned to true,
125 // meaning that we will use sext instructions instead of zext
126 // instructions to restore the original type.
127 IsSigned = true;
128 // Make sure at least one sign bit is included in the result, so it
129 // will get properly sign-extended.
130 ++MaxBitWidth;
131 }
132 }
133 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
134
135 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
136 IsSigned);
137}
138
139/// Collect cast instructions that can be ignored in the vectorizer's cost
140/// model, given a reduction exit value and the minimal type in which the
141// reduction can be represented. Also search casts to the recurrence type
142// to find the minimum width used by the recurrence.
143static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
144 Type *RecurrenceType,
146 unsigned &MinWidthCastToRecurTy) {
147
150 Worklist.push_back(Exit);
151 MinWidthCastToRecurTy = -1U;
152
153 while (!Worklist.empty()) {
154 Instruction *Val = Worklist.pop_back_val();
155 Visited.insert(Val);
156 if (auto *Cast = dyn_cast<CastInst>(Val)) {
157 if (Cast->getSrcTy() == RecurrenceType) {
158 // If the source type of a cast instruction is equal to the recurrence
159 // type, it will be eliminated, and should be ignored in the vectorizer
160 // cost model.
161 Casts.insert(Cast);
162 continue;
163 }
164 if (Cast->getDestTy() == RecurrenceType) {
165 // The minimum width used by the recurrence is found by checking for
166 // casts on its operands. The minimum width is used by the vectorizer
167 // when finding the widest type for in-loop reductions without any
168 // loads/stores.
169 MinWidthCastToRecurTy = std::min<unsigned>(
170 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
171 continue;
172 }
173 }
174 // Add all operands to the work list if they are loop-varying values that
175 // we haven't yet visited.
176 for (Value *O : cast<User>(Val)->operands())
177 if (auto *I = dyn_cast<Instruction>(O))
178 if (TheLoop->contains(I) && !Visited.count(I))
179 Worklist.push_back(I);
180 }
181}
182
183// Check if a given Phi node can be recognized as an ordered reduction for
184// vectorizing floating point operations without unsafe math.
185static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
186 Instruction *Exit, PHINode *Phi) {
187 // Currently only FAdd and FMulAdd are supported.
188 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
189 return false;
190
191 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
192 return false;
193
194 if (Kind == RecurKind::FMulAdd &&
196 return false;
197
198 // Ensure the exit instruction has only one user other than the reduction PHI
199 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
200 return false;
201
202 // The only pattern accepted is the one in which the reduction PHI
203 // is used as one of the operands of the exit instruction
204 auto *Op0 = Exit->getOperand(0);
205 auto *Op1 = Exit->getOperand(1);
206 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
207 return false;
208 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
209 return false;
210
211 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
212 << ", ExitInst: " << *Exit << "\n");
213
214 return true;
215}
216
218 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
221 if (Phi->getNumIncomingValues() != 2)
222 return false;
223
224 // Reduction variables are only found in the loop header block.
225 if (Phi->getParent() != TheLoop->getHeader())
226 return false;
227
228 // Obtain the reduction start value from the value that comes from the loop
229 // preheader.
230 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
231
232 // ExitInstruction is the single value which is used outside the loop.
233 // We only allow for a single reduction value to be used outside the loop.
234 // This includes users of the reduction, variables (which form a cycle
235 // which ends in the phi node).
236 Instruction *ExitInstruction = nullptr;
237
238 // Variable to keep last visited store instruction. By the end of the
239 // algorithm this variable will be either empty or having intermediate
240 // reduction value stored in invariant address.
241 StoreInst *IntermediateStore = nullptr;
242
243 // Indicates that we found a reduction operation in our scan.
244 bool FoundReduxOp = false;
245
246 // We start with the PHI node and scan for all of the users of this
247 // instruction. All users must be instructions that can be used as reduction
248 // variables (such as ADD). We must have a single out-of-block user. The cycle
249 // must include the original PHI.
250 bool FoundStartPHI = false;
251
252 // To recognize min/max patterns formed by a icmp select sequence, we store
253 // the number of instruction we saw from the recognized min/max pattern,
254 // to make sure we only see exactly the two instructions.
255 unsigned NumCmpSelectPatternInst = 0;
256 InstDesc ReduxDesc(false, nullptr);
257
258 // Data used for determining if the recurrence has been type-promoted.
259 Type *RecurrenceType = Phi->getType();
261 unsigned MinWidthCastToRecurrenceType;
262 Instruction *Start = Phi;
263 bool IsSigned = false;
264
267
268 // Return early if the recurrence kind does not match the type of Phi. If the
269 // recurrence kind is arithmetic, we attempt to look through AND operations
270 // resulting from the type promotion performed by InstCombine. Vector
271 // operations are not limited to the legal integer widths, so we may be able
272 // to evaluate the reduction in the narrower width.
273 if (RecurrenceType->isFloatingPointTy()) {
275 return false;
276 } else if (RecurrenceType->isIntegerTy()) {
277 if (!isIntegerRecurrenceKind(Kind))
278 return false;
279 if (!isMinMaxRecurrenceKind(Kind))
280 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
281 } else {
282 // Pointer min/max may exist, but it is not supported as a reduction op.
283 return false;
284 }
285
286 Worklist.push_back(Start);
287 VisitedInsts.insert(Start);
288
289 // Start with all flags set because we will intersect this with the reduction
290 // flags from all the reduction operations.
292
293 // The first instruction in the use-def chain of the Phi node that requires
294 // exact floating point operations.
295 Instruction *ExactFPMathInst = nullptr;
296
297 // A value in the reduction can be used:
298 // - By the reduction:
299 // - Reduction operation:
300 // - One use of reduction value (safe).
301 // - Multiple use of reduction value (not safe).
302 // - PHI:
303 // - All uses of the PHI must be the reduction (safe).
304 // - Otherwise, not safe.
305 // - By instructions outside of the loop (safe).
306 // * One value may have several outside users, but all outside
307 // uses must be of the same value.
308 // - By store instructions with a loop invariant address (safe with
309 // the following restrictions):
310 // * If there are several stores, all must have the same address.
311 // * Final value should be stored in that loop invariant address.
312 // - By an instruction that is not part of the reduction (not safe).
313 // This is either:
314 // * An instruction type other than PHI or the reduction operation.
315 // * A PHI in the header other than the initial PHI.
316 while (!Worklist.empty()) {
317 Instruction *Cur = Worklist.pop_back_val();
318
319 // Store instructions are allowed iff it is the store of the reduction
320 // value to the same loop invariant memory location.
321 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
322 if (!SE) {
323 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
324 << "Scalar Evolution Analysis\n");
325 return false;
326 }
327
328 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
329 // Check it is the same address as previous stores
330 if (IntermediateStore) {
331 const SCEV *OtherScev =
332 SE->getSCEV(IntermediateStore->getPointerOperand());
333
334 if (OtherScev != PtrScev) {
335 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
336 << "inside the loop: " << *SI->getPointerOperand()
337 << " and "
338 << *IntermediateStore->getPointerOperand() << '\n');
339 return false;
340 }
341 }
342
343 // Check the pointer is loop invariant
344 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
345 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
346 << "inside the loop: " << *SI->getPointerOperand()
347 << '\n');
348 return false;
349 }
350
351 // IntermediateStore is always the last store in the loop.
353 continue;
354 }
355
356 // No Users.
357 // If the instruction has no users then this is a broken chain and can't be
358 // a reduction variable.
359 if (Cur->use_empty())
360 return false;
361
362 bool IsAPhi = isa<PHINode>(Cur);
363
364 // A header PHI use other than the original PHI.
365 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
366 return false;
367
368 // Reductions of instructions such as Div, and Sub is only possible if the
369 // LHS is the reduction variable.
370 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
371 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
372 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
373 return false;
374
375 // Any reduction instruction must be of one of the allowed kinds. We ignore
376 // the starting value (the Phi or an AND instruction if the Phi has been
377 // type-promoted).
378 if (Cur != Start) {
379 ReduxDesc =
380 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
381 ExactFPMathInst = ExactFPMathInst == nullptr
382 ? ReduxDesc.getExactFPMathInst()
383 : ExactFPMathInst;
384 if (!ReduxDesc.isRecurrence())
385 return false;
386 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
387 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
388 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
389 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
390 // Accept FMF on either fcmp or select of a min/max idiom.
391 // TODO: This is a hack to work-around the fact that FMF may not be
392 // assigned/propagated correctly. If that problem is fixed or we
393 // standardize on fmin/fmax via intrinsics, this can be removed.
394 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
395 CurFMF |= FCmp->getFastMathFlags();
396 }
397 FMF &= CurFMF;
398 }
399 // Update this reduction kind if we matched a new instruction.
400 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
401 // state accurate while processing the worklist?
402 if (ReduxDesc.getRecKind() != RecurKind::None)
403 Kind = ReduxDesc.getRecKind();
404 }
405
406 bool IsASelect = isa<SelectInst>(Cur);
407
408 // A conditional reduction operation must only have 2 or less uses in
409 // VisitedInsts.
410 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
411 hasMultipleUsesOf(Cur, VisitedInsts, 2))
412 return false;
413
414 // A reduction operation must only have one use of the reduction value.
415 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
416 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))
417 return false;
418
419 // All inputs to a PHI node must be a reduction value.
420 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
421 return false;
422
423 if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Cur) || IsASelect))
424 ++NumCmpSelectPatternInst;
425 if (isFPMinMaxRecurrenceKind(Kind) && (isa<FCmpInst>(Cur) || IsASelect))
426 ++NumCmpSelectPatternInst;
427 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
428 ++NumCmpSelectPatternInst;
429
430 // Check whether we found a reduction operator.
431 FoundReduxOp |= !IsAPhi && Cur != Start;
432
433 // Process users of current instruction. Push non-PHI nodes after PHI nodes
434 // onto the stack. This way we are going to have seen all inputs to PHI
435 // nodes once we get to them.
438 for (User *U : Cur->users()) {
440
441 // If the user is a call to llvm.fmuladd then the instruction can only be
442 // the final operand.
443 if (isFMulAddIntrinsic(UI))
444 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
445 return false;
446
447 // Check if we found the exit user.
448 BasicBlock *Parent = UI->getParent();
449 if (!TheLoop->contains(Parent)) {
450 // If we already know this instruction is used externally, move on to
451 // the next user.
452 if (ExitInstruction == Cur)
453 continue;
454
455 // Exit if you find multiple values used outside or if the header phi
456 // node is being used. In this case the user uses the value of the
457 // previous iteration, in which case we would loose "VF-1" iterations of
458 // the reduction operation if we vectorize.
459 if (ExitInstruction != nullptr || Cur == Phi)
460 return false;
461
462 // The instruction used by an outside user must be the last instruction
463 // before we feed back to the reduction phi. Otherwise, we loose VF-1
464 // operations on the value.
465 if (!is_contained(Phi->operands(), Cur))
466 return false;
467
468 ExitInstruction = Cur;
469 continue;
470 }
471
472 // Process instructions only once (termination). Each reduction cycle
473 // value must only be used once, except by phi nodes and min/max
474 // reductions which are represented as a cmp followed by a select.
475 InstDesc IgnoredVal(false, nullptr);
476 if (VisitedInsts.insert(UI).second) {
477 if (isa<PHINode>(UI)) {
478 PHIs.push_back(UI);
479 } else {
481 if (SI && SI->getPointerOperand() == Cur) {
482 // Reduction variable chain can only be stored somewhere but it
483 // can't be used as an address.
484 return false;
485 }
486 NonPHIs.push_back(UI);
487 }
488 } else if (!isa<PHINode>(UI) &&
489 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
490 !isa<SelectInst>(UI)) ||
491 (!isConditionalRdxPattern(UI).isRecurrence() &&
492 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
493 .isRecurrence() &&
494 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
495 return false;
496
497 // Remember that we completed the cycle.
498 if (UI == Phi)
499 FoundStartPHI = true;
500 }
501 Worklist.append(PHIs.begin(), PHIs.end());
502 Worklist.append(NonPHIs.begin(), NonPHIs.end());
503 }
504
505 // This means we have seen one but not the other instruction of the
506 // pattern or more than just a select and cmp. Zero implies that we saw a
507 // llvm.min/max intrinsic, which is always OK.
508 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
509 NumCmpSelectPatternInst != 0)
510 return false;
511
512 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
513 return false;
514
515 if (IntermediateStore) {
516 // Check that stored value goes to the phi node again. This way we make sure
517 // that the value stored in IntermediateStore is indeed the final reduction
518 // value.
519 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
520 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
521 << *IntermediateStore << '\n');
522 return false;
523 }
524
525 // If there is an exit instruction it's value should be stored in
526 // IntermediateStore
527 if (ExitInstruction &&
528 IntermediateStore->getValueOperand() != ExitInstruction) {
529 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
530 "store last calculated value of the reduction: "
531 << *IntermediateStore << '\n');
532 return false;
533 }
534
535 // If all uses are inside the loop (intermediate stores), then the
536 // reduction value after the loop will be the one used in the last store.
537 if (!ExitInstruction)
538 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
539 }
540
541 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
542 return false;
543
544 const bool IsOrdered =
545 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
546
547 if (Start != Phi) {
548 // If the starting value is not the same as the phi node, we speculatively
549 // looked through an 'and' instruction when evaluating a potential
550 // arithmetic reduction to determine if it may have been type-promoted.
551 //
552 // We now compute the minimal bit width that is required to represent the
553 // reduction. If this is the same width that was indicated by the 'and', we
554 // can represent the reduction in the smaller type. The 'and' instruction
555 // will be eliminated since it will essentially be a cast instruction that
556 // can be ignore in the cost model. If we compute a different type than we
557 // did when evaluating the 'and', the 'and' will not be eliminated, and we
558 // will end up with different kinds of operations in the recurrence
559 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
560 // the case.
561 //
562 // The vectorizer relies on InstCombine to perform the actual
563 // type-shrinking. It does this by inserting instructions to truncate the
564 // exit value of the reduction to the width indicated by RecurrenceType and
565 // then extend this value back to the original width. If IsSigned is false,
566 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
567 // used.
568 //
569 // TODO: We should not rely on InstCombine to rewrite the reduction in the
570 // smaller type. We should just generate a correctly typed expression
571 // to begin with.
572 Type *ComputedType;
573 std::tie(ComputedType, IsSigned) =
574 computeRecurrenceType(ExitInstruction, DB, AC, DT);
575 if (ComputedType != RecurrenceType)
576 return false;
577 }
578
579 // Collect cast instructions and the minimum width used by the recurrence.
580 // If the starting value is not the same as the phi node and the computed
581 // recurrence type is equal to the recurrence type, the recurrence expression
582 // will be represented in a narrower or wider type. If there are any cast
583 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
584 // Note that the 'and' instruction was already included in this list.
585 //
586 // TODO: A better way to represent this may be to tag in some way all the
587 // instructions that are a part of the reduction. The vectorizer cost
588 // model could then apply the recurrence type to these instructions,
589 // without needing a white list of instructions to ignore.
590 // This may also be useful for the inloop reductions, if it can be
591 // kept simple enough.
592 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
593 MinWidthCastToRecurrenceType);
594
595 // We found a reduction var if we have reached the original phi node and we
596 // only have a single instruction with out-of-loop users.
597
598 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
599 // is saved as part of the RecurrenceDescriptor.
600
601 // Save the description of this reduction variable.
602 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
603 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
604 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
605 RedDes = RD;
606
607 return true;
608}
609
610// We are looking for loops that do something like this:
611// int r = 0;
612// for (int i = 0; i < n; i++) {
613// if (src[i] > 3)
614// r = 3;
615// }
616// where the reduction value (r) only has two states, in this example 0 or 3.
617// The generated LLVM IR for this type of loop will be like this:
618// for.body:
619// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
620// ...
621// %cmp = icmp sgt i32 %5, 3
622// %spec.select = select i1 %cmp, i32 3, i32 %r
623// ...
624// In general we can support vectorization of loops where 'r' flips between
625// any two non-constants, provided they are loop invariant. The only thing
626// we actually care about at the end of the loop is whether or not any lane
627// in the selected vector is different from the start value. The final
628// across-vector reduction after the loop simply involves choosing the start
629// value if nothing changed (0 in the example above) or the other selected
630// value (3 in the example above).
633 Instruction *I, InstDesc &Prev) {
634 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
635 // the select.
636 if (match(I, m_OneUse(m_Cmp()))) {
637 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
638 return InstDesc(Select, Prev.getRecKind());
639 }
640
641 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
642 return InstDesc(false, I);
643
645 Value *NonPhi = nullptr;
646
647 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
648 NonPhi = SI->getFalseValue();
649 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
650 NonPhi = SI->getTrueValue();
651 else
652 return InstDesc(false, I);
653
654 // We are looking for selects of the form:
655 // select(cmp(), phi, loop_invariant) or
656 // select(cmp(), loop_invariant, phi)
657 if (!Loop->isLoopInvariant(NonPhi))
658 return InstDesc(false, I);
659
660 return InstDesc(I, RecurKind::AnyOf);
661}
662
663// We are looking for loops that do something like this:
664// int r = 0;
665// for (int i = 0; i < n; i++) {
666// if (src[i] > 3)
667// r = i;
668// }
669// The reduction value (r) is derived from either the values of an induction
670// variable (i) sequence, or from the start value (0). The LLVM IR generated for
671// such loops would be as follows:
672// for.body:
673// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
674// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
675// ...
676// %cmp = icmp sgt i32 %5, 3
677// %spec.select = select i1 %cmp, i32 %i, i32 %r
678// %inc = add nsw i32 %i, 1
679// ...
680// Since 'i' is an induction variable, the reduction value after the loop will
681// be the maximum (increasing induction) or minimum (decreasing induction) value
682// of 'i' that the condition (src[i] > 3) is satisfied, or the start value (0 in
683// the example above). When the start value of the induction variable 'i' is
684// greater than the minimum (increasing induction) or maximum (decreasing
685// induction) value of the data type, we can use the minimum (increasing
686// induction) or maximum (decreasing induction) value of the data type as a
687// sentinel value to replace the start value. This allows us to perform a single
688// reduction max (increasing induction) or min (decreasing induction) operation
689// to obtain the final reduction result.
690// TODO: It is possible to solve the case where the start value is the minimum
691// value of the data type or a non-constant value by using mask and multiple
692// reduction operations.
695 PHINode *OrigPhi, Instruction *I,
696 ScalarEvolution &SE) {
697 // TODO: Support the vectorization of FindLastIV when the reduction phi is
698 // used by more than one select instruction. This vectorization is only
699 // performed when the SCEV of each increasing induction variable used by the
700 // select instructions is identical.
701 if (!OrigPhi->hasOneUse())
702 return InstDesc(false, I);
703
704 // We are looking for selects of the form:
705 // select(cmp(), phi, loop_induction) or
706 // select(cmp(), loop_induction, phi)
707 // TODO: Match selects with multi-use cmp conditions.
708 Value *NonRdxPhi = nullptr;
709 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
710 m_Specific(OrigPhi)),
711 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
712 m_Value(NonRdxPhi)))))
713 return InstDesc(false, I);
714
715 // Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
716 // std::nullopt.
717 auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
718 Type *Ty = V->getType();
719 if (!SE.isSCEVable(Ty))
720 return std::nullopt;
721
722 auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
723 if (!AR || AR->getLoop() != TheLoop)
724 return std::nullopt;
725
726 const SCEV *Step = AR->getStepRecurrence(SE);
727 if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) ||
728 (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step)))
729 return std::nullopt;
730
731 // Check if the minimum (FindLast) or maximum (FindFirst) value of the
732 // recurrence type can be used as a sentinel value. The maximum acceptable
733 // range for the induction variable, called the valid range will exclude
734 // <sentinel value>, where <sentinel value> is
735 // [Signed|Unsigned]Min(<recurrence type>) for FindLastIV or
736 // [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
737 // TODO: This range restriction can be lifted by adding an additional
738 // virtual OR reduction.
739 auto CheckRange = [&](bool IsSigned) {
740 const ConstantRange IVRange =
741 IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
742 unsigned NumBits = Ty->getIntegerBitWidth();
743 ConstantRange ValidRange = ConstantRange::getEmpty(NumBits);
744 if (isFindLastIVRecurrenceKind(Kind)) {
745 APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
746 : APInt::getMinValue(NumBits);
748 } else {
749 if (IsSigned)
750 ValidRange =
752 APInt::getSignedMaxValue(NumBits) - 1);
753 else
754 ValidRange = ConstantRange::getNonEmpty(
755 APInt::getMinValue(NumBits), APInt::getMaxValue(NumBits) - 1);
756 }
757
758 LLVM_DEBUG(dbgs() << "LV: "
759 << (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV"
760 : "FindFirstIV")
761 << " valid range is " << ValidRange
762 << ", and the range of " << *AR << " is " << IVRange
763 << "\n");
764
765 // Ensure the induction variable does not wrap around by verifying that
766 // its range is fully contained within the valid range.
767 return ValidRange.contains(IVRange);
768 };
769 if (isFindLastIVRecurrenceKind(Kind)) {
770 if (CheckRange(true))
772 if (CheckRange(false))
774 return std::nullopt;
775 }
777 "Kind must either be a FindLastIV or FindFirstIV");
778
779 if (CheckRange(true))
781 if (CheckRange(false))
783 return std::nullopt;
784 };
785
786 if (auto RK = GetRecurKind(NonRdxPhi))
787 return InstDesc(I, *RK);
788
789 return InstDesc(false, I);
790}
791
794 const InstDesc &Prev) {
796 "Expected a cmp or select or call instruction");
797 if (!isMinMaxRecurrenceKind(Kind))
798 return InstDesc(false, I);
799
800 // We must handle the select(cmp()) as a single instruction. Advance to the
801 // select.
802 if (match(I, m_OneUse(m_Cmp()))) {
803 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
804 return InstDesc(Select, Prev.getRecKind());
805 }
806
807 // Only match select with single use cmp condition, or a min/max intrinsic.
808 if (!isa<IntrinsicInst>(I) &&
810 return InstDesc(false, I);
811
812 // Look for a min/max pattern.
813 if (match(I, m_UMin(m_Value(), m_Value())))
814 return InstDesc(Kind == RecurKind::UMin, I);
815 if (match(I, m_UMax(m_Value(), m_Value())))
816 return InstDesc(Kind == RecurKind::UMax, I);
817 if (match(I, m_SMax(m_Value(), m_Value())))
818 return InstDesc(Kind == RecurKind::SMax, I);
819 if (match(I, m_SMin(m_Value(), m_Value())))
820 return InstDesc(Kind == RecurKind::SMin, I);
822 return InstDesc(Kind == RecurKind::FMin, I);
824 return InstDesc(Kind == RecurKind::FMax, I);
825 if (match(I, m_FMinNum(m_Value(), m_Value())))
826 return InstDesc(Kind == RecurKind::FMin, I);
827 if (match(I, m_FMaxNum(m_Value(), m_Value())))
828 return InstDesc(Kind == RecurKind::FMax, I);
829 if (match(I, m_FMinimumNum(m_Value(), m_Value())))
830 return InstDesc(Kind == RecurKind::FMinimumNum, I);
831 if (match(I, m_FMaximumNum(m_Value(), m_Value())))
832 return InstDesc(Kind == RecurKind::FMaximumNum, I);
833 if (match(I, m_FMinimum(m_Value(), m_Value())))
834 return InstDesc(Kind == RecurKind::FMinimum, I);
835 if (match(I, m_FMaximum(m_Value(), m_Value())))
836 return InstDesc(Kind == RecurKind::FMaximum, I);
837
838 return InstDesc(false, I);
839}
840
841/// Returns true if the select instruction has users in the compare-and-add
842/// reduction pattern below. The select instruction argument is the last one
843/// in the sequence.
844///
845/// %sum.1 = phi ...
846/// ...
847/// %cmp = fcmp pred %0, %CFP
848/// %add = fadd %0, %sum.1
849/// %sum.2 = select %cmp, %add, %sum.1
853 if (!SI)
854 return InstDesc(false, I);
855
856 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
857 // Only handle single use cases for now.
858 if (!CI || !CI->hasOneUse())
859 return InstDesc(false, I);
860
861 Value *TrueVal = SI->getTrueValue();
862 Value *FalseVal = SI->getFalseValue();
863 // Handle only when either of operands of select instruction is a PHI
864 // node for now.
865 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
866 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
867 return InstDesc(false, I);
868
869 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
870 : dyn_cast<Instruction>(TrueVal);
871 if (!I1 || !I1->isBinaryOp())
872 return InstDesc(false, I);
873
874 Value *Op1, *Op2;
875 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
876 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
877 I1->isFast()) ||
878 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
879 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
880 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
881 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
882 return InstDesc(false, I);
883
886 if (!IPhi || IPhi != FalseVal)
887 return InstDesc(false, I);
888
889 return InstDesc(true, SI);
890}
891
893 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
894 FastMathFlags FuncFMF, ScalarEvolution *SE) {
895 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
896 switch (I->getOpcode()) {
897 default:
898 return InstDesc(false, I);
899 case Instruction::PHI:
900 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
901 case Instruction::Sub:
902 return InstDesc(
903 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
904 case Instruction::Add:
905 return InstDesc(
906 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
907 case Instruction::Mul:
908 return InstDesc(Kind == RecurKind::Mul, I);
909 case Instruction::And:
910 return InstDesc(Kind == RecurKind::And, I);
911 case Instruction::Or:
912 return InstDesc(Kind == RecurKind::Or, I);
913 case Instruction::Xor:
914 return InstDesc(Kind == RecurKind::Xor, I);
915 case Instruction::FDiv:
916 case Instruction::FMul:
917 return InstDesc(Kind == RecurKind::FMul, I,
918 I->hasAllowReassoc() ? nullptr : I);
919 case Instruction::FSub:
920 case Instruction::FAdd:
921 return InstDesc(Kind == RecurKind::FAdd, I,
922 I->hasAllowReassoc() ? nullptr : I);
923 case Instruction::Select:
924 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
925 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
928 if (isFindIVRecurrenceKind(Kind) && SE)
929 return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
930 [[fallthrough]];
931 case Instruction::FCmp:
932 case Instruction::ICmp:
933 case Instruction::Call:
934 if (isAnyOfRecurrenceKind(Kind))
935 return isAnyOfPattern(L, OrigPhi, I, Prev);
936 auto HasRequiredFMF = [&]() {
937 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
938 return true;
939 if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
940 return true;
941 // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan
942 // flags since NaN and signed zeroes are propagated in the intrinsic
943 // implementation.
946 match(I,
949 };
951 return isMinMaxPattern(I, Kind, Prev);
952 if (isFPMinMaxRecurrenceKind(Kind)) {
953 InstDesc Res = isMinMaxPattern(I, Kind, Prev);
954 if (!Res.isRecurrence())
955 return InstDesc(false, I);
956 if (HasRequiredFMF())
957 return Res;
958 // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
959 // intrinsics with extra checks ensuring the vector loop handles only
960 // non-NaN inputs.
962 assert(Kind == RecurKind::FMax &&
963 "unexpected recurrence kind for maxnum");
965 }
967 assert(Kind == RecurKind::FMin &&
968 "unexpected recurrence kind for minnum");
970 }
971 return InstDesc(false, I);
972 }
974 return InstDesc(Kind == RecurKind::FMulAdd, I,
975 I->hasAllowReassoc() ? nullptr : I);
976 return InstDesc(false, I);
977 }
978}
979
982 unsigned MaxNumUses) {
983 unsigned NumUses = 0;
984 for (const Use &U : I->operands()) {
985 if (Insts.count(dyn_cast<Instruction>(U)))
986 ++NumUses;
987 if (NumUses > MaxNumUses)
988 return true;
989 }
990
991 return false;
992}
993
995 RecurrenceDescriptor &RedDes,
997 DominatorTree *DT,
998 ScalarEvolution *SE) {
999 BasicBlock *Header = TheLoop->getHeader();
1000 Function &F = *Header->getParent();
1001 FastMathFlags FMF;
1002 FMF.setNoNaNs(
1003 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
1004 FMF.setNoSignedZeros(
1005 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
1006
1007 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
1008 SE)) {
1009 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1010 return true;
1011 }
1012 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT,
1013 SE)) {
1014 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1015 return true;
1016 }
1017 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, FMF, RedDes,
1018 DB, AC, DT, SE)) {
1019 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1020 << "\n");
1021 return true;
1022 }
1023 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
1024 SE)) {
1025 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1026 return true;
1027 }
1028 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
1029 SE)) {
1030 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1031 return true;
1032 }
1033 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
1034 SE)) {
1035 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1036 return true;
1037 }
1038 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
1039 SE)) {
1040 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1041 return true;
1042 }
1043 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
1044 SE)) {
1045 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
1046 return true;
1047 }
1048 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
1049 SE)) {
1050 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
1051 return true;
1052 }
1053 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
1054 SE)) {
1055 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
1056 return true;
1057 }
1058 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
1059 SE)) {
1060 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
1061 return true;
1062 }
1063 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
1064 SE)) {
1065 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1066 << "\n");
1067 return true;
1068 }
1069 if (AddReductionVar(Phi, RecurKind::FindLastIVSMax, TheLoop, FMF, RedDes, DB,
1070 AC, DT, SE)) {
1071 LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n");
1072 return true;
1073 }
1074 if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB,
1075 AC, DT, SE)) {
1076 LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n");
1077 return true;
1078 }
1079 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
1080 SE)) {
1081 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1082 return true;
1083 }
1084 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1085 SE)) {
1086 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1087 return true;
1088 }
1089 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
1090 SE)) {
1091 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
1092 return true;
1093 }
1094 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
1095 SE)) {
1096 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
1097 return true;
1098 }
1099 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1100 SE)) {
1101 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1102 return true;
1103 }
1104 if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
1105 SE)) {
1106 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
1107 return true;
1108 }
1109 if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
1110 SE)) {
1111 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
1112 return true;
1113 }
1114 if (AddReductionVar(Phi, RecurKind::FMaximumNum, TheLoop, FMF, RedDes, DB, AC,
1115 DT, SE)) {
1116 LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi
1117 << "\n");
1118 return true;
1119 }
1120 if (AddReductionVar(Phi, RecurKind::FMinimumNum, TheLoop, FMF, RedDes, DB, AC,
1121 DT, SE)) {
1122 LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi
1123 << "\n");
1124 return true;
1125 }
1126
1127 // Not a reduction of known type.
1128 return false;
1129}
1130
1132 DominatorTree *DT) {
1133
1134 // Ensure the phi node is in the loop header and has two incoming values.
1135 if (Phi->getParent() != TheLoop->getHeader() ||
1136 Phi->getNumIncomingValues() != 2)
1137 return false;
1138
1139 // Ensure the loop has a preheader and a single latch block. The loop
1140 // vectorizer will need the latch to set up the next iteration of the loop.
1141 auto *Preheader = TheLoop->getLoopPreheader();
1142 auto *Latch = TheLoop->getLoopLatch();
1143 if (!Preheader || !Latch)
1144 return false;
1145
1146 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1147 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1148 Phi->getBasicBlockIndex(Latch) < 0)
1149 return false;
1150
1151 // Get the previous value. The previous value comes from the latch edge while
1152 // the initial value comes from the preheader edge.
1153 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1154
1155 // If Previous is a phi in the header, go through incoming values from the
1156 // latch until we find a non-phi value. Use this as the new Previous, all uses
1157 // in the header will be dominated by the original phi, but need to be moved
1158 // after the non-phi previous value.
1160 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1161 if (PrevPhi->getParent() != Phi->getParent())
1162 return false;
1163 if (!SeenPhis.insert(PrevPhi).second)
1164 return false;
1165 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1166 }
1167
1168 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1169 return false;
1170
1171 // Ensure every user of the phi node (recursively) is dominated by the
1172 // previous value. The dominance requirement ensures the loop vectorizer will
1173 // not need to vectorize the initial value prior to the first iteration of the
1174 // loop.
1175 // TODO: Consider extending this sinking to handle memory instructions.
1176
1178 BasicBlock *PhiBB = Phi->getParent();
1180 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1181 // Cyclic dependence.
1182 if (Previous == SinkCandidate)
1183 return false;
1184
1185 if (!Seen.insert(SinkCandidate).second)
1186 return true;
1187 if (DT->dominates(Previous,
1188 SinkCandidate)) // We already are good w/o sinking.
1189 return true;
1190
1191 if (SinkCandidate->getParent() != PhiBB ||
1192 SinkCandidate->mayHaveSideEffects() ||
1193 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1194 return false;
1195
1196 // If we reach a PHI node that is not dominated by Previous, we reached a
1197 // header PHI. No need for sinking.
1198 if (isa<PHINode>(SinkCandidate))
1199 return true;
1200
1201 // Sink User tentatively and check its users
1202 WorkList.push_back(SinkCandidate);
1203 return true;
1204 };
1205
1206 WorkList.push_back(Phi);
1207 // Try to recursively sink instructions and their users after Previous.
1208 while (!WorkList.empty()) {
1209 Instruction *Current = WorkList.pop_back_val();
1210 for (User *User : Current->users()) {
1211 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1212 return false;
1213 }
1214 }
1215
1216 return true;
1217}
1218
1220 switch (Kind) {
1221 case RecurKind::Sub:
1222 return Instruction::Sub;
1224 case RecurKind::Add:
1225 return Instruction::Add;
1226 case RecurKind::Mul:
1227 return Instruction::Mul;
1228 case RecurKind::AnyOf:
1233 case RecurKind::Or:
1234 return Instruction::Or;
1235 case RecurKind::And:
1236 return Instruction::And;
1237 case RecurKind::Xor:
1238 return Instruction::Xor;
1239 case RecurKind::FMul:
1240 return Instruction::FMul;
1241 case RecurKind::FMulAdd:
1242 case RecurKind::FAdd:
1243 return Instruction::FAdd;
1244 case RecurKind::SMax:
1245 case RecurKind::SMin:
1246 case RecurKind::UMax:
1247 case RecurKind::UMin:
1248 return Instruction::ICmp;
1249 case RecurKind::FMax:
1250 case RecurKind::FMin:
1255 return Instruction::FCmp;
1256 default:
1257 llvm_unreachable("Unknown recurrence operation");
1258 }
1259}
1260
1263 SmallVector<Instruction *, 4> ReductionOperations;
1264 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1265
1266 // Search down from the Phi to the LoopExitInstr, looking for instructions
1267 // with a single user of the correct type for the reduction.
1268
1269 // Note that we check that the type of the operand is correct for each item in
1270 // the chain, including the last (the loop exit value). This can come up from
1271 // sub, which would otherwise be treated as an add reduction. MinMax also need
1272 // to check for a pair of icmp/select, for which we use getNextInstruction and
1273 // isCorrectOpcode functions to step the right number of instruction, and
1274 // check the icmp/select pair.
1275 // FIXME: We also do not attempt to look through Select's yet, which might
1276 // be part of the reduction chain, or attempt to looks through And's to find a
1277 // smaller bitwidth. Subs are also currently not allowed (which are usually
1278 // treated as part of a add reduction) as they are expected to generally be
1279 // more expensive than out-of-loop reductions, and need to be costed more
1280 // carefully.
1281 unsigned ExpectedUses = 1;
1282 if (IsMinMax)
1283 ExpectedUses = 2;
1284
1285 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1286 for (auto *User : Cur->users()) {
1288 if (isa<PHINode>(UI))
1289 continue;
1290 if (IsMinMax) {
1291 // We are expecting a icmp/select pair, which we go to the next select
1292 // instruction if we can. We already know that Cur has 2 uses.
1293 if (isa<SelectInst>(UI))
1294 return UI;
1295 continue;
1296 }
1297 return UI;
1298 }
1299 return nullptr;
1300 };
1301 auto isCorrectOpcode = [&](Instruction *Cur) {
1302 if (IsMinMax) {
1303 Value *LHS, *RHS;
1305 matchSelectPattern(Cur, LHS, RHS).Flavor);
1306 }
1307 // Recognize a call to the llvm.fmuladd intrinsic.
1308 if (isFMulAddIntrinsic(Cur))
1309 return true;
1310
1311 if (Cur->getOpcode() == Instruction::Sub &&
1313 return true;
1314
1315 return Cur->getOpcode() == getOpcode();
1316 };
1317
1318 // Attempt to look through Phis which are part of the reduction chain
1319 unsigned ExtraPhiUses = 0;
1320 Instruction *RdxInstr = LoopExitInstr;
1321 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1322 if (ExitPhi->getNumIncomingValues() != 2)
1323 return {};
1324
1325 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1326 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1327
1328 Instruction *Chain = nullptr;
1329 if (Inc0 == Phi)
1330 Chain = Inc1;
1331 else if (Inc1 == Phi)
1332 Chain = Inc0;
1333 else
1334 return {};
1335
1336 RdxInstr = Chain;
1337 ExtraPhiUses = 1;
1338 }
1339
1340 // The loop exit instruction we check first (as a quick test) but add last. We
1341 // check the opcode is correct (and dont allow them to be Subs) and that they
1342 // have expected to have the expected number of uses. They will have one use
1343 // from the phi and one from a LCSSA value, no matter the type.
1344 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1345 return {};
1346
1347 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1348 // for conditional reductions.
1349 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1350 return {};
1351
1352 Instruction *Cur = getNextInstruction(Phi);
1353
1354 // Each other instruction in the chain should have the expected number of uses
1355 // and be the correct opcode.
1356 while (Cur != RdxInstr) {
1357 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1358 return {};
1359
1360 ReductionOperations.push_back(Cur);
1361 Cur = getNextInstruction(Cur);
1362 }
1363
1364 ReductionOperations.push_back(Cur);
1365 return ReductionOperations;
1366}
1367
1368InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1369 const SCEV *Step, BinaryOperator *BOp,
1371 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1372 assert(IK != IK_NoInduction && "Not an induction");
1373
1374 // Start value type should match the induction kind and the value
1375 // itself should not be null.
1376 assert(StartValue && "StartValue is null");
1377 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1378 "StartValue is not a pointer for pointer induction");
1379 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1380 "StartValue is not an integer for integer induction");
1381
1382 // Check the Step Value. It should be non-zero integer value.
1383 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1384 "Step value is zero");
1385
1386 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1387 "StepValue is not an integer");
1388
1389 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1390 "StepValue is not FP for FpInduction");
1391 assert((IK != IK_FpInduction ||
1392 (InductionBinOp &&
1393 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1394 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1395 "Binary opcode should be specified for FP induction");
1396
1397 if (Casts)
1398 llvm::append_range(RedundantCasts, *Casts);
1399}
1400
1402 if (isa<SCEVConstant>(Step))
1403 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1404 return nullptr;
1405}
1406
1408 ScalarEvolution *SE,
1410
1411 // Here we only handle FP induction variables.
1412 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1413
1414 if (TheLoop->getHeader() != Phi->getParent())
1415 return false;
1416
1417 // The loop may have multiple entrances or multiple exits; we can analyze
1418 // this phi if it has a unique entry value and a unique backedge value.
1419 if (Phi->getNumIncomingValues() != 2)
1420 return false;
1421 Value *BEValue = nullptr, *StartValue = nullptr;
1422 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1423 BEValue = Phi->getIncomingValue(0);
1424 StartValue = Phi->getIncomingValue(1);
1425 } else {
1426 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1427 "Unexpected Phi node in the loop");
1428 BEValue = Phi->getIncomingValue(1);
1429 StartValue = Phi->getIncomingValue(0);
1430 }
1431
1433 if (!BOp)
1434 return false;
1435
1436 Value *Addend = nullptr;
1437 if (BOp->getOpcode() == Instruction::FAdd) {
1438 if (BOp->getOperand(0) == Phi)
1439 Addend = BOp->getOperand(1);
1440 else if (BOp->getOperand(1) == Phi)
1441 Addend = BOp->getOperand(0);
1442 } else if (BOp->getOpcode() == Instruction::FSub)
1443 if (BOp->getOperand(0) == Phi)
1444 Addend = BOp->getOperand(1);
1445
1446 if (!Addend)
1447 return false;
1448
1449 // The addend should be loop invariant
1450 if (auto *I = dyn_cast<Instruction>(Addend))
1451 if (TheLoop->contains(I))
1452 return false;
1453
1454 // FP Step has unknown SCEV
1455 const SCEV *Step = SE->getUnknown(Addend);
1456 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1457 return true;
1458}
1459
1460/// This function is called when we suspect that the update-chain of a phi node
1461/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1462/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1463/// predicate P under which the SCEV expression for the phi can be the
1464/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1465/// cast instructions that are involved in the update-chain of this induction.
1466/// A caller that adds the required runtime predicate can be free to drop these
1467/// cast instructions, and compute the phi using \p AR (instead of some scev
1468/// expression with casts).
1469///
1470/// For example, without a predicate the scev expression can take the following
1471/// form:
1472/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1473///
1474/// It corresponds to the following IR sequence:
1475/// %for.body:
1476/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1477/// %casted_phi = "ExtTrunc i64 %x"
1478/// %add = add i64 %casted_phi, %step
1479///
1480/// where %x is given in \p PN,
1481/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1482/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1483/// several forms, for example, such as:
1484/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1485/// or:
1486/// ExtTrunc2: %t = shl %x, m
1487/// %casted_phi = ashr %t, m
1488///
1489/// If we are able to find such sequence, we return the instructions
1490/// we found, namely %casted_phi and the instructions on its use-def chain up
1491/// to the phi (not including the phi).
1493 const SCEVUnknown *PhiScev,
1494 const SCEVAddRecExpr *AR,
1495 SmallVectorImpl<Instruction *> &CastInsts) {
1496
1497 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1498 auto *PN = cast<PHINode>(PhiScev->getValue());
1499 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1500 const Loop *L = AR->getLoop();
1501
1502 // Find any cast instructions that participate in the def-use chain of
1503 // PhiScev in the loop.
1504 // FORNOW/TODO: We currently expect the def-use chain to include only
1505 // two-operand instructions, where one of the operands is an invariant.
1506 // createAddRecFromPHIWithCasts() currently does not support anything more
1507 // involved than that, so we keep the search simple. This can be
1508 // extended/generalized as needed.
1509
1510 auto getDef = [&](const Value *Val) -> Value * {
1511 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1512 if (!BinOp)
1513 return nullptr;
1514 Value *Op0 = BinOp->getOperand(0);
1515 Value *Op1 = BinOp->getOperand(1);
1516 Value *Def = nullptr;
1517 if (L->isLoopInvariant(Op0))
1518 Def = Op1;
1519 else if (L->isLoopInvariant(Op1))
1520 Def = Op0;
1521 return Def;
1522 };
1523
1524 // Look for the instruction that defines the induction via the
1525 // loop backedge.
1526 BasicBlock *Latch = L->getLoopLatch();
1527 if (!Latch)
1528 return false;
1529 Value *Val = PN->getIncomingValueForBlock(Latch);
1530 if (!Val)
1531 return false;
1532
1533 // Follow the def-use chain until the induction phi is reached.
1534 // If on the way we encounter a Value that has the same SCEV Expr as the
1535 // phi node, we can consider the instructions we visit from that point
1536 // as part of the cast-sequence that can be ignored.
1537 bool InCastSequence = false;
1538 auto *Inst = dyn_cast<Instruction>(Val);
1539 while (Val != PN) {
1540 // If we encountered a phi node other than PN, or if we left the loop,
1541 // we bail out.
1542 if (!Inst || !L->contains(Inst)) {
1543 return false;
1544 }
1545 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1546 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1547 InCastSequence = true;
1548 if (InCastSequence) {
1549 // Only the last instruction in the cast sequence is expected to have
1550 // uses outside the induction def-use chain.
1551 if (!CastInsts.empty())
1552 if (!Inst->hasOneUse())
1553 return false;
1554 CastInsts.push_back(Inst);
1555 }
1556 Val = getDef(Val);
1557 if (!Val)
1558 return false;
1559 Inst = dyn_cast<Instruction>(Val);
1560 }
1561
1562 return InCastSequence;
1563}
1564
1567 InductionDescriptor &D, bool Assume) {
1568 Type *PhiTy = Phi->getType();
1569
1570 // Handle integer and pointer inductions variables.
1571 // Now we handle also FP induction but not trying to make a
1572 // recurrent expression from the PHI node in-place.
1573
1574 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1575 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1576 return false;
1577
1578 if (PhiTy->isFloatingPointTy())
1579 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1580
1581 const SCEV *PhiScev = PSE.getSCEV(Phi);
1582 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1583
1584 // We need this expression to be an AddRecExpr.
1585 if (Assume && !AR)
1586 AR = PSE.getAsAddRec(Phi);
1587
1588 if (!AR) {
1589 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1590 return false;
1591 }
1592
1593 // Record any Cast instructions that participate in the induction update
1594 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1595 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1596 // only after enabling Assume with PSCEV, this means we may have encountered
1597 // cast instructions that required adding a runtime check in order to
1598 // guarantee the correctness of the AddRecurrence respresentation of the
1599 // induction.
1600 if (PhiScev != AR && SymbolicPhi) {
1602 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1603 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1604 }
1605
1606 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1607}
1608
1610 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1611 InductionDescriptor &D, const SCEV *Expr,
1612 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1613 Type *PhiTy = Phi->getType();
1614 // isSCEVable returns true for integer and pointer types.
1615 if (!SE->isSCEVable(PhiTy))
1616 return false;
1617
1618 // Check that the PHI is consecutive.
1619 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1620 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1621
1622 if (!AR) {
1623 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1624 return false;
1625 }
1626
1627 if (AR->getLoop() != TheLoop) {
1628 // FIXME: We should treat this as a uniform. Unfortunately, we
1629 // don't currently know how to handled uniform PHIs.
1630 LLVM_DEBUG(
1631 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
1632 return false;
1633 }
1634
1635 // This function assumes that InductionPhi is called only on Phi nodes
1636 // present inside loop headers. Check for the same, and throw an assert if
1637 // the current Phi is not present inside the loop header.
1638 assert(Phi->getParent() == AR->getLoop()->getHeader()
1639 && "Invalid Phi node, not present in loop header");
1640
1641 Value *StartValue =
1642 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
1643
1644 BasicBlock *Latch = AR->getLoop()->getLoopLatch();
1645 if (!Latch)
1646 return false;
1647
1648 const SCEV *Step = AR->getStepRecurrence(*SE);
1649 // Calculate the pointer stride and check if it is consecutive.
1650 // The stride may be a constant or a loop invariant integer value.
1651 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
1652 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
1653 return false;
1654
1655 if (PhiTy->isIntegerTy()) {
1656 BinaryOperator *BOp =
1657 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1658 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1659 CastsToIgnore);
1660 return true;
1661 }
1662
1663 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1664
1665 // This allows induction variables w/non-constant steps.
1666 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1667 return true;
1668}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:546
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define LLVM_DEBUG(...)
Definition Debug.h:119
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
This is the shared class of boolean and integer constants.
Definition Constants.h:87
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noSignedZeros() const
Definition FMF.h:67
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
static FastMathFlags getFast()
Definition FMF.h:50
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V, bool HasCoroSuspendInst=false) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static bool isFindFirstIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static LLVM_ABI InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev)
Returns a struct describing if the instruction is a llvm.
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
bool use_empty() const
Definition Value.h:346
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaxNum(const Opnd0 &Op0, const Opnd1 &Op1)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximumNum(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimumNum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinNum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2138
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:295
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindLastIVUMax
FindLast reduction with select(cmp(),x,y) where one of (x,y) is increasing loop induction,...
@ FindFirstIVUMin
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ FindLastIVSMax
FindFirst reduction with select(icmp(),x,y) where one of (x,y) is a decreasing loop induction,...
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?