LLVM 20.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
19#include "llvm/IR/Dominators.h"
22#include "llvm/IR/ValueHandle.h"
23#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27using namespace llvm::PatternMatch;
28
29#define DEBUG_TYPE "iv-descriptors"
30
33 for (const Use &Use : I->operands())
34 if (!Set.count(dyn_cast<Instruction>(Use)))
35 return false;
36 return true;
37}
38
40 switch (Kind) {
41 default:
42 break;
43 case RecurKind::Add:
44 case RecurKind::Mul:
45 case RecurKind::Or:
46 case RecurKind::And:
47 case RecurKind::Xor:
48 case RecurKind::SMax:
49 case RecurKind::SMin:
50 case RecurKind::UMax:
51 case RecurKind::UMin:
56 return true;
57 }
58 return false;
59}
60
62 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
63}
64
65/// Determines if Phi may have been type-promoted. If Phi has a single user
66/// that ANDs the Phi with a type mask, return the user. RT is updated to
67/// account for the narrower bit width represented by the mask, and the AND
68/// instruction is added to CI.
72 if (!Phi->hasOneUse())
73 return Phi;
74
75 const APInt *M = nullptr;
76 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
77
78 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
79 // with a new integer type of the corresponding bit width.
80 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
81 int32_t Bits = (*M + 1).exactLogBase2();
82 if (Bits > 0) {
83 RT = IntegerType::get(Phi->getContext(), Bits);
84 Visited.insert(Phi);
85 CI.insert(J);
86 return J;
87 }
88 }
89 return Phi;
90}
91
92/// Compute the minimal bit width needed to represent a reduction whose exit
93/// instruction is given by Exit.
94static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
95 DemandedBits *DB,
97 DominatorTree *DT) {
98 bool IsSigned = false;
99 const DataLayout &DL = Exit->getDataLayout();
100 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
101
102 if (DB) {
103 // Use the demanded bits analysis to determine the bits that are live out
104 // of the exit instruction, rounding up to the nearest power of two. If the
105 // use of demanded bits results in a smaller bit width, we know the value
106 // must be positive (i.e., IsSigned = false), because if this were not the
107 // case, the sign bit would have been demanded.
108 auto Mask = DB->getDemandedBits(Exit);
109 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
110 }
111
112 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
113 // If demanded bits wasn't able to limit the bit width, we can try to use
114 // value tracking instead. This can be the case, for example, if the value
115 // may be negative.
116 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
117 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
118 MaxBitWidth = NumTypeBits - NumSignBits;
119 KnownBits Bits = computeKnownBits(Exit, DL);
120 if (!Bits.isNonNegative()) {
121 // If the value is not known to be non-negative, we set IsSigned to true,
122 // meaning that we will use sext instructions instead of zext
123 // instructions to restore the original type.
124 IsSigned = true;
125 // Make sure at least one sign bit is included in the result, so it
126 // will get properly sign-extended.
127 ++MaxBitWidth;
128 }
129 }
130 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
131
132 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
133 IsSigned);
134}
135
136/// Collect cast instructions that can be ignored in the vectorizer's cost
137/// model, given a reduction exit value and the minimal type in which the
138// reduction can be represented. Also search casts to the recurrence type
139// to find the minimum width used by the recurrence.
140static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
141 Type *RecurrenceType,
143 unsigned &MinWidthCastToRecurTy) {
144
147 Worklist.push_back(Exit);
148 MinWidthCastToRecurTy = -1U;
149
150 while (!Worklist.empty()) {
151 Instruction *Val = Worklist.pop_back_val();
152 Visited.insert(Val);
153 if (auto *Cast = dyn_cast<CastInst>(Val)) {
154 if (Cast->getSrcTy() == RecurrenceType) {
155 // If the source type of a cast instruction is equal to the recurrence
156 // type, it will be eliminated, and should be ignored in the vectorizer
157 // cost model.
158 Casts.insert(Cast);
159 continue;
160 }
161 if (Cast->getDestTy() == RecurrenceType) {
162 // The minimum width used by the recurrence is found by checking for
163 // casts on its operands. The minimum width is used by the vectorizer
164 // when finding the widest type for in-loop reductions without any
165 // loads/stores.
166 MinWidthCastToRecurTy = std::min<unsigned>(
167 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
168 continue;
169 }
170 }
171 // Add all operands to the work list if they are loop-varying values that
172 // we haven't yet visited.
173 for (Value *O : cast<User>(Val)->operands())
174 if (auto *I = dyn_cast<Instruction>(O))
175 if (TheLoop->contains(I) && !Visited.count(I))
176 Worklist.push_back(I);
177 }
178}
179
180// Check if a given Phi node can be recognized as an ordered reduction for
181// vectorizing floating point operations without unsafe math.
182static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
183 Instruction *Exit, PHINode *Phi) {
184 // Currently only FAdd and FMulAdd are supported.
185 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
186 return false;
187
188 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
189 return false;
190
191 if (Kind == RecurKind::FMulAdd &&
193 return false;
194
195 // Ensure the exit instruction has only one user other than the reduction PHI
196 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
197 return false;
198
199 // The only pattern accepted is the one in which the reduction PHI
200 // is used as one of the operands of the exit instruction
201 auto *Op0 = Exit->getOperand(0);
202 auto *Op1 = Exit->getOperand(1);
203 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
204 return false;
205 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
206 return false;
207
208 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
209 << ", ExitInst: " << *Exit << "\n");
210
211 return true;
212}
213
215 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
218 if (Phi->getNumIncomingValues() != 2)
219 return false;
220
221 // Reduction variables are only found in the loop header block.
222 if (Phi->getParent() != TheLoop->getHeader())
223 return false;
224
225 // Obtain the reduction start value from the value that comes from the loop
226 // preheader.
227 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
228
229 // ExitInstruction is the single value which is used outside the loop.
230 // We only allow for a single reduction value to be used outside the loop.
231 // This includes users of the reduction, variables (which form a cycle
232 // which ends in the phi node).
233 Instruction *ExitInstruction = nullptr;
234
235 // Variable to keep last visited store instruction. By the end of the
236 // algorithm this variable will be either empty or having intermediate
237 // reduction value stored in invariant address.
238 StoreInst *IntermediateStore = nullptr;
239
240 // Indicates that we found a reduction operation in our scan.
241 bool FoundReduxOp = false;
242
243 // We start with the PHI node and scan for all of the users of this
244 // instruction. All users must be instructions that can be used as reduction
245 // variables (such as ADD). We must have a single out-of-block user. The cycle
246 // must include the original PHI.
247 bool FoundStartPHI = false;
248
249 // To recognize min/max patterns formed by a icmp select sequence, we store
250 // the number of instruction we saw from the recognized min/max pattern,
251 // to make sure we only see exactly the two instructions.
252 unsigned NumCmpSelectPatternInst = 0;
253 InstDesc ReduxDesc(false, nullptr);
254
255 // Data used for determining if the recurrence has been type-promoted.
256 Type *RecurrenceType = Phi->getType();
258 unsigned MinWidthCastToRecurrenceType;
259 Instruction *Start = Phi;
260 bool IsSigned = false;
261
264
265 // Return early if the recurrence kind does not match the type of Phi. If the
266 // recurrence kind is arithmetic, we attempt to look through AND operations
267 // resulting from the type promotion performed by InstCombine. Vector
268 // operations are not limited to the legal integer widths, so we may be able
269 // to evaluate the reduction in the narrower width.
270 if (RecurrenceType->isFloatingPointTy()) {
272 return false;
273 } else if (RecurrenceType->isIntegerTy()) {
274 if (!isIntegerRecurrenceKind(Kind))
275 return false;
276 if (!isMinMaxRecurrenceKind(Kind))
277 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
278 } else {
279 // Pointer min/max may exist, but it is not supported as a reduction op.
280 return false;
281 }
282
283 Worklist.push_back(Start);
284 VisitedInsts.insert(Start);
285
286 // Start with all flags set because we will intersect this with the reduction
287 // flags from all the reduction operations.
289
290 // The first instruction in the use-def chain of the Phi node that requires
291 // exact floating point operations.
292 Instruction *ExactFPMathInst = nullptr;
293
294 // A value in the reduction can be used:
295 // - By the reduction:
296 // - Reduction operation:
297 // - One use of reduction value (safe).
298 // - Multiple use of reduction value (not safe).
299 // - PHI:
300 // - All uses of the PHI must be the reduction (safe).
301 // - Otherwise, not safe.
302 // - By instructions outside of the loop (safe).
303 // * One value may have several outside users, but all outside
304 // uses must be of the same value.
305 // - By store instructions with a loop invariant address (safe with
306 // the following restrictions):
307 // * If there are several stores, all must have the same address.
308 // * Final value should be stored in that loop invariant address.
309 // - By an instruction that is not part of the reduction (not safe).
310 // This is either:
311 // * An instruction type other than PHI or the reduction operation.
312 // * A PHI in the header other than the initial PHI.
313 while (!Worklist.empty()) {
314 Instruction *Cur = Worklist.pop_back_val();
315
316 // Store instructions are allowed iff it is the store of the reduction
317 // value to the same loop invariant memory location.
318 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
319 if (!SE) {
320 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
321 << "Scalar Evolution Analysis\n");
322 return false;
323 }
324
325 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
326 // Check it is the same address as previous stores
327 if (IntermediateStore) {
328 const SCEV *OtherScev =
330
331 if (OtherScev != PtrScev) {
332 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
333 << "inside the loop: " << *SI->getPointerOperand()
334 << " and "
335 << *IntermediateStore->getPointerOperand() << '\n');
336 return false;
337 }
338 }
339
340 // Check the pointer is loop invariant
341 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
342 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
343 << "inside the loop: " << *SI->getPointerOperand()
344 << '\n');
345 return false;
346 }
347
348 // IntermediateStore is always the last store in the loop.
350 continue;
351 }
352
353 // No Users.
354 // If the instruction has no users then this is a broken chain and can't be
355 // a reduction variable.
356 if (Cur->use_empty())
357 return false;
358
359 bool IsAPhi = isa<PHINode>(Cur);
360
361 // A header PHI use other than the original PHI.
362 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
363 return false;
364
365 // Reductions of instructions such as Div, and Sub is only possible if the
366 // LHS is the reduction variable.
367 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
368 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
369 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
370 return false;
371
372 // Any reduction instruction must be of one of the allowed kinds. We ignore
373 // the starting value (the Phi or an AND instruction if the Phi has been
374 // type-promoted).
375 if (Cur != Start) {
376 ReduxDesc =
377 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
378 ExactFPMathInst = ExactFPMathInst == nullptr
379 ? ReduxDesc.getExactFPMathInst()
380 : ExactFPMathInst;
381 if (!ReduxDesc.isRecurrence())
382 return false;
383 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
384 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
385 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
386 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
387 // Accept FMF on either fcmp or select of a min/max idiom.
388 // TODO: This is a hack to work-around the fact that FMF may not be
389 // assigned/propagated correctly. If that problem is fixed or we
390 // standardize on fmin/fmax via intrinsics, this can be removed.
391 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
392 CurFMF |= FCmp->getFastMathFlags();
393 }
394 FMF &= CurFMF;
395 }
396 // Update this reduction kind if we matched a new instruction.
397 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
398 // state accurate while processing the worklist?
399 if (ReduxDesc.getRecKind() != RecurKind::None)
400 Kind = ReduxDesc.getRecKind();
401 }
402
403 bool IsASelect = isa<SelectInst>(Cur);
404
405 // A conditional reduction operation must only have 2 or less uses in
406 // VisitedInsts.
407 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
408 hasMultipleUsesOf(Cur, VisitedInsts, 2))
409 return false;
410
411 // A reduction operation must only have one use of the reduction value.
412 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
413 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))
414 return false;
415
416 // All inputs to a PHI node must be a reduction value.
417 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
418 return false;
419
420 if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::IAnyOf) &&
421 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
422 ++NumCmpSelectPatternInst;
423 if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::FAnyOf) &&
424 (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
425 ++NumCmpSelectPatternInst;
426
427 // Check whether we found a reduction operator.
428 FoundReduxOp |= !IsAPhi && Cur != Start;
429
430 // Process users of current instruction. Push non-PHI nodes after PHI nodes
431 // onto the stack. This way we are going to have seen all inputs to PHI
432 // nodes once we get to them.
435 for (User *U : Cur->users()) {
436 Instruction *UI = cast<Instruction>(U);
437
438 // If the user is a call to llvm.fmuladd then the instruction can only be
439 // the final operand.
440 if (isFMulAddIntrinsic(UI))
441 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
442 return false;
443
444 // Check if we found the exit user.
445 BasicBlock *Parent = UI->getParent();
446 if (!TheLoop->contains(Parent)) {
447 // If we already know this instruction is used externally, move on to
448 // the next user.
449 if (ExitInstruction == Cur)
450 continue;
451
452 // Exit if you find multiple values used outside or if the header phi
453 // node is being used. In this case the user uses the value of the
454 // previous iteration, in which case we would loose "VF-1" iterations of
455 // the reduction operation if we vectorize.
456 if (ExitInstruction != nullptr || Cur == Phi)
457 return false;
458
459 // The instruction used by an outside user must be the last instruction
460 // before we feed back to the reduction phi. Otherwise, we loose VF-1
461 // operations on the value.
462 if (!is_contained(Phi->operands(), Cur))
463 return false;
464
465 ExitInstruction = Cur;
466 continue;
467 }
468
469 // Process instructions only once (termination). Each reduction cycle
470 // value must only be used once, except by phi nodes and min/max
471 // reductions which are represented as a cmp followed by a select.
472 InstDesc IgnoredVal(false, nullptr);
473 if (VisitedInsts.insert(UI).second) {
474 if (isa<PHINode>(UI)) {
475 PHIs.push_back(UI);
476 } else {
477 StoreInst *SI = dyn_cast<StoreInst>(UI);
478 if (SI && SI->getPointerOperand() == Cur) {
479 // Reduction variable chain can only be stored somewhere but it
480 // can't be used as an address.
481 return false;
482 }
483 NonPHIs.push_back(UI);
484 }
485 } else if (!isa<PHINode>(UI) &&
486 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
487 !isa<SelectInst>(UI)) ||
488 (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
489 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
490 .isRecurrence() &&
491 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
492 return false;
493
494 // Remember that we completed the cycle.
495 if (UI == Phi)
496 FoundStartPHI = true;
497 }
498 Worklist.append(PHIs.begin(), PHIs.end());
499 Worklist.append(NonPHIs.begin(), NonPHIs.end());
500 }
501
502 // This means we have seen one but not the other instruction of the
503 // pattern or more than just a select and cmp. Zero implies that we saw a
504 // llvm.min/max intrinsic, which is always OK.
505 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
506 NumCmpSelectPatternInst != 0)
507 return false;
508
509 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
510 return false;
511
512 if (IntermediateStore) {
513 // Check that stored value goes to the phi node again. This way we make sure
514 // that the value stored in IntermediateStore is indeed the final reduction
515 // value.
516 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
517 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
518 << *IntermediateStore << '\n');
519 return false;
520 }
521
522 // If there is an exit instruction it's value should be stored in
523 // IntermediateStore
524 if (ExitInstruction &&
525 IntermediateStore->getValueOperand() != ExitInstruction) {
526 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
527 "store last calculated value of the reduction: "
528 << *IntermediateStore << '\n');
529 return false;
530 }
531
532 // If all uses are inside the loop (intermediate stores), then the
533 // reduction value after the loop will be the one used in the last store.
534 if (!ExitInstruction)
535 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
536 }
537
538 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
539 return false;
540
541 const bool IsOrdered =
542 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
543
544 if (Start != Phi) {
545 // If the starting value is not the same as the phi node, we speculatively
546 // looked through an 'and' instruction when evaluating a potential
547 // arithmetic reduction to determine if it may have been type-promoted.
548 //
549 // We now compute the minimal bit width that is required to represent the
550 // reduction. If this is the same width that was indicated by the 'and', we
551 // can represent the reduction in the smaller type. The 'and' instruction
552 // will be eliminated since it will essentially be a cast instruction that
553 // can be ignore in the cost model. If we compute a different type than we
554 // did when evaluating the 'and', the 'and' will not be eliminated, and we
555 // will end up with different kinds of operations in the recurrence
556 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
557 // the case.
558 //
559 // The vectorizer relies on InstCombine to perform the actual
560 // type-shrinking. It does this by inserting instructions to truncate the
561 // exit value of the reduction to the width indicated by RecurrenceType and
562 // then extend this value back to the original width. If IsSigned is false,
563 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
564 // used.
565 //
566 // TODO: We should not rely on InstCombine to rewrite the reduction in the
567 // smaller type. We should just generate a correctly typed expression
568 // to begin with.
569 Type *ComputedType;
570 std::tie(ComputedType, IsSigned) =
571 computeRecurrenceType(ExitInstruction, DB, AC, DT);
572 if (ComputedType != RecurrenceType)
573 return false;
574 }
575
576 // Collect cast instructions and the minimum width used by the recurrence.
577 // If the starting value is not the same as the phi node and the computed
578 // recurrence type is equal to the recurrence type, the recurrence expression
579 // will be represented in a narrower or wider type. If there are any cast
580 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
581 // Note that the 'and' instruction was already included in this list.
582 //
583 // TODO: A better way to represent this may be to tag in some way all the
584 // instructions that are a part of the reduction. The vectorizer cost
585 // model could then apply the recurrence type to these instructions,
586 // without needing a white list of instructions to ignore.
587 // This may also be useful for the inloop reductions, if it can be
588 // kept simple enough.
589 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
590 MinWidthCastToRecurrenceType);
591
592 // We found a reduction var if we have reached the original phi node and we
593 // only have a single instruction with out-of-loop users.
594
595 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
596 // is saved as part of the RecurrenceDescriptor.
597
598 // Save the description of this reduction variable.
599 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
600 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
601 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
602 RedDes = RD;
603
604 return true;
605}
606
607// We are looking for loops that do something like this:
608// int r = 0;
609// for (int i = 0; i < n; i++) {
610// if (src[i] > 3)
611// r = 3;
612// }
613// where the reduction value (r) only has two states, in this example 0 or 3.
614// The generated LLVM IR for this type of loop will be like this:
615// for.body:
616// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
617// ...
618// %cmp = icmp sgt i32 %5, 3
619// %spec.select = select i1 %cmp, i32 3, i32 %r
620// ...
621// In general we can support vectorization of loops where 'r' flips between
622// any two non-constants, provided they are loop invariant. The only thing
623// we actually care about at the end of the loop is whether or not any lane
624// in the selected vector is different from the start value. The final
625// across-vector reduction after the loop simply involves choosing the start
626// value if nothing changed (0 in the example above) or the other selected
627// value (3 in the example above).
630 Instruction *I, InstDesc &Prev) {
631 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
632 // the select.
633 CmpPredicate Pred;
634 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
635 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
636 return InstDesc(Select, Prev.getRecKind());
637 }
638
639 if (!match(I,
640 m_Select(m_Cmp(Pred, m_Value(), m_Value()), m_Value(), m_Value())))
641 return InstDesc(false, I);
642
643 SelectInst *SI = cast<SelectInst>(I);
644 Value *NonPhi = nullptr;
645
646 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
647 NonPhi = SI->getFalseValue();
648 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
649 NonPhi = SI->getTrueValue();
650 else
651 return InstDesc(false, I);
652
653 // We are looking for selects of the form:
654 // select(cmp(), phi, loop_invariant) or
655 // select(cmp(), loop_invariant, phi)
656 if (!Loop->isLoopInvariant(NonPhi))
657 return InstDesc(false, I);
658
659 return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
661}
662
663// We are looking for loops that do something like this:
664// int r = 0;
665// for (int i = 0; i < n; i++) {
666// if (src[i] > 3)
667// r = i;
668// }
669// The reduction value (r) is derived from either the values of an increasing
670// induction variable (i) sequence, or from the start value (0).
671// The LLVM IR generated for such loops would be as follows:
672// for.body:
673// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
674// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
675// ...
676// %cmp = icmp sgt i32 %5, 3
677// %spec.select = select i1 %cmp, i32 %i, i32 %r
678// %inc = add nsw i32 %i, 1
679// ...
680// Since 'i' is an increasing induction variable, the reduction value after the
681// loop will be the maximum value of 'i' that the condition (src[i] > 3) is
682// satisfied, or the start value (0 in the example above). When the start value
683// of the increasing induction variable 'i' is greater than the minimum value of
684// the data type, we can use the minimum value of the data type as a sentinel
685// value to replace the start value. This allows us to perform a single
686// reduction max operation to obtain the final reduction result.
687// TODO: It is possible to solve the case where the start value is the minimum
688// value of the data type or a non-constant value by using mask and multiple
689// reduction operations.
693 // TODO: Support the vectorization of FindLastIV when the reduction phi is
694 // used by more than one select instruction. This vectorization is only
695 // performed when the SCEV of each increasing induction variable used by the
696 // select instructions is identical.
697 if (!OrigPhi->hasOneUse())
698 return InstDesc(false, I);
699
700 // TODO: Match selects with multi-use cmp conditions.
701 Value *NonRdxPhi = nullptr;
702 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
703 m_Specific(OrigPhi)),
704 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
705 m_Value(NonRdxPhi)))))
706 return InstDesc(false, I);
707
708 auto IsIncreasingLoopInduction = [&](Value *V) {
709 Type *Ty = V->getType();
710 if (!SE.isSCEVable(Ty))
711 return false;
712
713 auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
714 if (!AR || AR->getLoop() != TheLoop)
715 return false;
716
717 const SCEV *Step = AR->getStepRecurrence(SE);
718 if (!SE.isKnownPositive(Step))
719 return false;
720
721 const ConstantRange IVRange = SE.getSignedRange(AR);
722 unsigned NumBits = Ty->getIntegerBitWidth();
723 // Keep the minimum value of the recurrence type as the sentinel value.
724 // The maximum acceptable range for the increasing induction variable,
725 // called the valid range, will be defined as
726 // [<sentinel value> + 1, <sentinel value>)
727 // where <sentinel value> is SignedMin(<recurrence type>)
728 // TODO: This range restriction can be lifted by adding an additional
729 // virtual OR reduction.
730 const APInt Sentinel = APInt::getSignedMinValue(NumBits);
731 const ConstantRange ValidRange =
733 LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
734 << ", and the signed range of " << *AR << " is "
735 << IVRange << "\n");
736 // Ensure the induction variable does not wrap around by verifying that its
737 // range is fully contained within the valid range.
738 return ValidRange.contains(IVRange);
739 };
740
741 // We are looking for selects of the form:
742 // select(cmp(), phi, increasing_loop_induction) or
743 // select(cmp(), increasing_loop_induction, phi)
744 // TODO: Support for monotonically decreasing induction variable
745 if (!IsIncreasingLoopInduction(NonRdxPhi))
746 return InstDesc(false, I);
747
748 return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IFindLastIV
750}
751
754 const InstDesc &Prev) {
755 assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
756 "Expected a cmp or select or call instruction");
757 if (!isMinMaxRecurrenceKind(Kind))
758 return InstDesc(false, I);
759
760 // We must handle the select(cmp()) as a single instruction. Advance to the
761 // select.
762 CmpPredicate Pred;
763 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
764 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
765 return InstDesc(Select, Prev.getRecKind());
766 }
767
768 // Only match select with single use cmp condition, or a min/max intrinsic.
769 if (!isa<IntrinsicInst>(I) &&
771 m_Value())))
772 return InstDesc(false, I);
773
774 // Look for a min/max pattern.
775 if (match(I, m_UMin(m_Value(), m_Value())))
776 return InstDesc(Kind == RecurKind::UMin, I);
777 if (match(I, m_UMax(m_Value(), m_Value())))
778 return InstDesc(Kind == RecurKind::UMax, I);
779 if (match(I, m_SMax(m_Value(), m_Value())))
780 return InstDesc(Kind == RecurKind::SMax, I);
781 if (match(I, m_SMin(m_Value(), m_Value())))
782 return InstDesc(Kind == RecurKind::SMin, I);
784 return InstDesc(Kind == RecurKind::FMin, I);
786 return InstDesc(Kind == RecurKind::FMax, I);
787 if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
788 return InstDesc(Kind == RecurKind::FMin, I);
789 if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
790 return InstDesc(Kind == RecurKind::FMax, I);
791 if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
792 return InstDesc(Kind == RecurKind::FMinimum, I);
793 if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
794 return InstDesc(Kind == RecurKind::FMaximum, I);
795
796 return InstDesc(false, I);
797}
798
799/// Returns true if the select instruction has users in the compare-and-add
800/// reduction pattern below. The select instruction argument is the last one
801/// in the sequence.
802///
803/// %sum.1 = phi ...
804/// ...
805/// %cmp = fcmp pred %0, %CFP
806/// %add = fadd %0, %sum.1
807/// %sum.2 = select %cmp, %add, %sum.1
810 SelectInst *SI = dyn_cast<SelectInst>(I);
811 if (!SI)
812 return InstDesc(false, I);
813
814 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
815 // Only handle single use cases for now.
816 if (!CI || !CI->hasOneUse())
817 return InstDesc(false, I);
818
819 Value *TrueVal = SI->getTrueValue();
820 Value *FalseVal = SI->getFalseValue();
821 // Handle only when either of operands of select instruction is a PHI
822 // node for now.
823 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
824 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
825 return InstDesc(false, I);
826
827 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
828 : dyn_cast<Instruction>(TrueVal);
829 if (!I1 || !I1->isBinaryOp())
830 return InstDesc(false, I);
831
832 Value *Op1, *Op2;
833 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
834 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
835 I1->isFast()) ||
836 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
837 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
838 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
839 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
840 return InstDesc(false, I);
841
842 Instruction *IPhi = isa<PHINode>(Op1) ? dyn_cast<Instruction>(Op1)
843 : dyn_cast<Instruction>(Op2);
844 if (!IPhi || IPhi != FalseVal)
845 return InstDesc(false, I);
846
847 return InstDesc(true, SI);
848}
849
851 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
852 FastMathFlags FuncFMF, ScalarEvolution *SE) {
853 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
854 switch (I->getOpcode()) {
855 default:
856 return InstDesc(false, I);
857 case Instruction::PHI:
858 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
859 case Instruction::Sub:
860 case Instruction::Add:
861 return InstDesc(Kind == RecurKind::Add, I);
862 case Instruction::Mul:
863 return InstDesc(Kind == RecurKind::Mul, I);
864 case Instruction::And:
865 return InstDesc(Kind == RecurKind::And, I);
866 case Instruction::Or:
867 return InstDesc(Kind == RecurKind::Or, I);
868 case Instruction::Xor:
869 return InstDesc(Kind == RecurKind::Xor, I);
870 case Instruction::FDiv:
871 case Instruction::FMul:
872 return InstDesc(Kind == RecurKind::FMul, I,
873 I->hasAllowReassoc() ? nullptr : I);
874 case Instruction::FSub:
875 case Instruction::FAdd:
876 return InstDesc(Kind == RecurKind::FAdd, I,
877 I->hasAllowReassoc() ? nullptr : I);
878 case Instruction::Select:
879 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
880 Kind == RecurKind::Add || Kind == RecurKind::Mul)
881 return isConditionalRdxPattern(Kind, I);
882 if (isFindLastIVRecurrenceKind(Kind) && SE)
883 return isFindLastIVPattern(L, OrigPhi, I, *SE);
884 [[fallthrough]];
885 case Instruction::FCmp:
886 case Instruction::ICmp:
887 case Instruction::Call:
888 if (isAnyOfRecurrenceKind(Kind))
889 return isAnyOfPattern(L, OrigPhi, I, Prev);
890 auto HasRequiredFMF = [&]() {
891 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
892 return true;
893 if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
894 return true;
895 // minimum and maximum intrinsics do not require nsz and nnan flags since
896 // NaN and signed zeroes are propagated in the intrinsic implementation.
897 return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
898 match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
899 };
900 if (isIntMinMaxRecurrenceKind(Kind) ||
901 (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
902 return isMinMaxPattern(I, Kind, Prev);
903 else if (isFMulAddIntrinsic(I))
904 return InstDesc(Kind == RecurKind::FMulAdd, I,
905 I->hasAllowReassoc() ? nullptr : I);
906 return InstDesc(false, I);
907 }
908}
909
912 unsigned MaxNumUses) {
913 unsigned NumUses = 0;
914 for (const Use &U : I->operands()) {
915 if (Insts.count(dyn_cast<Instruction>(U)))
916 ++NumUses;
917 if (NumUses > MaxNumUses)
918 return true;
919 }
920
921 return false;
922}
923
925 RecurrenceDescriptor &RedDes,
927 DominatorTree *DT,
928 ScalarEvolution *SE) {
929 BasicBlock *Header = TheLoop->getHeader();
930 Function &F = *Header->getParent();
931 FastMathFlags FMF;
932 FMF.setNoNaNs(
933 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
935 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
936
937 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
938 SE)) {
939 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
940 return true;
941 }
942 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
943 SE)) {
944 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
945 return true;
946 }
947 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
948 SE)) {
949 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
950 return true;
951 }
952 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
953 SE)) {
954 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
955 return true;
956 }
957 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
958 SE)) {
959 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
960 return true;
961 }
962 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
963 SE)) {
964 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
965 return true;
966 }
967 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
968 SE)) {
969 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
970 return true;
971 }
972 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
973 SE)) {
974 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
975 return true;
976 }
977 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
978 SE)) {
979 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
980 return true;
981 }
982 if (AddReductionVar(Phi, RecurKind::IAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
983 SE)) {
984 LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
985 << *Phi << "\n");
986 return true;
987 }
988 if (AddReductionVar(Phi, RecurKind::IFindLastIV, TheLoop, FMF, RedDes, DB, AC,
989 DT, SE)) {
990 LLVM_DEBUG(dbgs() << "Found a "
992 ? "F"
993 : "I")
994 << "FindLastIV reduction PHI." << *Phi << "\n");
995 return true;
996 }
997 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
998 SE)) {
999 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1000 return true;
1001 }
1002 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1003 SE)) {
1004 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1005 return true;
1006 }
1007 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
1008 SE)) {
1009 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
1010 return true;
1011 }
1012 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
1013 SE)) {
1014 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
1015 return true;
1016 }
1017 if (AddReductionVar(Phi, RecurKind::FAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
1018 SE)) {
1019 LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
1020 << " PHI." << *Phi << "\n");
1021 return true;
1022 }
1023 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
1024 SE)) {
1025 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1026 return true;
1027 }
1028 if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
1029 SE)) {
1030 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
1031 return true;
1032 }
1033 if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
1034 SE)) {
1035 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
1036 return true;
1037 }
1038 // Not a reduction of known type.
1039 return false;
1040}
1041
1043 DominatorTree *DT) {
1044
1045 // Ensure the phi node is in the loop header and has two incoming values.
1046 if (Phi->getParent() != TheLoop->getHeader() ||
1047 Phi->getNumIncomingValues() != 2)
1048 return false;
1049
1050 // Ensure the loop has a preheader and a single latch block. The loop
1051 // vectorizer will need the latch to set up the next iteration of the loop.
1052 auto *Preheader = TheLoop->getLoopPreheader();
1053 auto *Latch = TheLoop->getLoopLatch();
1054 if (!Preheader || !Latch)
1055 return false;
1056
1057 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1058 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1059 Phi->getBasicBlockIndex(Latch) < 0)
1060 return false;
1061
1062 // Get the previous value. The previous value comes from the latch edge while
1063 // the initial value comes from the preheader edge.
1064 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1065
1066 // If Previous is a phi in the header, go through incoming values from the
1067 // latch until we find a non-phi value. Use this as the new Previous, all uses
1068 // in the header will be dominated by the original phi, but need to be moved
1069 // after the non-phi previous value.
1071 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1072 if (PrevPhi->getParent() != Phi->getParent())
1073 return false;
1074 if (!SeenPhis.insert(PrevPhi).second)
1075 return false;
1076 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1077 }
1078
1079 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1080 return false;
1081
1082 // Ensure every user of the phi node (recursively) is dominated by the
1083 // previous value. The dominance requirement ensures the loop vectorizer will
1084 // not need to vectorize the initial value prior to the first iteration of the
1085 // loop.
1086 // TODO: Consider extending this sinking to handle memory instructions.
1087
1089 BasicBlock *PhiBB = Phi->getParent();
1091 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1092 // Cyclic dependence.
1093 if (Previous == SinkCandidate)
1094 return false;
1095
1096 if (!Seen.insert(SinkCandidate).second)
1097 return true;
1098 if (DT->dominates(Previous,
1099 SinkCandidate)) // We already are good w/o sinking.
1100 return true;
1101
1102 if (SinkCandidate->getParent() != PhiBB ||
1103 SinkCandidate->mayHaveSideEffects() ||
1104 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1105 return false;
1106
1107 // If we reach a PHI node that is not dominated by Previous, we reached a
1108 // header PHI. No need for sinking.
1109 if (isa<PHINode>(SinkCandidate))
1110 return true;
1111
1112 // Sink User tentatively and check its users
1113 WorkList.push_back(SinkCandidate);
1114 return true;
1115 };
1116
1117 WorkList.push_back(Phi);
1118 // Try to recursively sink instructions and their users after Previous.
1119 while (!WorkList.empty()) {
1120 Instruction *Current = WorkList.pop_back_val();
1121 for (User *User : Current->users()) {
1122 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1123 return false;
1124 }
1125 }
1126
1127 return true;
1128}
1129
1131 switch (Kind) {
1132 case RecurKind::Add:
1133 return Instruction::Add;
1134 case RecurKind::Mul:
1135 return Instruction::Mul;
1136 case RecurKind::Or:
1137 return Instruction::Or;
1138 case RecurKind::And:
1139 return Instruction::And;
1140 case RecurKind::Xor:
1141 return Instruction::Xor;
1142 case RecurKind::FMul:
1143 return Instruction::FMul;
1144 case RecurKind::FMulAdd:
1145 case RecurKind::FAdd:
1146 return Instruction::FAdd;
1147 case RecurKind::SMax:
1148 case RecurKind::SMin:
1149 case RecurKind::UMax:
1150 case RecurKind::UMin:
1151 case RecurKind::IAnyOf:
1153 return Instruction::ICmp;
1154 case RecurKind::FMax:
1155 case RecurKind::FMin:
1158 case RecurKind::FAnyOf:
1160 return Instruction::FCmp;
1161 default:
1162 llvm_unreachable("Unknown recurrence operation");
1163 }
1164}
1165
1168 SmallVector<Instruction *, 4> ReductionOperations;
1169 unsigned RedOp = getOpcode();
1170
1171 // Search down from the Phi to the LoopExitInstr, looking for instructions
1172 // with a single user of the correct type for the reduction.
1173
1174 // Note that we check that the type of the operand is correct for each item in
1175 // the chain, including the last (the loop exit value). This can come up from
1176 // sub, which would otherwise be treated as an add reduction. MinMax also need
1177 // to check for a pair of icmp/select, for which we use getNextInstruction and
1178 // isCorrectOpcode functions to step the right number of instruction, and
1179 // check the icmp/select pair.
1180 // FIXME: We also do not attempt to look through Select's yet, which might
1181 // be part of the reduction chain, or attempt to looks through And's to find a
1182 // smaller bitwidth. Subs are also currently not allowed (which are usually
1183 // treated as part of a add reduction) as they are expected to generally be
1184 // more expensive than out-of-loop reductions, and need to be costed more
1185 // carefully.
1186 unsigned ExpectedUses = 1;
1187 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
1188 ExpectedUses = 2;
1189
1190 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1191 for (auto *User : Cur->users()) {
1192 Instruction *UI = cast<Instruction>(User);
1193 if (isa<PHINode>(UI))
1194 continue;
1195 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1196 // We are expecting a icmp/select pair, which we go to the next select
1197 // instruction if we can. We already know that Cur has 2 uses.
1198 if (isa<SelectInst>(UI))
1199 return UI;
1200 continue;
1201 }
1202 return UI;
1203 }
1204 return nullptr;
1205 };
1206 auto isCorrectOpcode = [&](Instruction *Cur) {
1207 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1208 Value *LHS, *RHS;
1210 matchSelectPattern(Cur, LHS, RHS).Flavor);
1211 }
1212 // Recognize a call to the llvm.fmuladd intrinsic.
1213 if (isFMulAddIntrinsic(Cur))
1214 return true;
1215
1216 return Cur->getOpcode() == RedOp;
1217 };
1218
1219 // Attempt to look through Phis which are part of the reduction chain
1220 unsigned ExtraPhiUses = 0;
1221 Instruction *RdxInstr = LoopExitInstr;
1222 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1223 if (ExitPhi->getNumIncomingValues() != 2)
1224 return {};
1225
1226 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1227 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1228
1229 Instruction *Chain = nullptr;
1230 if (Inc0 == Phi)
1231 Chain = Inc1;
1232 else if (Inc1 == Phi)
1233 Chain = Inc0;
1234 else
1235 return {};
1236
1237 RdxInstr = Chain;
1238 ExtraPhiUses = 1;
1239 }
1240
1241 // The loop exit instruction we check first (as a quick test) but add last. We
1242 // check the opcode is correct (and dont allow them to be Subs) and that they
1243 // have expected to have the expected number of uses. They will have one use
1244 // from the phi and one from a LCSSA value, no matter the type.
1245 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1246 return {};
1247
1248 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1249 // for conditional reductions.
1250 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1251 return {};
1252
1253 Instruction *Cur = getNextInstruction(Phi);
1254
1255 // Each other instruction in the chain should have the expected number of uses
1256 // and be the correct opcode.
1257 while (Cur != RdxInstr) {
1258 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1259 return {};
1260
1261 ReductionOperations.push_back(Cur);
1262 Cur = getNextInstruction(Cur);
1263 }
1264
1265 ReductionOperations.push_back(Cur);
1266 return ReductionOperations;
1267}
1268
1269InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1270 const SCEV *Step, BinaryOperator *BOp,
1272 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1273 assert(IK != IK_NoInduction && "Not an induction");
1274
1275 // Start value type should match the induction kind and the value
1276 // itself should not be null.
1277 assert(StartValue && "StartValue is null");
1278 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1279 "StartValue is not a pointer for pointer induction");
1280 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1281 "StartValue is not an integer for integer induction");
1282
1283 // Check the Step Value. It should be non-zero integer value.
1284 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1285 "Step value is zero");
1286
1287 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1288 "StepValue is not an integer");
1289
1290 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1291 "StepValue is not FP for FpInduction");
1292 assert((IK != IK_FpInduction ||
1293 (InductionBinOp &&
1294 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1295 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1296 "Binary opcode should be specified for FP induction");
1297
1298 if (Casts) {
1299 for (auto &Inst : *Casts) {
1300 RedundantCasts.push_back(Inst);
1301 }
1302 }
1303}
1304
1306 if (isa<SCEVConstant>(Step))
1307 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1308 return nullptr;
1309}
1310
1312 ScalarEvolution *SE,
1314
1315 // Here we only handle FP induction variables.
1316 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1317
1318 if (TheLoop->getHeader() != Phi->getParent())
1319 return false;
1320
1321 // The loop may have multiple entrances or multiple exits; we can analyze
1322 // this phi if it has a unique entry value and a unique backedge value.
1323 if (Phi->getNumIncomingValues() != 2)
1324 return false;
1325 Value *BEValue = nullptr, *StartValue = nullptr;
1326 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1327 BEValue = Phi->getIncomingValue(0);
1328 StartValue = Phi->getIncomingValue(1);
1329 } else {
1330 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1331 "Unexpected Phi node in the loop");
1332 BEValue = Phi->getIncomingValue(1);
1333 StartValue = Phi->getIncomingValue(0);
1334 }
1335
1336 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
1337 if (!BOp)
1338 return false;
1339
1340 Value *Addend = nullptr;
1341 if (BOp->getOpcode() == Instruction::FAdd) {
1342 if (BOp->getOperand(0) == Phi)
1343 Addend = BOp->getOperand(1);
1344 else if (BOp->getOperand(1) == Phi)
1345 Addend = BOp->getOperand(0);
1346 } else if (BOp->getOpcode() == Instruction::FSub)
1347 if (BOp->getOperand(0) == Phi)
1348 Addend = BOp->getOperand(1);
1349
1350 if (!Addend)
1351 return false;
1352
1353 // The addend should be loop invariant
1354 if (auto *I = dyn_cast<Instruction>(Addend))
1355 if (TheLoop->contains(I))
1356 return false;
1357
1358 // FP Step has unknown SCEV
1359 const SCEV *Step = SE->getUnknown(Addend);
1360 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1361 return true;
1362}
1363
1364/// This function is called when we suspect that the update-chain of a phi node
1365/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1366/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1367/// predicate P under which the SCEV expression for the phi can be the
1368/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1369/// cast instructions that are involved in the update-chain of this induction.
1370/// A caller that adds the required runtime predicate can be free to drop these
1371/// cast instructions, and compute the phi using \p AR (instead of some scev
1372/// expression with casts).
1373///
1374/// For example, without a predicate the scev expression can take the following
1375/// form:
1376/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1377///
1378/// It corresponds to the following IR sequence:
1379/// %for.body:
1380/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1381/// %casted_phi = "ExtTrunc i64 %x"
1382/// %add = add i64 %casted_phi, %step
1383///
1384/// where %x is given in \p PN,
1385/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1386/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1387/// several forms, for example, such as:
1388/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1389/// or:
1390/// ExtTrunc2: %t = shl %x, m
1391/// %casted_phi = ashr %t, m
1392///
1393/// If we are able to find such sequence, we return the instructions
1394/// we found, namely %casted_phi and the instructions on its use-def chain up
1395/// to the phi (not including the phi).
1397 const SCEVUnknown *PhiScev,
1398 const SCEVAddRecExpr *AR,
1399 SmallVectorImpl<Instruction *> &CastInsts) {
1400
1401 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1402 auto *PN = cast<PHINode>(PhiScev->getValue());
1403 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1404 const Loop *L = AR->getLoop();
1405
1406 // Find any cast instructions that participate in the def-use chain of
1407 // PhiScev in the loop.
1408 // FORNOW/TODO: We currently expect the def-use chain to include only
1409 // two-operand instructions, where one of the operands is an invariant.
1410 // createAddRecFromPHIWithCasts() currently does not support anything more
1411 // involved than that, so we keep the search simple. This can be
1412 // extended/generalized as needed.
1413
1414 auto getDef = [&](const Value *Val) -> Value * {
1415 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1416 if (!BinOp)
1417 return nullptr;
1418 Value *Op0 = BinOp->getOperand(0);
1419 Value *Op1 = BinOp->getOperand(1);
1420 Value *Def = nullptr;
1421 if (L->isLoopInvariant(Op0))
1422 Def = Op1;
1423 else if (L->isLoopInvariant(Op1))
1424 Def = Op0;
1425 return Def;
1426 };
1427
1428 // Look for the instruction that defines the induction via the
1429 // loop backedge.
1430 BasicBlock *Latch = L->getLoopLatch();
1431 if (!Latch)
1432 return false;
1433 Value *Val = PN->getIncomingValueForBlock(Latch);
1434 if (!Val)
1435 return false;
1436
1437 // Follow the def-use chain until the induction phi is reached.
1438 // If on the way we encounter a Value that has the same SCEV Expr as the
1439 // phi node, we can consider the instructions we visit from that point
1440 // as part of the cast-sequence that can be ignored.
1441 bool InCastSequence = false;
1442 auto *Inst = dyn_cast<Instruction>(Val);
1443 while (Val != PN) {
1444 // If we encountered a phi node other than PN, or if we left the loop,
1445 // we bail out.
1446 if (!Inst || !L->contains(Inst)) {
1447 return false;
1448 }
1449 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1450 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1451 InCastSequence = true;
1452 if (InCastSequence) {
1453 // Only the last instruction in the cast sequence is expected to have
1454 // uses outside the induction def-use chain.
1455 if (!CastInsts.empty())
1456 if (!Inst->hasOneUse())
1457 return false;
1458 CastInsts.push_back(Inst);
1459 }
1460 Val = getDef(Val);
1461 if (!Val)
1462 return false;
1463 Inst = dyn_cast<Instruction>(Val);
1464 }
1465
1466 return InCastSequence;
1467}
1468
1471 InductionDescriptor &D, bool Assume) {
1472 Type *PhiTy = Phi->getType();
1473
1474 // Handle integer and pointer inductions variables.
1475 // Now we handle also FP induction but not trying to make a
1476 // recurrent expression from the PHI node in-place.
1477
1478 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1479 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1480 return false;
1481
1482 if (PhiTy->isFloatingPointTy())
1483 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1484
1485 const SCEV *PhiScev = PSE.getSCEV(Phi);
1486 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1487
1488 // We need this expression to be an AddRecExpr.
1489 if (Assume && !AR)
1490 AR = PSE.getAsAddRec(Phi);
1491
1492 if (!AR) {
1493 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1494 return false;
1495 }
1496
1497 // Record any Cast instructions that participate in the induction update
1498 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1499 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1500 // only after enabling Assume with PSCEV, this means we may have encountered
1501 // cast instructions that required adding a runtime check in order to
1502 // guarantee the correctness of the AddRecurrence respresentation of the
1503 // induction.
1504 if (PhiScev != AR && SymbolicPhi) {
1506 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1507 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1508 }
1509
1510 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1511}
1512
1514 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1515 InductionDescriptor &D, const SCEV *Expr,
1516 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1517 Type *PhiTy = Phi->getType();
1518 // isSCEVable returns true for integer and pointer types.
1519 if (!SE->isSCEVable(PhiTy))
1520 return false;
1521
1522 // Check that the PHI is consecutive.
1523 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1524 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1525
1526 if (!AR) {
1527 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1528 return false;
1529 }
1530
1531 if (AR->getLoop() != TheLoop) {
1532 // FIXME: We should treat this as a uniform. Unfortunately, we
1533 // don't currently know how to handled uniform PHIs.
1534 LLVM_DEBUG(
1535 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
1536 return false;
1537 }
1538
1539 // This function assumes that InductionPhi is called only on Phi nodes
1540 // present inside loop headers. Check for the same, and throw an assert if
1541 // the current Phi is not present inside the loop header.
1542 assert(Phi->getParent() == AR->getLoop()->getHeader()
1543 && "Invalid Phi node, not present in loop header");
1544
1545 Value *StartValue =
1546 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
1547
1548 BasicBlock *Latch = AR->getLoop()->getLoopLatch();
1549 if (!Latch)
1550 return false;
1551
1552 const SCEV *Step = AR->getStepRecurrence(*SE);
1553 // Calculate the pointer stride and check if it is consecutive.
1554 // The stride may be a constant or a loop invariant integer value.
1555 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
1556 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
1557 return false;
1558
1559 if (PhiTy->isIntegerTy()) {
1560 BinaryOperator *BOp =
1561 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1562 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1563 CastsToIgnore);
1564 return true;
1565 }
1566
1567 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1568
1569 // This allows induction variables w/non-constant steps.
1570 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1571 return true;
1572}
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:533
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BinaryOps getOpcode() const
Definition: InstrTypes.h:370
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This class represents a range of values.
Definition: ConstantRange.h:47
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
Definition: ConstantRange.h:84
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
bool noSignedZeros() const
Definition: FMF.h:68
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
void setNoNaNs(bool B=true)
Definition: FMF.h:79
bool noNaNs() const
Definition: FMF.h:66
static FastMathFlags getFast()
Definition: FMF.h:51
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
ConstantInt * getConstIntStepValue() const
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Definition: IVDescriptors.h:94
Instruction * getPatternInst() const
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
unsigned getOpcode() const
static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev)
Returns a struct describing if the instruction is a llvm.
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Value * getValueOperand()
Definition: Instructions.h:378
Value * getPointerOperand()
Definition: Instructions.h:381
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ IFindLastIV
FindLast reduction with select(icmp(),x,y) where one of (x,y) is increasing loop induction,...
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ None
Not a recurrence.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ FFindLastIV
FindLast reduction with select(fcmp(),x,y) where one of (x,y) is increasing loop induction,...
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Add
Sum of integers.
@ FAdd
Sum of floats.
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?