LLVM 17.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
19#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Module.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27#include <set>
28
29using namespace llvm;
30using namespace llvm::PatternMatch;
31
32#define DEBUG_TYPE "iv-descriptors"
33
36 for (const Use &Use : I->operands())
37 if (!Set.count(dyn_cast<Instruction>(Use)))
38 return false;
39 return true;
40}
41
43 switch (Kind) {
44 default:
45 break;
46 case RecurKind::Add:
47 case RecurKind::Mul:
48 case RecurKind::Or:
49 case RecurKind::And:
50 case RecurKind::Xor:
51 case RecurKind::SMax:
52 case RecurKind::SMin:
53 case RecurKind::UMax:
54 case RecurKind::UMin:
57 return true;
58 }
59 return false;
60}
61
63 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
64}
65
66/// Determines if Phi may have been type-promoted. If Phi has a single user
67/// that ANDs the Phi with a type mask, return the user. RT is updated to
68/// account for the narrower bit width represented by the mask, and the AND
69/// instruction is added to CI.
73 if (!Phi->hasOneUse())
74 return Phi;
75
76 const APInt *M = nullptr;
77 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
78
79 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
80 // with a new integer type of the corresponding bit width.
81 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
82 int32_t Bits = (*M + 1).exactLogBase2();
83 if (Bits > 0) {
84 RT = IntegerType::get(Phi->getContext(), Bits);
85 Visited.insert(Phi);
86 CI.insert(J);
87 return J;
88 }
89 }
90 return Phi;
91}
92
93/// Compute the minimal bit width needed to represent a reduction whose exit
94/// instruction is given by Exit.
95static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
96 DemandedBits *DB,
98 DominatorTree *DT) {
99 bool IsSigned = false;
100 const DataLayout &DL = Exit->getModule()->getDataLayout();
101 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
102
103 if (DB) {
104 // Use the demanded bits analysis to determine the bits that are live out
105 // of the exit instruction, rounding up to the nearest power of two. If the
106 // use of demanded bits results in a smaller bit width, we know the value
107 // must be positive (i.e., IsSigned = false), because if this were not the
108 // case, the sign bit would have been demanded.
109 auto Mask = DB->getDemandedBits(Exit);
110 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
111 }
112
113 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
114 // If demanded bits wasn't able to limit the bit width, we can try to use
115 // value tracking instead. This can be the case, for example, if the value
116 // may be negative.
117 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
118 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
119 MaxBitWidth = NumTypeBits - NumSignBits;
120 KnownBits Bits = computeKnownBits(Exit, DL);
121 if (!Bits.isNonNegative()) {
122 // If the value is not known to be non-negative, we set IsSigned to true,
123 // meaning that we will use sext instructions instead of zext
124 // instructions to restore the original type.
125 IsSigned = true;
126 // Make sure at at least one sign bit is included in the result, so it
127 // will get properly sign-extended.
128 ++MaxBitWidth;
129 }
130 }
131 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
132
133 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
134 IsSigned);
135}
136
137/// Collect cast instructions that can be ignored in the vectorizer's cost
138/// model, given a reduction exit value and the minimal type in which the
139// reduction can be represented. Also search casts to the recurrence type
140// to find the minimum width used by the recurrence.
141static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
142 Type *RecurrenceType,
144 unsigned &MinWidthCastToRecurTy) {
145
148 Worklist.push_back(Exit);
149 MinWidthCastToRecurTy = -1U;
150
151 while (!Worklist.empty()) {
152 Instruction *Val = Worklist.pop_back_val();
153 Visited.insert(Val);
154 if (auto *Cast = dyn_cast<CastInst>(Val)) {
155 if (Cast->getSrcTy() == RecurrenceType) {
156 // If the source type of a cast instruction is equal to the recurrence
157 // type, it will be eliminated, and should be ignored in the vectorizer
158 // cost model.
159 Casts.insert(Cast);
160 continue;
161 }
162 if (Cast->getDestTy() == RecurrenceType) {
163 // The minimum width used by the recurrence is found by checking for
164 // casts on its operands. The minimum width is used by the vectorizer
165 // when finding the widest type for in-loop reductions without any
166 // loads/stores.
167 MinWidthCastToRecurTy = std::min<unsigned>(
168 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
169 continue;
170 }
171 }
172 // Add all operands to the work list if they are loop-varying values that
173 // we haven't yet visited.
174 for (Value *O : cast<User>(Val)->operands())
175 if (auto *I = dyn_cast<Instruction>(O))
176 if (TheLoop->contains(I) && !Visited.count(I))
177 Worklist.push_back(I);
178 }
179}
180
181// Check if a given Phi node can be recognized as an ordered reduction for
182// vectorizing floating point operations without unsafe math.
183static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
184 Instruction *Exit, PHINode *Phi) {
185 // Currently only FAdd and FMulAdd are supported.
186 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
187 return false;
188
189 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
190 return false;
191
192 if (Kind == RecurKind::FMulAdd &&
194 return false;
195
196 // Ensure the exit instruction has only one user other than the reduction PHI
197 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
198 return false;
199
200 // The only pattern accepted is the one in which the reduction PHI
201 // is used as one of the operands of the exit instruction
202 auto *Op0 = Exit->getOperand(0);
203 auto *Op1 = Exit->getOperand(1);
204 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
205 return false;
206 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
207 return false;
208
209 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
210 << ", ExitInst: " << *Exit << "\n");
211
212 return true;
213}
214
216 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
219 if (Phi->getNumIncomingValues() != 2)
220 return false;
221
222 // Reduction variables are only found in the loop header block.
223 if (Phi->getParent() != TheLoop->getHeader())
224 return false;
225
226 // Obtain the reduction start value from the value that comes from the loop
227 // preheader.
228 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
229
230 // ExitInstruction is the single value which is used outside the loop.
231 // We only allow for a single reduction value to be used outside the loop.
232 // This includes users of the reduction, variables (which form a cycle
233 // which ends in the phi node).
234 Instruction *ExitInstruction = nullptr;
235
236 // Variable to keep last visited store instruction. By the end of the
237 // algorithm this variable will be either empty or having intermediate
238 // reduction value stored in invariant address.
239 StoreInst *IntermediateStore = nullptr;
240
241 // Indicates that we found a reduction operation in our scan.
242 bool FoundReduxOp = false;
243
244 // We start with the PHI node and scan for all of the users of this
245 // instruction. All users must be instructions that can be used as reduction
246 // variables (such as ADD). We must have a single out-of-block user. The cycle
247 // must include the original PHI.
248 bool FoundStartPHI = false;
249
250 // To recognize min/max patterns formed by a icmp select sequence, we store
251 // the number of instruction we saw from the recognized min/max pattern,
252 // to make sure we only see exactly the two instructions.
253 unsigned NumCmpSelectPatternInst = 0;
254 InstDesc ReduxDesc(false, nullptr);
255
256 // Data used for determining if the recurrence has been type-promoted.
257 Type *RecurrenceType = Phi->getType();
259 unsigned MinWidthCastToRecurrenceType;
260 Instruction *Start = Phi;
261 bool IsSigned = false;
262
265
266 // Return early if the recurrence kind does not match the type of Phi. If the
267 // recurrence kind is arithmetic, we attempt to look through AND operations
268 // resulting from the type promotion performed by InstCombine. Vector
269 // operations are not limited to the legal integer widths, so we may be able
270 // to evaluate the reduction in the narrower width.
271 if (RecurrenceType->isFloatingPointTy()) {
273 return false;
274 } else if (RecurrenceType->isIntegerTy()) {
275 if (!isIntegerRecurrenceKind(Kind))
276 return false;
277 if (!isMinMaxRecurrenceKind(Kind))
278 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
279 } else {
280 // Pointer min/max may exist, but it is not supported as a reduction op.
281 return false;
282 }
283
284 Worklist.push_back(Start);
285 VisitedInsts.insert(Start);
286
287 // Start with all flags set because we will intersect this with the reduction
288 // flags from all the reduction operations.
290
291 // The first instruction in the use-def chain of the Phi node that requires
292 // exact floating point operations.
293 Instruction *ExactFPMathInst = nullptr;
294
295 // A value in the reduction can be used:
296 // - By the reduction:
297 // - Reduction operation:
298 // - One use of reduction value (safe).
299 // - Multiple use of reduction value (not safe).
300 // - PHI:
301 // - All uses of the PHI must be the reduction (safe).
302 // - Otherwise, not safe.
303 // - By instructions outside of the loop (safe).
304 // * One value may have several outside users, but all outside
305 // uses must be of the same value.
306 // - By store instructions with a loop invariant address (safe with
307 // the following restrictions):
308 // * If there are several stores, all must have the same address.
309 // * Final value should be stored in that loop invariant address.
310 // - By an instruction that is not part of the reduction (not safe).
311 // This is either:
312 // * An instruction type other than PHI or the reduction operation.
313 // * A PHI in the header other than the initial PHI.
314 while (!Worklist.empty()) {
315 Instruction *Cur = Worklist.pop_back_val();
316
317 // Store instructions are allowed iff it is the store of the reduction
318 // value to the same loop invariant memory location.
319 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
320 if (!SE) {
321 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
322 << "Scalar Evolution Analysis\n");
323 return false;
324 }
325
326 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
327 // Check it is the same address as previous stores
328 if (IntermediateStore) {
329 const SCEV *OtherScev =
331
332 if (OtherScev != PtrScev) {
333 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
334 << "inside the loop: " << *SI->getPointerOperand()
335 << " and "
336 << *IntermediateStore->getPointerOperand() << '\n');
337 return false;
338 }
339 }
340
341 // Check the pointer is loop invariant
342 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
343 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
344 << "inside the loop: " << *SI->getPointerOperand()
345 << '\n');
346 return false;
347 }
348
349 // IntermediateStore is always the last store in the loop.
351 continue;
352 }
353
354 // No Users.
355 // If the instruction has no users then this is a broken chain and can't be
356 // a reduction variable.
357 if (Cur->use_empty())
358 return false;
359
360 bool IsAPhi = isa<PHINode>(Cur);
361
362 // A header PHI use other than the original PHI.
363 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
364 return false;
365
366 // Reductions of instructions such as Div, and Sub is only possible if the
367 // LHS is the reduction variable.
368 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
369 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
370 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
371 return false;
372
373 // Any reduction instruction must be of one of the allowed kinds. We ignore
374 // the starting value (the Phi or an AND instruction if the Phi has been
375 // type-promoted).
376 if (Cur != Start) {
377 ReduxDesc =
378 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
379 ExactFPMathInst = ExactFPMathInst == nullptr
380 ? ReduxDesc.getExactFPMathInst()
381 : ExactFPMathInst;
382 if (!ReduxDesc.isRecurrence())
383 return false;
384 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
385 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
386 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
387 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
388 // Accept FMF on either fcmp or select of a min/max idiom.
389 // TODO: This is a hack to work-around the fact that FMF may not be
390 // assigned/propagated correctly. If that problem is fixed or we
391 // standardize on fmin/fmax via intrinsics, this can be removed.
392 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
393 CurFMF |= FCmp->getFastMathFlags();
394 }
395 FMF &= CurFMF;
396 }
397 // Update this reduction kind if we matched a new instruction.
398 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
399 // state accurate while processing the worklist?
400 if (ReduxDesc.getRecKind() != RecurKind::None)
401 Kind = ReduxDesc.getRecKind();
402 }
403
404 bool IsASelect = isa<SelectInst>(Cur);
405
406 // A conditional reduction operation must only have 2 or less uses in
407 // VisitedInsts.
408 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
409 hasMultipleUsesOf(Cur, VisitedInsts, 2))
410 return false;
411
412 // A reduction operation must only have one use of the reduction value.
413 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
415 hasMultipleUsesOf(Cur, VisitedInsts, 1))
416 return false;
417
418 // All inputs to a PHI node must be a reduction value.
419 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
420 return false;
421
422 if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
423 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
424 ++NumCmpSelectPatternInst;
425 if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
426 (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
427 ++NumCmpSelectPatternInst;
428
429 // Check whether we found a reduction operator.
430 FoundReduxOp |= !IsAPhi && Cur != Start;
431
432 // Process users of current instruction. Push non-PHI nodes after PHI nodes
433 // onto the stack. This way we are going to have seen all inputs to PHI
434 // nodes once we get to them.
437 for (User *U : Cur->users()) {
438 Instruction *UI = cast<Instruction>(U);
439
440 // If the user is a call to llvm.fmuladd then the instruction can only be
441 // the final operand.
442 if (isFMulAddIntrinsic(UI))
443 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
444 return false;
445
446 // Check if we found the exit user.
447 BasicBlock *Parent = UI->getParent();
448 if (!TheLoop->contains(Parent)) {
449 // If we already know this instruction is used externally, move on to
450 // the next user.
451 if (ExitInstruction == Cur)
452 continue;
453
454 // Exit if you find multiple values used outside or if the header phi
455 // node is being used. In this case the user uses the value of the
456 // previous iteration, in which case we would loose "VF-1" iterations of
457 // the reduction operation if we vectorize.
458 if (ExitInstruction != nullptr || Cur == Phi)
459 return false;
460
461 // The instruction used by an outside user must be the last instruction
462 // before we feed back to the reduction phi. Otherwise, we loose VF-1
463 // operations on the value.
464 if (!is_contained(Phi->operands(), Cur))
465 return false;
466
467 ExitInstruction = Cur;
468 continue;
469 }
470
471 // Process instructions only once (termination). Each reduction cycle
472 // value must only be used once, except by phi nodes and min/max
473 // reductions which are represented as a cmp followed by a select.
474 InstDesc IgnoredVal(false, nullptr);
475 if (VisitedInsts.insert(UI).second) {
476 if (isa<PHINode>(UI)) {
477 PHIs.push_back(UI);
478 } else {
479 StoreInst *SI = dyn_cast<StoreInst>(UI);
480 if (SI && SI->getPointerOperand() == Cur) {
481 // Reduction variable chain can only be stored somewhere but it
482 // can't be used as an address.
483 return false;
484 }
485 NonPHIs.push_back(UI);
486 }
487 } else if (!isa<PHINode>(UI) &&
488 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
489 !isa<SelectInst>(UI)) ||
490 (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
491 !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
492 .isRecurrence() &&
493 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
494 return false;
495
496 // Remember that we completed the cycle.
497 if (UI == Phi)
498 FoundStartPHI = true;
499 }
500 Worklist.append(PHIs.begin(), PHIs.end());
501 Worklist.append(NonPHIs.begin(), NonPHIs.end());
502 }
503
504 // This means we have seen one but not the other instruction of the
505 // pattern or more than just a select and cmp. Zero implies that we saw a
506 // llvm.min/max intrinsic, which is always OK.
507 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
508 NumCmpSelectPatternInst != 0)
509 return false;
510
511 if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
512 return false;
513
514 if (IntermediateStore) {
515 // Check that stored value goes to the phi node again. This way we make sure
516 // that the value stored in IntermediateStore is indeed the final reduction
517 // value.
519 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
520 << *IntermediateStore << '\n');
521 return false;
522 }
523
524 // If there is an exit instruction it's value should be stored in
525 // IntermediateStore
526 if (ExitInstruction &&
527 IntermediateStore->getValueOperand() != ExitInstruction) {
528 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
529 "store last calculated value of the reduction: "
530 << *IntermediateStore << '\n');
531 return false;
532 }
533
534 // If all uses are inside the loop (intermediate stores), then the
535 // reduction value after the loop will be the one used in the last store.
536 if (!ExitInstruction)
537 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
538 }
539
540 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
541 return false;
542
543 const bool IsOrdered =
544 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
545
546 if (Start != Phi) {
547 // If the starting value is not the same as the phi node, we speculatively
548 // looked through an 'and' instruction when evaluating a potential
549 // arithmetic reduction to determine if it may have been type-promoted.
550 //
551 // We now compute the minimal bit width that is required to represent the
552 // reduction. If this is the same width that was indicated by the 'and', we
553 // can represent the reduction in the smaller type. The 'and' instruction
554 // will be eliminated since it will essentially be a cast instruction that
555 // can be ignore in the cost model. If we compute a different type than we
556 // did when evaluating the 'and', the 'and' will not be eliminated, and we
557 // will end up with different kinds of operations in the recurrence
558 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
559 // the case.
560 //
561 // The vectorizer relies on InstCombine to perform the actual
562 // type-shrinking. It does this by inserting instructions to truncate the
563 // exit value of the reduction to the width indicated by RecurrenceType and
564 // then extend this value back to the original width. If IsSigned is false,
565 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
566 // used.
567 //
568 // TODO: We should not rely on InstCombine to rewrite the reduction in the
569 // smaller type. We should just generate a correctly typed expression
570 // to begin with.
571 Type *ComputedType;
572 std::tie(ComputedType, IsSigned) =
573 computeRecurrenceType(ExitInstruction, DB, AC, DT);
574 if (ComputedType != RecurrenceType)
575 return false;
576 }
577
578 // Collect cast instructions and the minimum width used by the recurrence.
579 // If the starting value is not the same as the phi node and the computed
580 // recurrence type is equal to the recurrence type, the recurrence expression
581 // will be represented in a narrower or wider type. If there are any cast
582 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
583 // Note that the 'and' instruction was already included in this list.
584 //
585 // TODO: A better way to represent this may be to tag in some way all the
586 // instructions that are a part of the reduction. The vectorizer cost
587 // model could then apply the recurrence type to these instructions,
588 // without needing a white list of instructions to ignore.
589 // This may also be useful for the inloop reductions, if it can be
590 // kept simple enough.
591 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
592 MinWidthCastToRecurrenceType);
593
594 // We found a reduction var if we have reached the original phi node and we
595 // only have a single instruction with out-of-loop users.
596
597 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
598 // is saved as part of the RecurrenceDescriptor.
599
600 // Save the description of this reduction variable.
601 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
602 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
603 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
604 RedDes = RD;
605
606 return true;
607}
608
609// We are looking for loops that do something like this:
610// int r = 0;
611// for (int i = 0; i < n; i++) {
612// if (src[i] > 3)
613// r = 3;
614// }
615// where the reduction value (r) only has two states, in this example 0 or 3.
616// The generated LLVM IR for this type of loop will be like this:
617// for.body:
618// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
619// ...
620// %cmp = icmp sgt i32 %5, 3
621// %spec.select = select i1 %cmp, i32 3, i32 %r
622// ...
623// In general we can support vectorization of loops where 'r' flips between
624// any two non-constants, provided they are loop invariant. The only thing
625// we actually care about at the end of the loop is whether or not any lane
626// in the selected vector is different from the start value. The final
627// across-vector reduction after the loop simply involves choosing the start
628// value if nothing changed (0 in the example above) or the other selected
629// value (3 in the example above).
632 Instruction *I, InstDesc &Prev) {
633 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
634 // the select.
636 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
637 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
638 return InstDesc(Select, Prev.getRecKind());
639 }
640
641 // Only match select with single use cmp condition.
642 if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
643 m_Value())))
644 return InstDesc(false, I);
645
646 SelectInst *SI = cast<SelectInst>(I);
647 Value *NonPhi = nullptr;
648
649 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
650 NonPhi = SI->getFalseValue();
651 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
652 NonPhi = SI->getTrueValue();
653 else
654 return InstDesc(false, I);
655
656 // We are looking for selects of the form:
657 // select(cmp(), phi, loop_invariant) or
658 // select(cmp(), loop_invariant, phi)
659 if (!Loop->isLoopInvariant(NonPhi))
660 return InstDesc(false, I);
661
662 return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
664}
665
668 const InstDesc &Prev) {
669 assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
670 "Expected a cmp or select or call instruction");
671 if (!isMinMaxRecurrenceKind(Kind))
672 return InstDesc(false, I);
673
674 // We must handle the select(cmp()) as a single instruction. Advance to the
675 // select.
677 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
678 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
679 return InstDesc(Select, Prev.getRecKind());
680 }
681
682 // Only match select with single use cmp condition, or a min/max intrinsic.
683 if (!isa<IntrinsicInst>(I) &&
685 m_Value())))
686 return InstDesc(false, I);
687
688 // Look for a min/max pattern.
689 if (match(I, m_UMin(m_Value(), m_Value())))
690 return InstDesc(Kind == RecurKind::UMin, I);
691 if (match(I, m_UMax(m_Value(), m_Value())))
692 return InstDesc(Kind == RecurKind::UMax, I);
693 if (match(I, m_SMax(m_Value(), m_Value())))
694 return InstDesc(Kind == RecurKind::SMax, I);
695 if (match(I, m_SMin(m_Value(), m_Value())))
696 return InstDesc(Kind == RecurKind::SMin, I);
697 if (match(I, m_OrdFMin(m_Value(), m_Value())))
698 return InstDesc(Kind == RecurKind::FMin, I);
699 if (match(I, m_OrdFMax(m_Value(), m_Value())))
700 return InstDesc(Kind == RecurKind::FMax, I);
701 if (match(I, m_UnordFMin(m_Value(), m_Value())))
702 return InstDesc(Kind == RecurKind::FMin, I);
703 if (match(I, m_UnordFMax(m_Value(), m_Value())))
704 return InstDesc(Kind == RecurKind::FMax, I);
705 if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
706 return InstDesc(Kind == RecurKind::FMin, I);
707 if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
708 return InstDesc(Kind == RecurKind::FMax, I);
709
710 return InstDesc(false, I);
711}
712
713/// Returns true if the select instruction has users in the compare-and-add
714/// reduction pattern below. The select instruction argument is the last one
715/// in the sequence.
716///
717/// %sum.1 = phi ...
718/// ...
719/// %cmp = fcmp pred %0, %CFP
720/// %add = fadd %0, %sum.1
721/// %sum.2 = select %cmp, %add, %sum.1
724 SelectInst *SI = dyn_cast<SelectInst>(I);
725 if (!SI)
726 return InstDesc(false, I);
727
728 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
729 // Only handle single use cases for now.
730 if (!CI || !CI->hasOneUse())
731 return InstDesc(false, I);
732
733 Value *TrueVal = SI->getTrueValue();
734 Value *FalseVal = SI->getFalseValue();
735 // Handle only when either of operands of select instruction is a PHI
736 // node for now.
737 if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
738 (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
739 return InstDesc(false, I);
740
741 Instruction *I1 =
742 isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
743 : dyn_cast<Instruction>(TrueVal);
744 if (!I1 || !I1->isBinaryOp())
745 return InstDesc(false, I);
746
747 Value *Op1, *Op2;
748 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
749 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
750 I1->isFast()) ||
751 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
752 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
753 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
754 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
755 return InstDesc(false, I);
756
757 Instruction *IPhi = isa<PHINode>(*Op1) ? dyn_cast<Instruction>(Op1)
758 : dyn_cast<Instruction>(Op2);
759 if (!IPhi || IPhi != FalseVal)
760 return InstDesc(false, I);
761
762 return InstDesc(true, SI);
763}
764
767 Instruction *I, RecurKind Kind,
768 InstDesc &Prev, FastMathFlags FuncFMF) {
769 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
770 switch (I->getOpcode()) {
771 default:
772 return InstDesc(false, I);
773 case Instruction::PHI:
774 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
775 case Instruction::Sub:
776 case Instruction::Add:
777 return InstDesc(Kind == RecurKind::Add, I);
778 case Instruction::Mul:
779 return InstDesc(Kind == RecurKind::Mul, I);
780 case Instruction::And:
781 return InstDesc(Kind == RecurKind::And, I);
782 case Instruction::Or:
783 return InstDesc(Kind == RecurKind::Or, I);
784 case Instruction::Xor:
785 return InstDesc(Kind == RecurKind::Xor, I);
786 case Instruction::FDiv:
787 case Instruction::FMul:
788 return InstDesc(Kind == RecurKind::FMul, I,
789 I->hasAllowReassoc() ? nullptr : I);
790 case Instruction::FSub:
791 case Instruction::FAdd:
792 return InstDesc(Kind == RecurKind::FAdd, I,
793 I->hasAllowReassoc() ? nullptr : I);
794 case Instruction::Select:
795 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
796 Kind == RecurKind::Add || Kind == RecurKind::Mul)
797 return isConditionalRdxPattern(Kind, I);
798 [[fallthrough]];
799 case Instruction::FCmp:
800 case Instruction::ICmp:
801 case Instruction::Call:
803 return isSelectCmpPattern(L, OrigPhi, I, Prev);
804 if (isIntMinMaxRecurrenceKind(Kind) ||
805 (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
806 (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
807 I->hasNoSignedZeros())) &&
809 return isMinMaxPattern(I, Kind, Prev);
810 else if (isFMulAddIntrinsic(I))
811 return InstDesc(Kind == RecurKind::FMulAdd, I,
812 I->hasAllowReassoc() ? nullptr : I);
813 return InstDesc(false, I);
814 }
815}
816
819 unsigned MaxNumUses) {
820 unsigned NumUses = 0;
821 for (const Use &U : I->operands()) {
822 if (Insts.count(dyn_cast<Instruction>(U)))
823 ++NumUses;
824 if (NumUses > MaxNumUses)
825 return true;
826 }
827
828 return false;
829}
830
832 RecurrenceDescriptor &RedDes,
834 DominatorTree *DT,
835 ScalarEvolution *SE) {
836 BasicBlock *Header = TheLoop->getHeader();
837 Function &F = *Header->getParent();
838 FastMathFlags FMF;
839 FMF.setNoNaNs(
840 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
842 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
843
844 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
845 SE)) {
846 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
847 return true;
848 }
849 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
850 SE)) {
851 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
852 return true;
853 }
854 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
855 SE)) {
856 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
857 return true;
858 }
859 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
860 SE)) {
861 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
862 return true;
863 }
864 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
865 SE)) {
866 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
867 return true;
868 }
869 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
870 SE)) {
871 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
872 return true;
873 }
874 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
875 SE)) {
876 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
877 return true;
878 }
879 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
880 SE)) {
881 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
882 return true;
883 }
884 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
885 SE)) {
886 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
887 return true;
888 }
889 if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
890 DT, SE)) {
891 LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
892 << *Phi << "\n");
893 return true;
894 }
895 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
896 SE)) {
897 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
898 return true;
899 }
900 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
901 SE)) {
902 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
903 return true;
904 }
905 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
906 SE)) {
907 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
908 return true;
909 }
910 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
911 SE)) {
912 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
913 return true;
914 }
915 if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
916 DT, SE)) {
917 LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
918 << " PHI." << *Phi << "\n");
919 return true;
920 }
921 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
922 SE)) {
923 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
924 return true;
925 }
926 // Not a reduction of known type.
927 return false;
928}
929
931 PHINode *Phi, Loop *TheLoop,
933
934 // Ensure the phi node is in the loop header and has two incoming values.
935 if (Phi->getParent() != TheLoop->getHeader() ||
936 Phi->getNumIncomingValues() != 2)
937 return false;
938
939 // Ensure the loop has a preheader and a single latch block. The loop
940 // vectorizer will need the latch to set up the next iteration of the loop.
941 auto *Preheader = TheLoop->getLoopPreheader();
942 auto *Latch = TheLoop->getLoopLatch();
943 if (!Preheader || !Latch)
944 return false;
945
946 // Ensure the phi node's incoming blocks are the loop preheader and latch.
947 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
948 Phi->getBasicBlockIndex(Latch) < 0)
949 return false;
950
951 // Get the previous value. The previous value comes from the latch edge while
952 // the initial value comes from the preheader edge.
953 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
954
955 // If Previous is a phi in the header, go through incoming values from the
956 // latch until we find a non-phi value. Use this as the new Previous, all uses
957 // in the header will be dominated by the original phi, but need to be moved
958 // after the non-phi previous value.
960 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
961 if (PrevPhi->getParent() != Phi->getParent())
962 return false;
963 if (!SeenPhis.insert(PrevPhi).second)
964 return false;
965 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
966 }
967
968 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
969 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
970 return false;
971
972 // Ensure every user of the phi node (recursively) is dominated by the
973 // previous value. The dominance requirement ensures the loop vectorizer will
974 // not need to vectorize the initial value prior to the first iteration of the
975 // loop.
976 // TODO: Consider extending this sinking to handle memory instructions.
977
978 // We optimistically assume we can sink all users after Previous. Keep a set
979 // of instructions to sink after Previous ordered by dominance in the common
980 // basic block. It will be applied to SinkAfter if all users can be sunk.
981 auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
982 return A->comesBefore(B);
983 };
984 std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
985 CompareByComesBefore);
986
987 BasicBlock *PhiBB = Phi->getParent();
989 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
990 // Already sunk SinkCandidate.
991 if (SinkCandidate->getParent() == PhiBB &&
992 InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
993 return true;
994
995 // Cyclic dependence.
996 if (Previous == SinkCandidate)
997 return false;
998
999 if (DT->dominates(Previous,
1000 SinkCandidate)) // We already are good w/o sinking.
1001 return true;
1002
1003 if (SinkCandidate->getParent() != PhiBB ||
1004 SinkCandidate->mayHaveSideEffects() ||
1005 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1006 return false;
1007
1008 // Avoid sinking an instruction multiple times (if multiple operands are
1009 // fixed order recurrences) by sinking once - after the latest 'previous'
1010 // instruction.
1011 auto It = SinkAfter.find(SinkCandidate);
1012 if (It != SinkAfter.end()) {
1013 auto *OtherPrev = It->second;
1014 // Find the earliest entry in the 'sink-after' chain. The last entry in
1015 // the chain is the original 'Previous' for a recurrence handled earlier.
1016 auto EarlierIt = SinkAfter.find(OtherPrev);
1017 while (EarlierIt != SinkAfter.end()) {
1018 Instruction *EarlierInst = EarlierIt->second;
1019 EarlierIt = SinkAfter.find(EarlierInst);
1020 // Bail out if order has not been preserved.
1021 if (EarlierIt != SinkAfter.end() &&
1022 !DT->dominates(EarlierInst, OtherPrev))
1023 return false;
1024 OtherPrev = EarlierInst;
1025 }
1026 // Bail out if order has not been preserved.
1027 if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
1028 return false;
1029
1030 // SinkCandidate is already being sunk after an instruction after
1031 // Previous. Nothing left to do.
1032 if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
1033 return true;
1034
1035 // If there are other instructions to be sunk after SinkCandidate, remove
1036 // and re-insert SinkCandidate can break those instructions. Bail out for
1037 // simplicity.
1038 if (any_of(SinkAfter,
1039 [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
1040 return P.second == SinkCandidate;
1041 }))
1042 return false;
1043
1044 // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
1045 // re-sunk to Previous, instead of sinking to OtherPrev. Remove
1046 // SinkCandidate from SinkAfter to ensure it's insert position is updated.
1047 SinkAfter.erase(SinkCandidate);
1048 }
1049
1050 // If we reach a PHI node that is not dominated by Previous, we reached a
1051 // header PHI. No need for sinking.
1052 if (isa<PHINode>(SinkCandidate))
1053 return true;
1054
1055 // Sink User tentatively and check its users
1056 InstrsToSink.insert(SinkCandidate);
1057 WorkList.push_back(SinkCandidate);
1058 return true;
1059 };
1060
1061 WorkList.push_back(Phi);
1062 // Try to recursively sink instructions and their users after Previous.
1063 while (!WorkList.empty()) {
1064 Instruction *Current = WorkList.pop_back_val();
1065 for (User *User : Current->users()) {
1066 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1067 return false;
1068 }
1069 }
1070
1071 // We can sink all users of Phi. Update the mapping.
1072 for (Instruction *I : InstrsToSink) {
1073 SinkAfter[I] = Previous;
1074 Previous = I;
1075 }
1076 return true;
1077}
1078
1079/// This function returns the identity element (or neutral element) for
1080/// the operation K.
1082 FastMathFlags FMF) const {
1083 switch (K) {
1084 case RecurKind::Xor:
1085 case RecurKind::Add:
1086 case RecurKind::Or:
1087 // Adding, Xoring, Oring zero to a number does not change it.
1088 return ConstantInt::get(Tp, 0);
1089 case RecurKind::Mul:
1090 // Multiplying a number by 1 does not change it.
1091 return ConstantInt::get(Tp, 1);
1092 case RecurKind::And:
1093 // AND-ing a number with an all-1 value does not change it.
1094 return ConstantInt::get(Tp, -1, true);
1095 case RecurKind::FMul:
1096 // Multiplying a number by 1 does not change it.
1097 return ConstantFP::get(Tp, 1.0L);
1098 case RecurKind::FMulAdd:
1099 case RecurKind::FAdd:
1100 // Adding zero to a number does not change it.
1101 // FIXME: Ideally we should not need to check FMF for FAdd and should always
1102 // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
1103 // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
1104 // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
1105 // mean we can then remove the check for noSignedZeros() below (see D98963).
1106 if (FMF.noSignedZeros())
1107 return ConstantFP::get(Tp, 0.0L);
1108 return ConstantFP::get(Tp, -0.0L);
1109 case RecurKind::UMin:
1110 return ConstantInt::get(Tp, -1, true);
1111 case RecurKind::UMax:
1112 return ConstantInt::get(Tp, 0);
1113 case RecurKind::SMin:
1114 return ConstantInt::get(Tp,
1116 case RecurKind::SMax:
1117 return ConstantInt::get(Tp,
1119 case RecurKind::FMin:
1120 assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
1121 "nnan, nsz is expected to be set for FP min reduction.");
1122 return ConstantFP::getInfinity(Tp, false /*Negative*/);
1123 case RecurKind::FMax:
1124 assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
1125 "nnan, nsz is expected to be set for FP max reduction.");
1126 return ConstantFP::getInfinity(Tp, true /*Negative*/);
1129 return getRecurrenceStartValue();
1130 break;
1131 default:
1132 llvm_unreachable("Unknown recurrence kind");
1133 }
1134}
1135
1137 switch (Kind) {
1138 case RecurKind::Add:
1139 return Instruction::Add;
1140 case RecurKind::Mul:
1141 return Instruction::Mul;
1142 case RecurKind::Or:
1143 return Instruction::Or;
1144 case RecurKind::And:
1145 return Instruction::And;
1146 case RecurKind::Xor:
1147 return Instruction::Xor;
1148 case RecurKind::FMul:
1149 return Instruction::FMul;
1150 case RecurKind::FMulAdd:
1151 case RecurKind::FAdd:
1152 return Instruction::FAdd;
1153 case RecurKind::SMax:
1154 case RecurKind::SMin:
1155 case RecurKind::UMax:
1156 case RecurKind::UMin:
1158 return Instruction::ICmp;
1159 case RecurKind::FMax:
1160 case RecurKind::FMin:
1162 return Instruction::FCmp;
1163 default:
1164 llvm_unreachable("Unknown recurrence operation");
1165 }
1166}
1167
1170 SmallVector<Instruction *, 4> ReductionOperations;
1171 unsigned RedOp = getOpcode(Kind);
1172
1173 // Search down from the Phi to the LoopExitInstr, looking for instructions
1174 // with a single user of the correct type for the reduction.
1175
1176 // Note that we check that the type of the operand is correct for each item in
1177 // the chain, including the last (the loop exit value). This can come up from
1178 // sub, which would otherwise be treated as an add reduction. MinMax also need
1179 // to check for a pair of icmp/select, for which we use getNextInstruction and
1180 // isCorrectOpcode functions to step the right number of instruction, and
1181 // check the icmp/select pair.
1182 // FIXME: We also do not attempt to look through Select's yet, which might
1183 // be part of the reduction chain, or attempt to looks through And's to find a
1184 // smaller bitwidth. Subs are also currently not allowed (which are usually
1185 // treated as part of a add reduction) as they are expected to generally be
1186 // more expensive than out-of-loop reductions, and need to be costed more
1187 // carefully.
1188 unsigned ExpectedUses = 1;
1189 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
1190 ExpectedUses = 2;
1191
1192 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1193 for (auto *User : Cur->users()) {
1194 Instruction *UI = cast<Instruction>(User);
1195 if (isa<PHINode>(UI))
1196 continue;
1197 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1198 // We are expecting a icmp/select pair, which we go to the next select
1199 // instruction if we can. We already know that Cur has 2 uses.
1200 if (isa<SelectInst>(UI))
1201 return UI;
1202 continue;
1203 }
1204 return UI;
1205 }
1206 return nullptr;
1207 };
1208 auto isCorrectOpcode = [&](Instruction *Cur) {
1209 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1210 Value *LHS, *RHS;
1212 matchSelectPattern(Cur, LHS, RHS).Flavor);
1213 }
1214 // Recognize a call to the llvm.fmuladd intrinsic.
1215 if (isFMulAddIntrinsic(Cur))
1216 return true;
1217
1218 return Cur->getOpcode() == RedOp;
1219 };
1220
1221 // Attempt to look through Phis which are part of the reduction chain
1222 unsigned ExtraPhiUses = 0;
1223 Instruction *RdxInstr = LoopExitInstr;
1224 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1225 if (ExitPhi->getNumIncomingValues() != 2)
1226 return {};
1227
1228 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1229 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1230
1231 Instruction *Chain = nullptr;
1232 if (Inc0 == Phi)
1233 Chain = Inc1;
1234 else if (Inc1 == Phi)
1235 Chain = Inc0;
1236 else
1237 return {};
1238
1239 RdxInstr = Chain;
1240 ExtraPhiUses = 1;
1241 }
1242
1243 // The loop exit instruction we check first (as a quick test) but add last. We
1244 // check the opcode is correct (and dont allow them to be Subs) and that they
1245 // have expected to have the expected number of uses. They will have one use
1246 // from the phi and one from a LCSSA value, no matter the type.
1247 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1248 return {};
1249
1250 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1251 // for conditional reductions.
1252 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1253 return {};
1254
1255 Instruction *Cur = getNextInstruction(Phi);
1256
1257 // Each other instruction in the chain should have the expected number of uses
1258 // and be the correct opcode.
1259 while (Cur != RdxInstr) {
1260 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1261 return {};
1262
1263 ReductionOperations.push_back(Cur);
1264 Cur = getNextInstruction(Cur);
1265 }
1266
1267 ReductionOperations.push_back(Cur);
1268 return ReductionOperations;
1269}
1270
1271InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1272 const SCEV *Step, BinaryOperator *BOp,
1273 Type *ElementType,
1275 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
1276 ElementType(ElementType) {
1277 assert(IK != IK_NoInduction && "Not an induction");
1278
1279 // Start value type should match the induction kind and the value
1280 // itself should not be null.
1281 assert(StartValue && "StartValue is null");
1282 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1283 "StartValue is not a pointer for pointer induction");
1284 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1285 "StartValue is not an integer for integer induction");
1286
1287 // Check the Step Value. It should be non-zero integer value.
1288 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1289 "Step value is zero");
1290
1291 assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
1292 "Step value should be constant for pointer induction");
1293 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1294 "StepValue is not an integer");
1295
1296 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1297 "StepValue is not FP for FpInduction");
1298 assert((IK != IK_FpInduction ||
1299 (InductionBinOp &&
1300 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1301 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1302 "Binary opcode should be specified for FP induction");
1303
1304 if (IK == IK_PtrInduction)
1305 assert(ElementType && "Pointer induction must have element type");
1306 else
1307 assert(!ElementType && "Non-pointer induction cannot have element type");
1308
1309 if (Casts) {
1310 for (auto &Inst : *Casts) {
1311 RedundantCasts.push_back(Inst);
1312 }
1313 }
1314}
1315
1317 if (isa<SCEVConstant>(Step))
1318 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1319 return nullptr;
1320}
1321
1323 ScalarEvolution *SE,
1325
1326 // Here we only handle FP induction variables.
1327 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1328
1329 if (TheLoop->getHeader() != Phi->getParent())
1330 return false;
1331
1332 // The loop may have multiple entrances or multiple exits; we can analyze
1333 // this phi if it has a unique entry value and a unique backedge value.
1334 if (Phi->getNumIncomingValues() != 2)
1335 return false;
1336 Value *BEValue = nullptr, *StartValue = nullptr;
1337 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1338 BEValue = Phi->getIncomingValue(0);
1339 StartValue = Phi->getIncomingValue(1);
1340 } else {
1341 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1342 "Unexpected Phi node in the loop");
1343 BEValue = Phi->getIncomingValue(1);
1344 StartValue = Phi->getIncomingValue(0);
1345 }
1346
1347 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
1348 if (!BOp)
1349 return false;
1350
1351 Value *Addend = nullptr;
1352 if (BOp->getOpcode() == Instruction::FAdd) {
1353 if (BOp->getOperand(0) == Phi)
1354 Addend = BOp->getOperand(1);
1355 else if (BOp->getOperand(1) == Phi)
1356 Addend = BOp->getOperand(0);
1357 } else if (BOp->getOpcode() == Instruction::FSub)
1358 if (BOp->getOperand(0) == Phi)
1359 Addend = BOp->getOperand(1);
1360
1361 if (!Addend)
1362 return false;
1363
1364 // The addend should be loop invariant
1365 if (auto *I = dyn_cast<Instruction>(Addend))
1366 if (TheLoop->contains(I))
1367 return false;
1368
1369 // FP Step has unknown SCEV
1370 const SCEV *Step = SE->getUnknown(Addend);
1371 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1372 return true;
1373}
1374
1375/// This function is called when we suspect that the update-chain of a phi node
1376/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1377/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1378/// predicate P under which the SCEV expression for the phi can be the
1379/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1380/// cast instructions that are involved in the update-chain of this induction.
1381/// A caller that adds the required runtime predicate can be free to drop these
1382/// cast instructions, and compute the phi using \p AR (instead of some scev
1383/// expression with casts).
1384///
1385/// For example, without a predicate the scev expression can take the following
1386/// form:
1387/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1388///
1389/// It corresponds to the following IR sequence:
1390/// %for.body:
1391/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1392/// %casted_phi = "ExtTrunc i64 %x"
1393/// %add = add i64 %casted_phi, %step
1394///
1395/// where %x is given in \p PN,
1396/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1397/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1398/// several forms, for example, such as:
1399/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1400/// or:
1401/// ExtTrunc2: %t = shl %x, m
1402/// %casted_phi = ashr %t, m
1403///
1404/// If we are able to find such sequence, we return the instructions
1405/// we found, namely %casted_phi and the instructions on its use-def chain up
1406/// to the phi (not including the phi).
1408 const SCEVUnknown *PhiScev,
1409 const SCEVAddRecExpr *AR,
1410 SmallVectorImpl<Instruction *> &CastInsts) {
1411
1412 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1413 auto *PN = cast<PHINode>(PhiScev->getValue());
1414 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1415 const Loop *L = AR->getLoop();
1416
1417 // Find any cast instructions that participate in the def-use chain of
1418 // PhiScev in the loop.
1419 // FORNOW/TODO: We currently expect the def-use chain to include only
1420 // two-operand instructions, where one of the operands is an invariant.
1421 // createAddRecFromPHIWithCasts() currently does not support anything more
1422 // involved than that, so we keep the search simple. This can be
1423 // extended/generalized as needed.
1424
1425 auto getDef = [&](const Value *Val) -> Value * {
1426 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1427 if (!BinOp)
1428 return nullptr;
1429 Value *Op0 = BinOp->getOperand(0);
1430 Value *Op1 = BinOp->getOperand(1);
1431 Value *Def = nullptr;
1432 if (L->isLoopInvariant(Op0))
1433 Def = Op1;
1434 else if (L->isLoopInvariant(Op1))
1435 Def = Op0;
1436 return Def;
1437 };
1438
1439 // Look for the instruction that defines the induction via the
1440 // loop backedge.
1441 BasicBlock *Latch = L->getLoopLatch();
1442 if (!Latch)
1443 return false;
1444 Value *Val = PN->getIncomingValueForBlock(Latch);
1445 if (!Val)
1446 return false;
1447
1448 // Follow the def-use chain until the induction phi is reached.
1449 // If on the way we encounter a Value that has the same SCEV Expr as the
1450 // phi node, we can consider the instructions we visit from that point
1451 // as part of the cast-sequence that can be ignored.
1452 bool InCastSequence = false;
1453 auto *Inst = dyn_cast<Instruction>(Val);
1454 while (Val != PN) {
1455 // If we encountered a phi node other than PN, or if we left the loop,
1456 // we bail out.
1457 if (!Inst || !L->contains(Inst)) {
1458 return false;
1459 }
1460 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1461 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1462 InCastSequence = true;
1463 if (InCastSequence) {
1464 // Only the last instruction in the cast sequence is expected to have
1465 // uses outside the induction def-use chain.
1466 if (!CastInsts.empty())
1467 if (!Inst->hasOneUse())
1468 return false;
1469 CastInsts.push_back(Inst);
1470 }
1471 Val = getDef(Val);
1472 if (!Val)
1473 return false;
1474 Inst = dyn_cast<Instruction>(Val);
1475 }
1476
1477 return InCastSequence;
1478}
1479
1482 InductionDescriptor &D, bool Assume) {
1483 Type *PhiTy = Phi->getType();
1484
1485 // Handle integer and pointer inductions variables.
1486 // Now we handle also FP induction but not trying to make a
1487 // recurrent expression from the PHI node in-place.
1488
1489 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1490 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1491 return false;
1492
1493 if (PhiTy->isFloatingPointTy())
1494 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1495
1496 const SCEV *PhiScev = PSE.getSCEV(Phi);
1497 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1498
1499 // We need this expression to be an AddRecExpr.
1500 if (Assume && !AR)
1501 AR = PSE.getAsAddRec(Phi);
1502
1503 if (!AR) {
1504 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1505 return false;
1506 }
1507
1508 // Record any Cast instructions that participate in the induction update
1509 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1510 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1511 // only after enabling Assume with PSCEV, this means we may have encountered
1512 // cast instructions that required adding a runtime check in order to
1513 // guarantee the correctness of the AddRecurrence respresentation of the
1514 // induction.
1515 if (PhiScev != AR && SymbolicPhi) {
1517 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1518 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1519 }
1520
1521 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1522}
1523
1525 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1526 InductionDescriptor &D, const SCEV *Expr,
1527 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1528 Type *PhiTy = Phi->getType();
1529 // We only handle integer and pointer inductions variables.
1530 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
1531 return false;
1532
1533 // Check that the PHI is consecutive.
1534 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1535 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1536
1537 if (!AR) {
1538 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1539 return false;
1540 }
1541
1542 if (AR->getLoop() != TheLoop) {
1543 // FIXME: We should treat this as a uniform. Unfortunately, we
1544 // don't currently know how to handled uniform PHIs.
1545 LLVM_DEBUG(
1546 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
1547 return false;
1548 }
1549
1550 Value *StartValue =
1552
1553 BasicBlock *Latch = AR->getLoop()->getLoopLatch();
1554 if (!Latch)
1555 return false;
1556
1557 const SCEV *Step = AR->getStepRecurrence(*SE);
1558 // Calculate the pointer stride and check if it is consecutive.
1559 // The stride may be a constant or a loop invariant integer value.
1560 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
1561 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
1562 return false;
1563
1564 if (PhiTy->isIntegerTy()) {
1565 BinaryOperator *BOp =
1566 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1567 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1568 /* ElementType */ nullptr, CastsToIgnore);
1569 return true;
1570 }
1571
1572 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1573 // Pointer induction should be a constant.
1574 if (!ConstStep)
1575 return false;
1576
1577 // Always use i8 element type for opaque pointer inductions.
1578 PointerType *PtrTy = cast<PointerType>(PhiTy);
1579 Type *ElementType = PtrTy->isOpaque()
1580 ? Type::getInt8Ty(PtrTy->getContext())
1582 if (!ElementType->isSized())
1583 return false;
1584
1585 ConstantInt *CV = ConstStep->getValue();
1586 const DataLayout &DL = Phi->getModule()->getDataLayout();
1587 TypeSize TySize = DL.getTypeAllocSize(ElementType);
1588 // TODO: We could potentially support this for scalable vectors if we can
1589 // prove at compile time that the constant step is always a multiple of
1590 // the scalable type.
1591 if (TySize.isZero() || TySize.isScalable())
1592 return false;
1593
1594 int64_t Size = static_cast<int64_t>(TySize.getFixedValue());
1595 int64_t CVSize = CV->getSExtValue();
1596 if (CVSize % Size)
1597 return false;
1598 auto *StepValue =
1599 SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
1600 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
1601 /* BinOp */ nullptr, ElementType);
1602 return true;
1603}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
#define P(N)
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:708
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
static Constant * getInfinity(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1034
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:176
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:151
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
bool noSignedZeros() const
Definition: FMF.h:69
void setNoSignedZeros(bool B=true)
Definition: FMF.h:86
void setNoNaNs(bool B=true)
Definition: FMF.h:80
bool noNaNs() const
Definition: FMF.h:67
static FastMathFlags getFast()
Definition: FMF.h:52
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C / sizeof(elem).
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
ConstantInt * getConstIntStepValue() const
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
const BasicBlock * getParent() const
Definition: Instruction.h:90
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:331
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
BlockT * getHeader() const
Definition: LoopInfo.h:105
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
size_type count(const KeyT &Key) const
Definition: MapVector.h:145
iterator end()
Definition: MapVector.h:72
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:173
iterator find(const KeyT &Key)
Definition: MapVector.h:147
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Class to represent pointers.
Definition: DerivedTypes.h:632
bool isOpaque() const
Definition: DerivedTypes.h:673
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Definition: IVDescriptors.h:86
Instruction * getPatternInst() const
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
unsigned getOpcode() const
static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
TrackingVH< Value > getRecurrenceStartValue() const
SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, FastMathFlags FuncFMF)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev)
Returns a struct describing if the instruction is a llvm.
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
static bool isSelectCmpRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Value * getValueOperand()
Definition: Instructions.h:390
Value * getPointerOperand()
Definition: Instructions.h:393
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
Type * getNonOpaquePointerElementType() const
Only use this method in code that is not reachable with opaque pointers, or part of deprecated method...
Definition: Type.h:425
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:304
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:231
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
use_iterator use_begin()
Definition: Value.h:360
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:152
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:148
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr bool isZero() const
Definition: TypeSize.h:151
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > m_UnordFMin(const LHS &L, const RHS &R)
Match an 'unordered' floating point minimum function.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:997
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:985
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > m_UnordFMax(const LHS &L, const RHS &R)
Match an 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty > m_OrdFMax(const LHS &L, const RHS &R)
Match an 'ordered' floating point maximum function.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty > m_OrdFMin(const LHS &L, const RHS &R)
Match an 'ordered' floating point minimum function.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:306
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:35
@ UMin
Unisgned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ SelectFCmp
Integer select(fcmp(),x,y) where one of (x,y) is loop invariant.
@ Mul
Product of integers.
@ None
Not a recurrence.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Fused multiply-add of floats (a * b + c).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ SelectICmp
Integer select(icmp(),x,y) where one of (x,y) is loop invariant.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?