LLVM 22.0.0git
HashRecognize.cpp
Go to the documentation of this file.
1//===- HashRecognize.cpp ----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The HashRecognize analysis recognizes unoptimized polynomial hash functions
10// with operations over a Galois field of characteristic 2, also called binary
11// fields, or GF(2^n): this class of hash functions can be optimized using a
12// lookup-table-driven implementation, or with target-specific instructions.
13// Examples:
14//
15// 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2).
16// 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a
17// rolling hash polynomial division in GF(2).
18// 3. Rijndael MixColumns, a step in AES computation, which is a polynomial
19// multiplication in GF(2^3).
20// 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM),
21// which is a polynomial evaluation in GF(2^128).
22//
23// All of them use an irreducible generating polynomial of degree m,
24//
25// c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0
26//
27// where each coefficient c is can take values in GF(2^n), where 2^n is termed
28// the order of the Galois field. For GF(2), each coefficient can take values
29// either 0 or 1, and the polynomial is simply represented by m+1 bits,
30// corresponding to the coefficients. The different variants of CRC are named by
31// degree of generating polynomial used: so CRC-32 would use a polynomial of
32// degree 32.
33//
34// The reason algorithms on GF(2^n) can be optimized with a lookup-table is the
35// following: in such fields, polynomial addition and subtraction are identical
36// and equivalent to XOR, polynomial multiplication is an AND, and polynomial
37// division is identity: the XOR and AND operations in unoptimized
38// implementations are performed bit-wise, and can be optimized to be performed
39// chunk-wise, by interleaving copies of the generating polynomial, and storing
40// the pre-computed values in a table.
41//
42// A generating polynomial of m bits always has the MSB set, so we usually
43// omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial:
44//
45// (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021
46//
47// Transmissions are either in big-endian or little-endian form, and hash
48// algorithms are written according to this. For example, IEEE 802 and RS-232
49// specify little-endian transmission.
50//
51//===----------------------------------------------------------------------===//
52//
53// At the moment, we only recognize the CRC algorithm.
54// Documentation on CRC32 from the kernel:
55// https://www.kernel.org/doc/Documentation/crc32.txt
56//
57//
58//===----------------------------------------------------------------------===//
59
61#include "llvm/ADT/APInt.h"
69
70using namespace llvm;
71using namespace PatternMatch;
72using namespace SCEVPatternMatch;
73
74#define DEBUG_TYPE "hash-recognize"
75
76// KnownBits for a PHI node. There are at most two PHI nodes, corresponding to
77// the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not
78// relevant.
80
81// A pair of a PHI node along with its incoming value from within a loop.
82using PhiStepPair = std::pair<const PHINode *, const Instruction *>;
83
84/// A much simpler version of ValueTracking, in that it computes KnownBits of
85/// values, except that it computes the evolution of KnownBits in a loop with a
86/// given trip count, and predication is specialized for a significant-bit
87/// check.
89 const unsigned TripCount;
90 const bool ByteOrderSwapped;
91 APInt GenPoly;
92 StringRef ErrStr;
93
94 // Compute the KnownBits of a BinaryOperator.
95 KnownBits computeBinOp(const BinaryOperator *I);
96
97 // Compute the KnownBits of an Instruction.
98 KnownBits computeInstr(const Instruction *I);
99
100 // Compute the KnownBits of a Value.
101 KnownBits compute(const Value *V);
102
103public:
104 // ValueEvolution is meant to be constructed with the TripCount of the loop,
105 // and a boolean indicating whether the polynomial algorithm is big-endian
106 // (for the significant-bit check).
107 ValueEvolution(unsigned TripCount, bool ByteOrderSwapped);
108
109 // Given a list of PHI nodes along with their incoming value from within the
110 // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on
111 // the final iteration. Returns true on success and false on error.
112 bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions);
113
114 // In case ValueEvolution encounters an error, this is meant to be used for a
115 // precise error message.
116 StringRef getError() const { return ErrStr; }
117
118 // A set of Instructions visited by ValueEvolution. The only unvisited
119 // instructions will be ones not on the use-def chain of the PHIs' evolutions.
121
122 // The computed KnownBits for each PHI node, which is populated after
123 // computeEvolutions is called.
125};
126
127ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
128 : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
129
130KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) {
131 KnownBits KnownL(compute(I->getOperand(0)));
132 KnownBits KnownR(compute(I->getOperand(1)));
133
134 switch (I->getOpcode()) {
135 case Instruction::BinaryOps::And:
136 return KnownL & KnownR;
137 case Instruction::BinaryOps::Or:
138 return KnownL | KnownR;
139 case Instruction::BinaryOps::Xor:
140 return KnownL ^ KnownR;
141 case Instruction::BinaryOps::Shl: {
142 auto *OBO = cast<OverflowingBinaryOperator>(I);
143 return KnownBits::shl(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
144 OBO->hasNoSignedWrap());
145 }
146 case Instruction::BinaryOps::LShr:
147 return KnownBits::lshr(KnownL, KnownR);
148 case Instruction::BinaryOps::AShr:
149 return KnownBits::ashr(KnownL, KnownR);
150 case Instruction::BinaryOps::Add: {
151 auto *OBO = cast<OverflowingBinaryOperator>(I);
152 return KnownBits::add(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
153 OBO->hasNoSignedWrap());
154 }
155 case Instruction::BinaryOps::Sub: {
156 auto *OBO = cast<OverflowingBinaryOperator>(I);
157 return KnownBits::sub(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
158 OBO->hasNoSignedWrap());
159 }
160 case Instruction::BinaryOps::Mul: {
161 Value *Op0 = I->getOperand(0);
162 Value *Op1 = I->getOperand(1);
163 bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(Op0);
164 return KnownBits::mul(KnownL, KnownR, SelfMultiply);
165 }
166 case Instruction::BinaryOps::UDiv:
167 return KnownBits::udiv(KnownL, KnownR);
168 case Instruction::BinaryOps::SDiv:
169 return KnownBits::sdiv(KnownL, KnownR);
170 case Instruction::BinaryOps::URem:
171 return KnownBits::urem(KnownL, KnownR);
172 case Instruction::BinaryOps::SRem:
173 return KnownBits::srem(KnownL, KnownR);
174 default:
175 ErrStr = "Unknown BinaryOperator";
176 unsigned BitWidth = I->getType()->getScalarSizeInBits();
177 return {BitWidth};
178 }
179}
180
181KnownBits ValueEvolution::computeInstr(const Instruction *I) {
182 unsigned BitWidth = I->getType()->getScalarSizeInBits();
183
184 // computeInstr is the only entry-point that needs to update the Visited set.
186
187 // We look up in the map that contains the KnownBits of the PHI from the
188 // previous iteration.
189 if (const PHINode *P = dyn_cast<PHINode>(I))
190 return KnownPhis.lookup_or(P, BitWidth);
191
192 // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
193 // that is predicated on the (least|most)-significant-bit check.
194 CmpPredicate Pred;
195 Value *L, *R;
196 Instruction *TV, *FV;
197 if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Instruction(TV),
198 m_Instruction(FV)))) {
199 Visited.insert(cast<Instruction>(I->getOperand(0)));
200
201 // We need to check LCR against [0, 2) in the little-endian case, because
202 // the RCR check is insufficient: it is simply [0, 1).
203 if (!ByteOrderSwapped) {
204 KnownBits KnownL = compute(L);
205 unsigned ICmpBW = KnownL.getBitWidth();
206 auto LCR = ConstantRange::fromKnownBits(KnownL, false);
207 auto CheckLCR = ConstantRange(APInt::getZero(ICmpBW), APInt(ICmpBW, 2));
208 if (LCR != CheckLCR) {
209 ErrStr = "Bad LHS of significant-bit-check";
210 return {BitWidth};
211 }
212 }
213
214 // Check that the predication is on (most|least) significant bit.
215 KnownBits KnownR = compute(R);
216 unsigned ICmpBW = KnownR.getBitWidth();
217 auto RCR = ConstantRange::fromKnownBits(KnownR, false);
218 auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, RCR);
219 ConstantRange CheckRCR(APInt::getZero(ICmpBW),
220 ByteOrderSwapped ? APInt::getSignedMinValue(ICmpBW)
221 : APInt(ICmpBW, 1));
222
223 // We only compute KnownBits of either TV or FV, as the other value would
224 // just be a bit-shift as checked by isBigEndianBitShift.
225 if (AllowedR == CheckRCR) {
226 Visited.insert(FV);
227 return compute(TV);
228 }
229 if (AllowedR.inverse() == CheckRCR) {
230 Visited.insert(TV);
231 return compute(FV);
232 }
233
234 ErrStr = "Bad RHS of significant-bit-check";
235 return {BitWidth};
236 }
237
238 if (auto *BO = dyn_cast<BinaryOperator>(I))
239 return computeBinOp(BO);
240
241 switch (I->getOpcode()) {
242 case Instruction::CastOps::Trunc:
243 return compute(I->getOperand(0)).trunc(BitWidth);
244 case Instruction::CastOps::ZExt:
245 return compute(I->getOperand(0)).zext(BitWidth);
246 case Instruction::CastOps::SExt:
247 return compute(I->getOperand(0)).sext(BitWidth);
248 default:
249 ErrStr = "Unknown Instruction";
250 return {BitWidth};
251 }
252}
253
254KnownBits ValueEvolution::compute(const Value *V) {
255 if (auto *CI = dyn_cast<ConstantInt>(V))
256 return KnownBits::makeConstant(CI->getValue());
257
258 if (auto *I = dyn_cast<Instruction>(V))
259 return computeInstr(I);
260
261 ErrStr = "Unknown Value";
262 unsigned BitWidth = V->getType()->getScalarSizeInBits();
263 return {BitWidth};
264}
265
267 for (unsigned I = 0; I < TripCount; ++I)
268 for (auto [Phi, Step] : PhiEvolutions)
269 KnownPhis.emplace_or_assign(Phi, computeInstr(Step));
270
271 return ErrStr.empty();
272}
273
274/// A structure that can hold either a Simple Recurrence or a Conditional
275/// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand
276/// of the BO, while in a Conditional Recurrence, it is a SelectInst.
278 const Loop &L;
279 const PHINode *Phi = nullptr;
280 BinaryOperator *BO = nullptr;
281 Value *Start = nullptr;
282 Value *Step = nullptr;
283 std::optional<APInt> ExtraConst;
284
285 RecurrenceInfo(const Loop &L) : L(L) {}
286 operator bool() const { return BO; }
287
288 void print(raw_ostream &OS, unsigned Indent = 0) const {
289 OS.indent(Indent) << "Phi: ";
290 Phi->print(OS);
291 OS << "\n";
292 OS.indent(Indent) << "BinaryOperator: ";
293 BO->print(OS);
294 OS << "\n";
295 OS.indent(Indent) << "Start: ";
296 Start->print(OS);
297 OS << "\n";
298 OS.indent(Indent) << "Step: ";
299 Step->print(OS);
300 OS << "\n";
301 if (ExtraConst) {
302 OS.indent(Indent) << "ExtraConst: ";
303 ExtraConst->print(OS, false);
304 OS << "\n";
305 }
306 }
307
308#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
309 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
310#endif
311
312 bool matchSimpleRecurrence(const PHINode *P);
314 const PHINode *P,
315 Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
316
317private:
318 BinaryOperator *digRecurrence(
319 Instruction *V,
320 Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
321};
322
323/// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence
324/// cycle of the form:
325///
326/// loop:
327/// %rec = phi [%start, %entry], [%BO, %loop]
328/// ...
329/// %BO = binop %rec, %step
330///
331/// or
332///
333/// loop:
334/// %rec = phi [%start, %entry], [%BO, %loop]
335/// ...
336/// %BO = binop %step, %rec
337///
339 Phi = P;
341}
342
343/// Digs for a recurrence starting with \p V hitting the PHI node in a use-def
344/// chain. Used by matchConditionalRecurrence.
346RecurrenceInfo::digRecurrence(Instruction *V,
347 Instruction::BinaryOps BOWithConstOpToMatch) {
349 Worklist.push_back(V);
350 while (!Worklist.empty()) {
351 Instruction *I = Worklist.pop_back_val();
352
353 // Don't add a PHI's operands to the Worklist.
354 if (isa<PHINode>(I))
355 continue;
356
357 // Find a recurrence over a BinOp, by matching either of its operands
358 // with with the PHINode.
360 return cast<BinaryOperator>(I);
361
362 // Bind to ExtraConst, if we match exactly one.
363 if (I->getOpcode() == BOWithConstOpToMatch) {
364 if (ExtraConst)
365 return nullptr;
366 const APInt *C = nullptr;
367 if (match(I, m_c_BinOp(m_APInt(C), m_Value())))
368 ExtraConst = *C;
369 }
370
371 // Continue along the use-def chain.
372 for (Use &U : I->operands())
373 if (auto *UI = dyn_cast<Instruction>(U))
374 if (L.contains(UI))
375 Worklist.push_back(UI);
376 }
377 return nullptr;
378}
379
380/// A Conditional Recurrence is a recurrence of the form:
381///
382/// loop:
383/// %rec = phi [%start, %entry], [%step, %loop]
384/// ...
385/// %step = select _, %tv, %fv
386///
387/// where %tv and %fv ultimately end up using %rec via the same %BO instruction,
388/// after digging through the use-def chain.
389///
390/// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging
391/// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched,
392/// and ExtraConst is a constant operand of that BinOp. This peculiarity exists,
393/// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the
394/// ExtraConst ends up being the generating polynomial.
396 const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) {
397 Phi = P;
398 if (Phi->getNumIncomingValues() != 2)
399 return false;
400
401 for (unsigned Idx = 0; Idx != 2; ++Idx) {
402 Value *FoundStep = Phi->getIncomingValue(Idx);
403 Value *FoundStart = Phi->getIncomingValue(!Idx);
404
405 Instruction *TV, *FV;
406 if (!match(FoundStep,
408 continue;
409
410 // For a conditional recurrence, both the true and false values of the
411 // select must ultimately end up in the same recurrent BinOp.
412 BinaryOperator *FoundBO = digRecurrence(TV, BOWithConstOpToMatch);
413 BinaryOperator *AltBO = digRecurrence(FV, BOWithConstOpToMatch);
414 if (!FoundBO || FoundBO != AltBO)
415 return false;
416
417 if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) {
418 LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp "
419 "with constant in conditional recurrence\n");
420 return false;
421 }
422
423 BO = FoundBO;
424 Start = FoundStart;
425 Step = FoundStep;
426 return true;
427 }
428 return false;
429}
430
431/// Iterates over all the phis in \p LoopLatch, and attempts to extract a
432/// Conditional Recurrence and an optional Simple Recurrence.
433static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>>
434getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) {
435 auto Phis = LoopLatch->phis();
436 unsigned NumPhis = std::distance(Phis.begin(), Phis.end());
437 if (NumPhis != 2 && NumPhis != 3)
438 return {};
439
440 RecurrenceInfo SimpleRecurrence(L);
441 RecurrenceInfo ConditionalRecurrence(L);
442 for (PHINode &P : Phis) {
443 if (&P == IndVar)
444 continue;
445 if (!SimpleRecurrence)
446 SimpleRecurrence.matchSimpleRecurrence(&P);
447 if (!ConditionalRecurrence)
448 ConditionalRecurrence.matchConditionalRecurrence(
449 &P, Instruction::BinaryOps::Xor);
450 }
451 if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence))
452 return {};
453 return std::make_pair(SimpleRecurrence, ConditionalRecurrence);
454}
455
456PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS,
457 Value *ComputedValue, bool ByteOrderSwapped,
458 Value *LHSAux)
459 : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue),
460 ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {}
461
462/// In the big-endian case, checks the bottom N bits against CheckFn, and that
463/// the rest are unknown. In the little-endian case, checks the top N bits
464/// against CheckFn, and that the rest are unknown. Callers usually call this
465/// function with N = TripCount, and CheckFn checking that the remainder bits of
466/// the CRC polynomial division are zero.
467static bool checkExtractBits(const KnownBits &Known, unsigned N,
468 function_ref<bool(const KnownBits &)> CheckFn,
469 bool ByteOrderSwapped) {
470 // Check that the entire thing is a constant.
471 if (N == Known.getBitWidth())
472 return CheckFn(Known.extractBits(N, 0));
473
474 // Check that the {top, bottom} N bits are not unknown and that the {bottom,
475 // top} N bits are known.
476 unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
477 unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
478 return CheckFn(Known.extractBits(N, BitPos)) &&
479 Known.extractBits(Known.getBitWidth() - N, SwappedBitPos).isUnknown();
480}
481
482/// Generate a lookup table of 256 entries by interleaving the generating
483/// polynomial. The optimization technique of table-lookup for CRC is also
484/// called the Sarwate algorithm.
486 bool ByteOrderSwapped) {
487 unsigned BW = GenPoly.getBitWidth();
488 CRCTable Table;
489 Table[0] = APInt::getZero(BW);
490
491 if (ByteOrderSwapped) {
492 APInt CRCInit = APInt::getSignedMinValue(BW);
493 for (unsigned I = 1; I < 256; I <<= 1) {
494 CRCInit = CRCInit.shl(1) ^
495 (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW));
496 for (unsigned J = 0; J < I; ++J)
497 Table[I + J] = CRCInit ^ Table[J];
498 }
499 return Table;
500 }
501
502 APInt CRCInit(BW, 1);
503 for (unsigned I = 128; I; I >>= 1) {
504 CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(BW));
505 for (unsigned J = 0; J < 256; J += (I << 1))
506 Table[I + J] = CRCInit ^ Table[J];
507 }
508 return Table;
509}
510
511/// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain
512/// of \p SI's condition, ignoring any casts. The purpose of this function is to
513/// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC
514/// computation. We cannot check the correctness of casts at this point, and
515/// rely on the KnownBits propagation to check correctness of the CRC
516/// computation.
517///
518/// In other words, it checks for the following pattern:
519///
520/// loop:
521/// %P1 = phi [_, %entry], [%P1.next, %loop]
522/// %P2 = phi [_, %entry], [%P2.next, %loop]
523/// ...
524/// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2)
525///
526/// where %xor is in the use-def chain of \p SI's condition.
527static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1,
528 const PHINode *P2, const Loop &L) {
530
531 // matchConditionalRecurrence has already ensured that the SelectInst's
532 // condition is an Instruction.
533 Worklist.push_back(cast<Instruction>(SI->getCondition()));
534
535 while (!Worklist.empty()) {
536 const Instruction *I = Worklist.pop_back_val();
537
538 // Don't add a PHI's operands to the Worklist.
539 if (isa<PHINode>(I))
540 continue;
541
542 // If we match an XOR of the two PHIs ignoring casts, we're done.
545 return true;
546
547 // Continue along the use-def chain.
548 for (const Use &U : I->operands())
549 if (auto *UI = dyn_cast<Instruction>(U))
550 if (L.contains(UI))
551 Worklist.push_back(UI);
552 }
553 return false;
554}
555
556// Recognizes a multiplication or division by the constant two, using SCEV. By
557// doing this, we're immune to whether the IR expression is mul/udiv or
558// equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul,
559// and std::nullopt otherwise.
560static std::optional<bool> isBigEndianBitShift(Value *V, ScalarEvolution &SE) {
561 if (!V->getType()->isIntegerTy())
562 return {};
563
564 const SCEV *E = SE.getSCEV(V);
566 return false;
568 return true;
569 return {};
570}
571
572/// The main entry point for analyzing a loop and recognizing the CRC algorithm.
573/// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on
574/// failure.
575std::variant<PolynomialInfo, ErrBits, StringRef>
577 if (!L.isInnermost())
578 return "Loop is not innermost";
579 BasicBlock *Latch = L.getLoopLatch();
580 BasicBlock *Exit = L.getExitBlock();
581 const PHINode *IndVar = L.getCanonicalInductionVariable();
582 if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1)
583 return "Loop not in canonical form";
584 unsigned TC = SE.getSmallConstantTripCount(&L);
585 if (!TC || TC > 256 || TC % 8)
586 return "Unable to find a small constant byte-multiple trip count";
587
588 auto R = getRecurrences(Latch, IndVar, L);
589 if (!R)
590 return "Found stray PHI";
591 auto [SimpleRecurrence, ConditionalRecurrence] = *R;
592 if (!ConditionalRecurrence)
593 return "Unable to find conditional recurrence";
594
595 // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
596 // with two, or in other words, that they're single bit-shifts.
597 std::optional<bool> ByteOrderSwapped =
598 isBigEndianBitShift(ConditionalRecurrence.BO, SE);
599 if (!ByteOrderSwapped)
600 return "Loop with non-unit bitshifts";
601 if (SimpleRecurrence) {
602 if (isBigEndianBitShift(SimpleRecurrence.BO, SE) != ByteOrderSwapped)
603 return "Loop with non-unit bitshifts";
604
605 // Ensure that the PHIs have exactly two uses:
606 // the bit-shift, and the XOR (or a cast feeding into the XOR).
607 if (!ConditionalRecurrence.Phi->hasNUses(2) ||
608 !SimpleRecurrence.Phi->hasNUses(2))
609 return "Recurrences have stray uses";
610
611 // Check that the SelectInst ConditionalRecurrence.Step is conditional on
612 // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi.
613 if (!isConditionalOnXorOfPHIs(cast<SelectInst>(ConditionalRecurrence.Step),
614 SimpleRecurrence.Phi,
615 ConditionalRecurrence.Phi, L))
616 return "Recurrences not intertwined with XOR";
617 }
618
619 // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS.
620 Value *LHS = ConditionalRecurrence.Start;
621 Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
622 if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth()
624 return "Loop iterations exceed bitwidth of data";
625
626 // Make sure that the computed value is used in the exit block: this should be
627 // true even if it is only really used in an outer loop's exit block, since
628 // the loop is in LCSSA form.
629 auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence.Step);
630 if (none_of(ComputedValue->users(), [Exit](User *U) {
631 auto *UI = dyn_cast<Instruction>(U);
632 return UI && UI->getParent() == Exit;
633 }))
634 return "Unable to find use of computed value in loop exit block";
635
636 assert(ConditionalRecurrence.ExtraConst &&
637 "Expected ExtraConst in conditional recurrence");
638 const APInt &GenPoly = *ConditionalRecurrence.ExtraConst;
639
640 // PhiEvolutions are pairs of PHINodes along with their incoming value from
641 // within the loop, which we term as their step. Note that in the case of a
642 // Simple Recurrence, Step is an operand of the BO, while in a Conditional
643 // Recurrence, it is a SelectInst.
644 SmallVector<PhiStepPair, 2> PhiEvolutions;
645 PhiEvolutions.emplace_back(ConditionalRecurrence.Phi, ComputedValue);
646 if (SimpleRecurrence)
647 PhiEvolutions.emplace_back(SimpleRecurrence.Phi, SimpleRecurrence.BO);
648
649 ValueEvolution VE(TC, *ByteOrderSwapped);
650 if (!VE.computeEvolutions(PhiEvolutions))
651 return VE.getError();
652 KnownBits ResultBits = VE.KnownPhis.at(ConditionalRecurrence.Phi);
653
654 // There must be exactly four unvisited instructions, corresponding to the
655 // IndVar PHI. Any other unvisited instructions from the KnownBits propagation
656 // can complicate the optimization, which replaces the entire loop with the
657 // table-lookup version of the hash algorithm.
658 std::initializer_list<const Instruction *> AugmentVisited = {
659 IndVar, Latch->getTerminator(), L.getLatchCmpInst(),
660 cast<Instruction>(IndVar->getIncomingValueForBlock(Latch))};
661 VE.Visited.insert_range(AugmentVisited);
662 if (std::distance(Latch->begin(), Latch->end()) != VE.Visited.size())
663 return "Found stray unvisited instructions";
664
665 unsigned N = std::min(TC, ResultBits.getBitWidth());
666 auto IsZero = [](const KnownBits &K) { return K.isZero(); };
667 if (!checkExtractBits(ResultBits, N, IsZero, *ByteOrderSwapped))
668 return ErrBits(ResultBits, TC, *ByteOrderSwapped);
669
670 return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped,
671 LHSAux);
672}
673
675 for (unsigned I = 0; I < 256; I++) {
676 (*this)[I].print(OS, false);
677 OS << (I % 16 == 15 ? '\n' : ' ');
678 }
679}
680
681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
682void CRCTable::dump() const { print(dbgs()); }
683#endif
684
686 if (!L.isInnermost())
687 return;
688 OS << "HashRecognize: Checking a loop in '"
689 << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr()
690 << "\n";
691 auto Ret = recognizeCRC();
692 if (!std::holds_alternative<PolynomialInfo>(Ret)) {
693 OS << "Did not find a hash algorithm\n";
694 if (std::holds_alternative<StringRef>(Ret))
695 OS << "Reason: " << std::get<StringRef>(Ret) << "\n";
696 if (std::holds_alternative<ErrBits>(Ret)) {
697 auto [Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(Ret);
698 OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top ")
699 << Iter << " bits zero (";
700 Actual.print(OS);
701 OS << ")\n";
702 }
703 return;
704 }
705
706 auto Info = std::get<PolynomialInfo>(Ret);
707 OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian ")
708 << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count "
709 << Info.TripCount << "\n";
710 OS.indent(2) << "Initial CRC: ";
711 Info.LHS->print(OS);
712 OS << "\n";
713 OS.indent(2) << "Generating polynomial: ";
714 Info.RHS.print(OS, false);
715 OS << "\n";
716 OS.indent(2) << "Computed CRC: ";
717 Info.ComputedValue->print(OS);
718 OS << "\n";
719 if (Info.LHSAux) {
720 OS.indent(2) << "Auxiliary data: ";
721 Info.LHSAux->print(OS);
722 OS << "\n";
723 }
724 OS.indent(2) << "Computed CRC lookup table:\n";
725 genSarwateTable(Info.RHS, Info.ByteOrderSwapped).print(OS);
726}
727
728#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
729void HashRecognize::dump() const { print(dbgs()); }
730#endif
731
732std::optional<PolynomialInfo> HashRecognize::getResult() const {
733 auto Res = HashRecognize(L, SE).recognizeCRC();
734 if (std::holds_alternative<PolynomialInfo>(Res))
735 return std::get<PolynomialInfo>(Res);
736 return std::nullopt;
737}
738
740 : L(L), SE(SE) {}
741
745 LPMUpdater &) {
746 HashRecognize(L, AR.SE).print(OS);
747 return PreservedAnalyses::all();
748}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::pair< const PHINode *, const Instruction * > PhiStepPair
static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1, const PHINode *P2, const Loop &L)
Checks that P1 and P2 are used together in an XOR in the use-def chain of SI's condition,...
static std::optional< std::pair< RecurrenceInfo, RecurrenceInfo > > getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L)
Iterates over all the phis in LoopLatch, and attempts to extract a Conditional Recurrence and an opti...
static std::optional< bool > isBigEndianBitShift(Value *V, ScalarEvolution &SE)
static bool checkExtractBits(const KnownBits &Known, unsigned N, function_ref< bool(const KnownBits &)> CheckFn, bool ByteOrderSwapped)
In the big-endian case, checks the bottom N bits against CheckFn, and that the rest are unknown.
This header provides classes for managing per-loop analyses.
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
raw_pwrite_stream & OS
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
Value * LHS
A much simpler version of ValueTracking, in that it computes KnownBits of values, except that it comp...
StringRef getError() const
SmallPtrSet< const Instruction *, 16 > Visited
KnownPhiMap KnownPhis
bool computeEvolutions(ArrayRef< PhiStepPair > PhiEvolutions)
ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
Class for arbitrary precision integers.
Definition: APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
bool isSignBitSet() const
Determine if sign bit of this APInt is set.
Definition: APInt.h:341
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
This class represents a range of values.
Definition: ConstantRange.h:47
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
static LLVM_ABI ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
std::pair< iterator, bool > emplace_or_assign(const KeyT &Key, Ts &&...Args)
Definition: DenseMap.h:304
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:221
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
Definition: DenseMap.h:213
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &)
The analysis.
Definition: HashRecognize.h:80
static CRCTable genSarwateTable(const APInt &GenPoly, bool ByteOrderSwapped)
Generate a lookup table of 256 entries by interleaving the generating polynomial.
std::optional< PolynomialInfo > getResult() const
LLVM_DUMP_METHOD void dump() const
HashRecognize(const Loop &L, ScalarEvolution &SE)
void print(raw_ostream &OS) const
std::variant< PolynomialInfo, ErrBits, StringRef > recognizeCRC() const
The main entry point for analyzing a loop and recognizing the CRC algorithm.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:99
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
LLVM_ABI unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:5222
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:862
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
SCEVBinaryExpr_match< SCEVUDivExpr, Op0_t, Op1_t > m_scev_UDiv(const Op0_t &Op0, const Op1_t &Op1)
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be undef, but may be poison.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
std::tuple< KnownBits, unsigned, bool > ErrBits
A tuple of bits that are expected to be zero, number N of them expected to be zero,...
Definition: HashRecognize.h:33
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
#define N
A structure that can hold either a Simple Recurrence or a Conditional Recurrence.
const Loop & L
const PHINode * Phi
LLVM_DUMP_METHOD void dump() const
bool matchConditionalRecurrence(const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch=Instruction::BinaryOpsEnd)
A Conditional Recurrence is a recurrence of the form:
void print(raw_ostream &OS, unsigned Indent=0) const
std::optional< APInt > ExtraConst
bool matchSimpleRecurrence(const PHINode *P)
Wraps llvm::matchSimpleRecurrence.
BinaryOperator * BO
RecurrenceInfo(const Loop &L)
A custom std::array with 256 entries, that also has a print function.
Definition: HashRecognize.h:36
LLVM_DUMP_METHOD void dump() const
void print(raw_ostream &OS) const
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:294
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
Definition: KnownBits.cpp:427
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1056
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:66
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:165
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
Definition: KnownBits.cpp:370
KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const
Return a subset of the known bits from [bitPosition,bitPosition+numBits).
Definition: KnownBits.h:218
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:173
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition: KnownBits.h:340
static LLVM_ABI KnownBits srem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for srem(LHS, RHS).
Definition: KnownBits.cpp:1073
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1016
static LLVM_ABI KnownBits sdiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for sdiv(LHS, RHS).
Definition: KnownBits.cpp:960
static KnownBits sub(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from subtraction of LHS and RHS.
Definition: KnownBits.h:346
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:803
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
The structure that is returned when a polynomial algorithm was recognized by the analysis.
Definition: HashRecognize.h:46
PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, Value *ComputedValue, bool ByteOrderSwapped, Value *LHSAux=nullptr)