1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//

#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"

41using namespace llvm;
42using namespace PatternMatch;
43
44#define DEBUG_TYPE "aggressive-instcombine"
45
46namespace llvm {
48}
49
50STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
51STATISTIC(NumGuardedRotates,
52 "Number of guarded rotates transformed into funnel shifts");
53STATISTIC(NumGuardedFunnelShifts,
54 "Number of guarded funnel shifts transformed into funnel shifts");
55STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
56
static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
    cl::desc("Max number of instructions to scan for aggressive instcombine."));
60
static cl::opt<unsigned> StrNCmpInlineThreshold(
    "strncmp-inline-threshold", cl::init(3), cl::Hidden,
    cl::desc("The maximum length of a constant string for a builtin string cmp "
             "call eligible for inlining. The default value is 3."));
65
static cl::opt<unsigned>
    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
                          cl::desc("The maximum length of a constant string to "
                                   "inline a memchr call."));
70
71/// Match a pattern for a bitwise funnel/rotate operation that partially guards
72/// against undefined behavior by branching around the funnel-shift/rotation
73/// when the shift amount is 0.
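/// For illustration, a hand-written sketch (not taken from a specific test) of
/// source that typically produces this shape:
///   unsigned rot(unsigned x, unsigned n) {
///     return n == 0 ? x : (x << n) | (x >> (32 - n));
///   }
/// The branch on n == 0 guards the shift by (32 - 0); the whole diamond can be
/// replaced with a single call to llvm.fshl.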
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
76 return false;
77
78 // As with the one-use checks below, this is not strictly necessary, but we
79 // are being cautious to avoid potential perf regressions on targets that
80 // do not actually have a funnel/rotate instruction (where the funnel shift
81 // would be expanded back into math/shift/logic ops).
82 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
83 return false;
84
85 // Match V to funnel shift left/right and capture the source operands and
86 // shift amount.
87 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
88 Value *&ShAmt) {
89 unsigned Width = V->getType()->getScalarSizeInBits();
90
91 // fshl(ShVal0, ShVal1, ShAmt)
92 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width -ShAmt))
93 if (match(V, m_OneUse(m_c_Or(
94 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
95 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
96 m_Deferred(ShAmt))))))) {
97 return Intrinsic::fshl;
98 }
99
    // fshr(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
    if (match(V, m_OneUse(m_c_Or(
                     m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
                                                  m_Value(ShAmt))),
                     m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
      return Intrinsic::fshr;
    }

    return Intrinsic::not_intrinsic;
  };
111
112 // One phi operand must be a funnel/rotate operation, and the other phi
113 // operand must be the source value of that funnel/rotate operation:
114 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
115 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
116 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
117 PHINode &Phi = cast<PHINode>(I);
118 unsigned FunnelOp = 0, GuardOp = 1;
119 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
120 Value *ShVal0, *ShVal1, *ShAmt;
121 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
122 if (IID == Intrinsic::not_intrinsic ||
123 (IID == Intrinsic::fshl && ShVal0 != P1) ||
124 (IID == Intrinsic::fshr && ShVal1 != P1)) {
125 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
126 if (IID == Intrinsic::not_intrinsic ||
127 (IID == Intrinsic::fshl && ShVal0 != P0) ||
128 (IID == Intrinsic::fshr && ShVal1 != P0))
129 return false;
130 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
131 "Pattern must match funnel shift left or right");
132 std::swap(FunnelOp, GuardOp);
133 }
134
135 // The incoming block with our source operand must be the "guard" block.
136 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
137 // amount is equal to 0. The other incoming block is the block with the
138 // funnel/rotate.
139 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
140 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
141 Instruction *TermI = GuardBB->getTerminator();
142
143 // Ensure that the shift values dominate each block.
144 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
145 return false;
146
  BasicBlock *PhiBB = Phi.getParent();
  if (!match(TermI, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
                                        m_ZeroInt()),
                         m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
    return false;
152
153 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
154
155 if (ShVal0 == ShVal1)
156 ++NumGuardedRotates;
157 else
158 ++NumGuardedFunnelShifts;
159
160 // If this is not a rotate then the select was blocking poison from the
161 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
162 bool IsFshl = IID == Intrinsic::fshl;
163 if (ShVal0 != ShVal1) {
164 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
165 ShVal1 = Builder.CreateFreeze(ShVal1);
166 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
167 ShVal0 = Builder.CreateFreeze(ShVal0);
168 }
169
170 // We matched a variation of this IR pattern:
171 // GuardBB:
172 // %cmp = icmp eq i32 %ShAmt, 0
173 // br i1 %cmp, label %PhiBB, label %FunnelBB
174 // FunnelBB:
175 // %sub = sub i32 32, %ShAmt
176 // %shr = lshr i32 %ShVal1, %sub
177 // %shl = shl i32 %ShVal0, %ShAmt
178 // %fsh = or i32 %shr, %shl
179 // br label %PhiBB
180 // PhiBB:
181 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
182 // -->
183 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
184 Phi.replaceAllUsesWith(
185 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
186 return true;
187}
188
189/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
190/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
191/// of 'and' ops, then we also need to capture the fact that we saw an
192/// "and X, 1", so that's an extra return value for that case.
193namespace {
194struct MaskOps {
195 Value *Root = nullptr;
196 APInt Mask;
197 bool MatchAndChain;
198 bool FoundAnd1 = false;
199
200 MaskOps(unsigned BitWidth, bool MatchAnds)
201 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
202};
203} // namespace
204
205/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
206/// chain of 'and' or 'or' instructions looking for shift ops of a common source
207/// value. Examples:
208/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
209/// returns { X, 0x129 }
210/// and (and (X >> 1), 1), (X >> 4)
211/// returns { X, 0x12 }
212static bool matchAndOrChain(Value *V, MaskOps &MOps) {
213 Value *Op0, *Op1;
214 if (MOps.MatchAndChain) {
215 // Recurse through a chain of 'and' operands. This requires an extra check
216 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
217 // in the chain to know that all of the high bits are cleared.
218 if (match(V, m_And(m_Value(Op0), m_One()))) {
219 MOps.FoundAnd1 = true;
220 return matchAndOrChain(Op0, MOps);
221 }
222 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
223 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
224 } else {
225 // Recurse through a chain of 'or' operands.
226 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
227 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
228 }
229
230 // We need a shift-right or a bare value representing a compare of bit 0 of
231 // the original source operand.
232 Value *Candidate;
233 const APInt *BitIndex = nullptr;
234 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
235 Candidate = V;
236
237 // Initialize result source operand.
238 if (!MOps.Root)
239 MOps.Root = Candidate;
240
241 // The shift constant is out-of-range? This code hasn't been simplified.
242 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
243 return false;
244
245 // Fill in the mask bit derived from the shift constant.
246 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
247 return MOps.Root == Candidate;
248}
249
250/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
251/// These will include a chain of 'or' or 'and'-shifted bits from a
252/// common source value:
253/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
254/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
255/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
256/// that differ only with a final 'not' of the result. We expect that final
257/// 'not' to be folded with the compare that we create here (invert predicate).
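/// For illustration (a sketch, not taken from a specific test), the
/// "any-bits-set" case:
///   %s = lshr i32 %x, 3
///   %o = or i32 %x, %s
///   %r = and i32 %o, 1
/// becomes
///   %m = and i32 %x, 9        ; mask covers bit 0 and bit 3
///   %c = icmp ne i32 %m, 0
///   %r = zext i1 %c to i32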
static bool foldAnyOrAllBitsSet(Instruction &I) {
  // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
  // final "and X, 1" instruction must be the final op in the sequence.
  bool MatchAllBitsSet;
  if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_One())))
    MatchAllBitsSet = true;
264 else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
265 MatchAllBitsSet = false;
266 else
267 return false;
268
269 MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
270 if (MatchAllBitsSet) {
271 if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
272 return false;
273 } else {
274 if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
275 return false;
276 }
277
278 // The pattern was found. Create a masked compare that replaces all of the
279 // shift and logic ops.
280 IRBuilder<> Builder(&I);
281 Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
282 Value *And = Builder.CreateAnd(MOps.Root, Mask);
283 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
284 : Builder.CreateIsNotNull(And);
285 Value *Zext = Builder.CreateZExt(Cmp, I.getType());
286 I.replaceAllUsesWith(Zext);
287 ++NumAnyOrAllBitsSet;
288 return true;
289}
290
// Try to recognize the function below as a popcount intrinsic.
292// This is the "best" algorithm from
293// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
294// Also used in TargetLowering::expandCTPOP().
295//
296// int popcount(unsigned int i) {
297// i = i - ((i >> 1) & 0x55555555);
298// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
299// i = ((i + (i >> 4)) & 0x0F0F0F0F);
300// return (i * 0x01010101) >> 24;
301// }
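// When the whole sequence above is recognized, it is replaced by a single
// intrinsic call, e.g. (illustrative only):
//   %res = call i32 @llvm.ctpop.i32(i32 %i)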
static bool tryToRecognizePopCount(Instruction &I) {
  if (I.getOpcode() != Instruction::LShr)
304 return false;
305
306 Type *Ty = I.getType();
307 if (!Ty->isIntOrIntVectorTy())
308 return false;
309
310 unsigned Len = Ty->getScalarSizeInBits();
311 // FIXME: fix Len == 8 and other irregular type lengths.
312 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
313 return false;
314
315 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
316 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
317 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
318 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
319 APInt MaskShift = APInt(Len, Len - 8);
320
321 Value *Op0 = I.getOperand(0);
322 Value *Op1 = I.getOperand(1);
323 Value *MulOp0;
324 // Matching "(i * 0x01010101...) >> 24".
  if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
      match(Op1, m_SpecificInt(MaskShift))) {
    Value *ShiftOp0;
328 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
329 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
330 m_Deferred(ShiftOp0)),
331 m_SpecificInt(Mask0F)))) {
332 Value *AndOp0;
333 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
      if (match(ShiftOp0,
                m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
                        m_And(m_LShr(m_Specific(AndOp0), m_SpecificInt(2)),
                              m_SpecificInt(Mask33))))) {
338 Value *Root, *SubOp1;
339 // Matching "i - ((i >> 1) & 0x55555555...)".
340 const APInt *AndMask;
341 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
342 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
343 m_APInt(AndMask)))) {
344 auto CheckAndMask = [&]() {
345 if (*AndMask == Mask55)
346 return true;
347
348 // Exact match failed, see if any bits are known to be 0 where we
349 // expect a 1 in the mask.
350 if (!AndMask->isSubsetOf(Mask55))
351 return false;
352
353 APInt NeededMask = Mask55 & ~*AndMask;
354 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
355 NeededMask,
356 SimplifyQuery(I.getDataLayout()));
357 };
358
359 if (CheckAndMask()) {
360 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
361 IRBuilder<> Builder(&I);
362 I.replaceAllUsesWith(
363 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
364 ++NumPopCountRecognized;
365 return true;
366 }
367 }
368 }
369 }
370 }
371
372 return false;
373}
374
375/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversible
/// as the fptosi.sat is more defined than the input - all values produce a
/// valid value for the fptosi.sat, whereas some inputs that were out of range of
/// the integer conversion produce poison in the original. The reversed pattern may
380/// use fmax and fmin instead. As we cannot directly reverse the transform, and
381/// it is not always profitable, we make it conditional on the cost being
382/// reported as lower by TTI.
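/// For illustration (a sketch with constants chosen for an i8 saturate, not
/// from a specific test):
///   %f = fptosi float %x to i32
///   %l = call i32 @llvm.smin.i32(i32 %f, i32 127)
///   %r = call i32 @llvm.smax.i32(i32 %l, i32 -128)
/// becomes, assuming TTI reports the saturating conversion as cheaper,
///   %s = call i8 @llvm.fptosi.sat.i8.f32(float %x)
///   %r = sext i8 %s to i32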
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
  // Look for min(max(fptosi x, C1), C2), converting to fptosi_sat.
  Value *In;
  const APInt *MinC, *MaxC;
  if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MinC))),
                        m_APInt(MaxC))) &&
      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MaxC))),
                        m_APInt(MinC))))
    return false;
394
395 // Check that the constants clamp a saturate.
396 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
397 return false;
398
399 Type *IntTy = I.getType();
400 Type *FpTy = In->getType();
401 Type *SatTy =
402 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
403 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
404 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
405
406 // Get the cost of the intrinsic, and check that against the cost of
407 // fptosi+smin+smax
  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
      TTI::TCK_RecipThroughput);
  SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
                                  TTI::CastContextHint::None,
                                  TTI::TCK_RecipThroughput);

  InstructionCost MinMaxCost = TTI.getCastInstrCost(
      Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);

425 if (SatCost >= MinMaxCost)
426 return false;
427
428 IRBuilder<> Builder(&I);
429 Value *Sat =
430 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
431 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
432 return true;
433}
434
435/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
436/// pessimistic codegen that has to account for setting errno and can enable
437/// vectorization.
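/// For illustration (sketch only):
///   %r = call nnan double @sqrt(double %x)
/// becomes
///   %r = call nnan double @llvm.sqrt.f64(double %x)
/// when the target reports a fast sqrt, since errno handling is then known to
/// be unnecessary.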
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, AssumptionCache &AC,
                     DominatorTree &DT) {
  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
  // (because NNAN or the operand arg must not be less than -0.0) and (3) we
  // would not end up lowering to a libcall anyway (which could change the value
  // of errno), then:
  // (1) errno won't be set.
  // (2) it is safe to convert this to an intrinsic call.
447 Type *Ty = Call->getType();
448 Value *Arg = Call->getArgOperand(0);
  if (TTI.haveFastSqrt(Ty) &&
      (Call->hasNoNaNs() ||
       cannotBeOrderedLessThanZero(
           Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
453 IRBuilder<> Builder(Call);
454 Value *NewSqrt =
455 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
456 Call->replaceAllUsesWith(NewSqrt);
457
458 // Explicitly erase the old call because a call with side effects is not
459 // trivially dead.
460 Call->eraseFromParent();
461 return true;
462 }
463
464 return false;
465}
466
467// Check if this array of constants represents a cttz table.
468// Iterate over the elements from \p Table by trying to find/match all
469// the numbers from 0 to \p InputBits that should represent cttz results.
470static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
471 const APInt &AndMask, Type *AccessTy,
472 unsigned InputBits, const APInt &GEPIdxFactor,
473 const DataLayout &DL) {
474 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
    APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
    ConstantInt *C = dyn_cast_or_null<ConstantInt>(
        ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
478 if (!C || C->getValue() != Idx)
479 return false;
480 }
481
482 return true;
483}
484
485// Try to recognize table-based ctz implementation.
486// E.g., an example in C (for more cases please see the llvm/tests):
487// int f(unsigned x) {
488// static const char table[32] =
489// {0, 1, 28, 2, 29, 14, 24, 3, 30,
490// 22, 20, 15, 25, 17, 4, 8, 31, 27,
491// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
492// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
493// }
494// this can be lowered to `cttz` instruction.
495// There is also a special case when the element is 0.
496//
497// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn
498// sequence that contains each pattern of bits in it. The shift extracts
499// the top bits after the multiply, and that index into the table should
500// represent the number of trailing zeros in the original number.
501//
// Here are some examples of LLVM IR for a 64-bit target:
503//
504// CASE 1:
505// %sub = sub i32 0, %x
506// %and = and i32 %sub, %x
507// %mul = mul i32 %and, 125613361
508// %shr = lshr i32 %mul, 27
509// %idxprom = zext i32 %shr to i64
510// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
511// i64 %idxprom
512// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
513//
514// CASE 2:
515// %sub = sub i32 0, %x
516// %and = and i32 %sub, %x
517// %mul = mul i32 %and, 72416175
518// %shr = lshr i32 %mul, 26
519// %idxprom = zext i32 %shr to i64
520// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
521// i64 0, i64 %idxprom
522// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
523//
524// CASE 3:
525// %sub = sub i32 0, %x
526// %and = and i32 %sub, %x
527// %mul = mul i32 %and, 81224991
528// %shr = lshr i32 %mul, 27
529// %idxprom = zext i32 %shr to i64
530// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
531// i64 0, i64 %idxprom
532// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
533//
534// CASE 4:
535// %sub = sub i64 0, %x
536// %and = and i64 %sub, %x
537// %mul = mul i64 %and, 283881067100198605
538// %shr = lshr i64 %mul, 58
539// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
540// i64 %shr
541// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
542//
543// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
  auto *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
547 return false;
548
549 Type *AccessType = LI->getType();
550 if (!AccessType->isIntegerTy())
551 return false;
552
  auto *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
555 return false;
556
557 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
558 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
559 return false;
560
561 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt ModOffset(BW, 0);
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
565 VarOffsets.size() != 1 || ModOffset != 0)
566 return false;
567 auto [GepIdx, GEPScale] = VarOffsets.front();
568
569 Value *X1;
570 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
571 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
572 // This might be extended to the pointer index type, and if the gep index type
573 // has been replaced with an i8 then a new And (and different ShiftConst) will
574 // be present.
575 auto MatchInner = m_LShr(
576 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
577 m_APInt(ShiftConst));
578 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
579 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
580 return false;
581
582 unsigned InputBits = X1->getType()->getScalarSizeInBits();
583 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
584 return false;
585
586 if (!GEPScale.isIntN(InputBits) ||
587 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
588 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
589 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
590 return false;
591
592 ConstantInt *ZeroTableElem = cast<ConstantInt>(
593 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
594 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
595
596 IRBuilder<> B(LI);
597 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
598 Type *XType = X1->getType();
599 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
600 Value *ZExtOrTrunc = nullptr;
601
602 if (DefinedForZero) {
603 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
604 } else {
605 // If the value in elem 0 isn't the same as InputBits, we still want to
606 // produce the value from the table.
607 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
608 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
609
    // The true branch of the select handles the cttz(0) case, which is rare;
    // mark it as unlikely.
    if (auto *SelectI = dyn_cast<SelectInst>(Select))
      SelectI->setMetadata(
614 LLVMContext::MD_prof,
615 MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
616 }
617
618 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
619 // it should be handled as: `cttz(x) & (typeSize - 1)`.
620
621 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
622 }
623
624 LI->replaceAllUsesWith(ZExtOrTrunc);
625
626 return true;
627}
628
629/// This is used by foldLoadsRecursive() to capture a Root Load node which is
630/// of type or(load, load) and recursively build the wide load. Also capture the
631/// shift amount, zero extend type and loadSize.
struct LoadOps {
  LoadInst *Root = nullptr;
  LoadInst *RootInsert = nullptr;
  bool FoundRoot = false;
  uint64_t LoadSize = 0;
  uint64_t Shift = 0;
  Type *ZextType = nullptr;
  AAMDNodes AATags;
};

// Identify and merge consecutive loads recursively, which are of the form
// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
// (ZExt(L1) << shift1) | ZExt(L2)             -> ZExt(L3)
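// For illustration (little-endian sketch, not from a specific test):
//   %v0 = load i8, ptr %p
//   %v1 = load i8, ptr %p1          ; %p1 = getelementptr i8, ptr %p, i64 1
//   %z0 = zext i8 %v0 to i16
//   %z1 = zext i8 %v1 to i16
//   %s1 = shl i16 %z1, 8
//   %or = or i16 %z0, %s1
// can be replaced, when the target allows the wider access, by
//   %or = load i16, ptr %p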
645static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
646 AliasAnalysis &AA, bool IsRoot = false) {
647 uint64_t ShAmt2;
648 Value *X;
649 Instruction *L1, *L2;
650
651 // For the root instruction, allow multiple uses since the final result
652 // may legitimately be used in multiple places. For intermediate values,
653 // require single use to avoid creating duplicate loads.
654 if (!IsRoot && !V->hasOneUse())
655 return false;
656
  if (!match(V, m_c_Or(m_Value(X),
                       m_OneUse(m_ShlOrSelf(
                           m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
                           ShAmt2)))))
660 return false;
661
662 if (!foldLoadsRecursive(X, LOps, DL, AA, /*IsRoot=*/false) && LOps.FoundRoot)
663 // Avoid Partial chain merge.
664 return false;
665
666 // Check if the pattern has loads
667 LoadInst *LI1 = LOps.Root;
668 uint64_t ShAmt1 = LOps.Shift;
  if (LOps.FoundRoot == false &&
      match(X, m_OneUse(
                   m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
672 LI1 = dyn_cast<LoadInst>(L1);
673 }
674 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
675
  // Check that the loads are distinct, simple (non-atomic, non-volatile) and
  // in the same address space.
  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
    return false;
680
681 // Check if Loads come from same BB.
682 if (LI1->getParent() != LI2->getParent())
683 return false;
684
685 // Find the data layout
686 bool IsBigEndian = DL.isBigEndian();
687
688 // Check if loads are consecutive and same size.
689 Value *Load1Ptr = LI1->getPointerOperand();
690 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
691 Load1Ptr =
692 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
693 /* AllowNonInbounds */ true);
694
695 Value *Load2Ptr = LI2->getPointerOperand();
696 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
697 Load2Ptr =
698 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
699 /* AllowNonInbounds */ true);
700
701 // Verify if both loads have same base pointers
702 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
703 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
704 if (Load1Ptr != Load2Ptr)
705 return false;
706
707 // Make sure that there are no padding bits.
708 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
709 !DL.typeSizeEqualsStoreSize(LI2->getType()))
710 return false;
711
712 // Alias Analysis to check for stores b/w the loads.
  LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
  MemoryLocation Loc;
  if (!Start->comesBefore(End)) {
    std::swap(Start, End);
    // If LOps.RootInsert comes after LI2, since we use LI2 as the new insert
    // point, we should make sure whether the memory region accessed by LOps
    // isn't modified.
    if (LOps.FoundRoot)
      Loc = MemoryLocation(
          LOps.Root->getPointerOperand(),
          LocationSize::precise(DL.getTypeStoreSize(
              IntegerType::get(LI1->getContext(), LOps.LoadSize))),
          LOps.AATags);
    else
      Loc = MemoryLocation::get(End);
  } else
    Loc = MemoryLocation::get(End);
  unsigned NumScanned = 0;
731 for (Instruction &Inst :
732 make_range(Start->getIterator(), End->getIterator())) {
733 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
734 return false;
735
736 if (++NumScanned > MaxInstrsToScan)
737 return false;
738 }
739
740 // Make sure Load with lower Offset is at LI1
741 bool Reverse = false;
742 if (Offset2.slt(Offset1)) {
743 std::swap(LI1, LI2);
744 std::swap(ShAmt1, ShAmt2);
745 std::swap(Offset1, Offset2);
746 std::swap(Load1Ptr, Load2Ptr);
747 std::swap(LoadSize1, LoadSize2);
748 Reverse = true;
749 }
750
751 // Big endian swap the shifts
752 if (IsBigEndian)
753 std::swap(ShAmt1, ShAmt2);
754
755 // First load is always LI1. This is where we put the new load.
756 // Use the merged load size available from LI1 for forward loads.
757 if (LOps.FoundRoot) {
758 if (!Reverse)
759 LoadSize1 = LOps.LoadSize;
760 else
761 LoadSize2 = LOps.LoadSize;
762 }
763
  // Verify that the shift amounts and load offsets line up, i.e. that the
  // loads are consecutive.
766 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
767 uint64_t PrevSize =
768 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
769 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
770 return false;
771
772 // Update LOps
773 AAMDNodes AATags1 = LOps.AATags;
774 AAMDNodes AATags2 = LI2->getAAMetadata();
775 if (LOps.FoundRoot == false) {
776 LOps.FoundRoot = true;
777 AATags1 = LI1->getAAMetadata();
778 }
779 LOps.LoadSize = LoadSize1 + LoadSize2;
780 LOps.RootInsert = Start;
781
782 // Concatenate the AATags of the Merged Loads.
783 LOps.AATags = AATags1.concat(AATags2);
784
785 LOps.Root = LI1;
786 LOps.Shift = ShAmt1;
787 LOps.ZextType = X->getType();
788 return true;
789}
790
791// For a given BB instruction, evaluate all loads in the chain that form a
792// pattern which suggests that the loads can be combined. The one and only use
793// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA,
                                 const DominatorTree &DT) {
797 // Only consider load chains of scalar values.
798 if (isa<VectorType>(I.getType()))
799 return false;
800
801 LoadOps LOps;
802 if (!foldLoadsRecursive(&I, LOps, DL, AA, /*IsRoot=*/true) || !LOps.FoundRoot)
803 return false;
804
805 IRBuilder<> Builder(&I);
806 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
807
808 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
809 // TTI based checks if we want to proceed with wider load
810 bool Allowed = TTI.isTypeLegal(WiderType);
811 if (!Allowed)
812 return false;
813
814 unsigned AS = LI1->getPointerAddressSpace();
815 unsigned Fast = 0;
816 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
817 AS, LI1->getAlign(), &Fast);
818 if (!Allowed || !Fast)
819 return false;
820
821 // Get the Index and Ptr for the new GEP.
822 Value *Load1Ptr = LI1->getPointerOperand();
823 Builder.SetInsertPoint(LOps.RootInsert);
824 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
825 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
826 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
827 DL, Offset1, /* AllowNonInbounds */ true);
828 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
829 }
830 // Generate wider load.
831 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
832 LI1->isVolatile(), "");
833 NewLoad->takeName(LI1);
834 // Set the New Load AATags Metadata.
835 if (LOps.AATags)
836 NewLoad->setAAMetadata(LOps.AATags);
837
838 Value *NewOp = NewLoad;
839 // Check if zero extend needed.
840 if (LOps.ZextType)
841 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
842
843 // Check if shift needed. We need to shift with the amount of load1
844 // shift if not zero.
845 if (LOps.Shift)
846 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
847 I.replaceAllUsesWith(NewOp);
848
849 return true;
850}
851
852/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
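/// For illustration (little-endian sketch, not from a specific test):
///   %t0 = trunc i32 %v to i8
///   store i8 %t0, ptr %p
///   %s1 = lshr i32 %v, 8
///   %t1 = trunc i32 %s1 to i8
///   store i8 %t1, ptr %p1            ; %p1 = getelementptr i8, ptr %p, i64 1
/// form two PartStores of %v that can be merged into
///   %t = trunc i32 %v to i16
///   store i16 %t, ptr %p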
struct PartStore {
  Value *PtrBase;
  APInt PtrOffset;
  Value *Val;
  uint64_t ValOffset;
  uint64_t ValWidth;
  StoreInst *Store;

861 bool isCompatibleWith(const PartStore &Other) const {
862 return PtrBase == Other.PtrBase && Val == Other.Val;
863 }
864
865 bool operator<(const PartStore &Other) const {
866 return PtrOffset.slt(Other.PtrOffset);
867 }
868};
869
870static std::optional<PartStore> matchPartStore(Instruction &I,
871 const DataLayout &DL) {
872 auto *Store = dyn_cast<StoreInst>(&I);
873 if (!Store || !Store->isSimple())
874 return std::nullopt;
875
876 Value *StoredVal = Store->getValueOperand();
877 Type *StoredTy = StoredVal->getType();
878 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
879 return std::nullopt;
880
881 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
882 uint64_t ValOffset;
883 Value *Val;
884 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
885 return std::nullopt;
886
887 Value *Ptr = Store->getPointerOperand();
  APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
      DL, PtrOffset, /*AllowNonInbounds=*/true);
891 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
892}
893
static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
                                       unsigned Width, const DataLayout &DL,
                                       TargetTransformInfo &TTI) {
897 if (Parts.size() < 2)
898 return false;
899
900 // Check whether combining the stores is profitable.
901 // FIXME: We could generate smaller stores if we can't produce a large one.
902 const PartStore &First = Parts.front();
903 LLVMContext &Ctx = First.Store->getContext();
904 Type *NewTy = Type::getIntNTy(Ctx, Width);
905 unsigned Fast = 0;
906 if (!TTI.isTypeLegal(NewTy) ||
907 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
908 First.Store->getPointerAddressSpace(),
909 First.Store->getAlign(), &Fast) ||
910 !Fast)
911 return false;
912
913 // Generate the combined store.
914 IRBuilder<> Builder(First.Store);
915 Value *Val = First.Val;
916 if (First.ValOffset != 0)
917 Val = Builder.CreateLShr(Val, First.ValOffset);
918 Val = Builder.CreateTrunc(Val, NewTy);
919 StoreInst *Store = Builder.CreateAlignedStore(
920 Val, First.Store->getPointerOperand(), First.Store->getAlign());
921
922 // Merge various metadata onto the new store.
923 AAMDNodes AATags = First.Store->getAAMetadata();
924 SmallVector<Instruction *> Stores = {First.Store};
925 Stores.reserve(Parts.size());
926 SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()};
927 DbgLocs.reserve(Parts.size());
928 for (const PartStore &Part : drop_begin(Parts)) {
929 AATags = AATags.concat(Part.Store->getAAMetadata());
930 Stores.push_back(Part.Store);
931 DbgLocs.push_back(Part.Store->getDebugLoc());
932 }
933 Store->setAAMetadata(AATags);
934 Store->mergeDIAssignID(Stores);
935 Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs));
936
937 // Remove the old stores.
938 for (const PartStore &Part : Parts)
939 Part.Store->eraseFromParent();
940
941 return true;
942}
943
static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
                            const DataLayout &DL, TargetTransformInfo &TTI) {
  if (Parts.size() < 2)
947 return false;
948
949 // We now have multiple parts of the same value stored to the same pointer.
950 // Sort the parts by pointer offset, and make sure they are consistent with
951 // the value offsets. Also check that the value is fully covered without
952 // overlaps.
953 bool Changed = false;
954 llvm::sort(Parts);
955 int64_t LastEndOffsetFromFirst = 0;
956 const PartStore *First = &Parts[0];
957 for (const PartStore &Part : Parts) {
958 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
959 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
960 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
        LastEndOffsetFromFirst != ValOffsetFromFirst) {
      Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
                                            LastEndOffsetFromFirst, DL, TTI);
964 First = &Part;
965 LastEndOffsetFromFirst = Part.ValWidth;
966 continue;
967 }
968
969 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
970 }
971
  Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
                                        LastEndOffsetFromFirst, DL, TTI);
974 return Changed;
975}
976
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
                                  TargetTransformInfo &TTI, AliasAnalysis &AA) {
  // FIXME: Add big endian support.
980 if (DL.isBigEndian())
981 return false;
982
  BatchAAResults BatchAA(AA);
  SmallVector<PartStore, 8> Parts;
  bool MadeChange = false;
986 for (Instruction &I : make_early_inc_range(BB)) {
987 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
988 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
989 Parts.push_back(std::move(*Part));
990 continue;
991 }
992
993 MadeChange |= mergePartStores(Parts, DL, TTI);
994 Parts.clear();
995 Parts.push_back(std::move(*Part));
996 continue;
997 }
998
999 if (Parts.empty())
1000 continue;
1001
    if (I.mayThrow() ||
        (I.mayReadOrWriteMemory() &&
         isModOrRefSet(BatchAA.getModRefInfo(
             &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
1006 MadeChange |= mergePartStores(Parts, DL, TTI);
1007 Parts.clear();
1008 continue;
1009 }
1010 }
1011
1012 MadeChange |= mergePartStores(Parts, DL, TTI);
1013 return MadeChange;
1014}
1015
/// Combine away instructions provided they are still equivalent when compared
/// against 0, i.e. do they have any bits set.
static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
  auto *I = dyn_cast<Instruction>(V);
1020 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
1021 return nullptr;
1022
1023 Value *A;
1024
1025 // Look deeper into the chain of or's, combining away shl (so long as they are
1026 // nuw or nsw).
1027 Value *Op0 = I->getOperand(0);
1028 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1029 m_NUWShl(m_Value(A), m_Value()))))
1030 Op0 = A;
1031 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
1032 Op0 = NOp;
1033
1034 Value *Op1 = I->getOperand(1);
1035 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1036 m_NUWShl(m_Value(A), m_Value()))))
1037 Op1 = A;
1038 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1039 Op1 = NOp;
1040
1041 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1042 return Builder.CreateOr(Op0, Op1);
1043 return nullptr;
1044}
1045
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
                            TargetTransformInfo &TTI, AliasAnalysis &AA,
                            const DominatorTree &DT) {
1049 CmpPredicate Pred;
1050 Value *Op0;
1051 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1052 !ICmpInst::isEquality(Pred))
1053 return false;
1054
  // If the chain of or's matches a load, combine to that before attempting to
1056 // remove shifts.
1057 if (auto OpI = dyn_cast<Instruction>(Op0))
1058 if (OpI->getOpcode() == Instruction::Or)
1059 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1060 return true;
1061
1062 IRBuilder<> Builder(&I);
1063 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1064 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1065 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1066 return true;
1067 }
1068
1069 return false;
1070}
1071
1072// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1073// ModOffset
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1077 std::optional<APInt> Stride;
1078 APInt ModOffset(BW, 0);
  // Return a minimum gep stride, greatest common divisor of consecutive gep
  // index scales (cf. Bézout's identity).
  while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
    SmallMapVector<Value *, APInt, 4> VarOffsets;
    if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1084 break;
1085
1086 for (auto [V, Scale] : VarOffsets) {
1087 // Only keep a power of two factor for non-inbounds
1088 if (!GEP->hasNoUnsignedSignedWrap())
1089 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1090
1091 if (!Stride)
1092 Stride = Scale;
1093 else
1094 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1095 }
1096
1097 PtrOp = GEP->getPointerOperand();
1098 }
1099
1100 // Check whether pointer arrives back at Global Variable via at least one GEP.
1101 // Even if it doesn't, we can check by alignment.
1102 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1103 return {APInt(BW, 1), APInt(BW, 0)};
1104
  // Because GEP indices are signed, take the offset as the non-negative
  // remainder of division by the minimum GEP stride.
1107 ModOffset = ModOffset.srem(*Stride);
1108 if (ModOffset.isNegative())
1109 ModOffset += *Stride;
1110
1111 return {*Stride, ModOffset};
1112}
1113
/// If C is a constant patterned array and all valid loaded results for the
/// given alignment are the same constant, return that constant.
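/// For illustration (sketch only): given
///   @tbl = constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 2
/// a 2-byte-aligned load i8 from a GEP into @tbl can only read offsets
/// 0, 2, 4 or 6, all of which hold 1, so the load folds to i8 1.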
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
  auto *LI = dyn_cast<LoadInst>(&I);
1118 if (!LI || LI->isVolatile())
1119 return false;
1120
1121 // We can only fold the load if it is from a constant global with definitive
1122 // initializer. Skip expensive logic if this is not the case.
  auto *PtrOp = LI->getPointerOperand();
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1126 return false;
1127
1128 // Bail for large initializers in excess of 4K to avoid too many scans.
1129 Constant *C = GV->getInitializer();
1130 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1131 if (!GVSize || 4096 < GVSize)
1132 return false;
1133
1134 Type *LoadTy = LI->getType();
1135 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1136 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1137
1138 // Any possible offset could be multiple of GEP stride. And any valid
1139 // offset is multiple of load alignment, so checking only multiples of bigger
1140 // one is sufficient to say results' equality.
1141 if (auto LA = LI->getAlign();
1142 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1143 ConstOffset = APInt(BW, 0);
1144 Stride = APInt(BW, LA.value());
1145 }
1146
1147 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1148 if (!Ca)
1149 return false;
1150
1151 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1152 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1153 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1154 return false;
1155
1156 I.replaceAllUsesWith(Ca);
1157
1158 return true;
1159}
1160
1161namespace {
1162class StrNCmpInliner {
1163public:
1164 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1165 const DataLayout &DL)
1166 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1167
1168 bool optimizeStrNCmp();
1169
1170private:
1171 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1172
1173 CallInst *CI;
1174 LibFunc Func;
1175 DomTreeUpdater *DTU;
1176 const DataLayout &DL;
1177};
1178
1179} // namespace
1180
1181/// First we normalize calls to strncmp/strcmp to the form of
1182/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1183/// (without considering '\0').
1184///
1185/// Examples:
1186///
1187/// \code
1188/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1189/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1190/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1191/// strcmp(s, "a") -> compare(s, "a", 2)
1192///
1193/// char s2[] = {'a'}
1194/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1195///
1196/// char s2[] = {'a', 'b', 'c', 'd'}
1197/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1198/// \endcode
1199///
1200/// We only handle cases where N and exactly one of s1 and s2 are constant.
1201/// Cases that s1 and s2 are both constant are already handled by the
1202/// instcombine pass.
1203///
1204/// We do not handle cases where N > StrNCmpInlineThreshold.
1205///
/// We also do not handle cases where N < 2, which are already
1207/// handled by the instcombine pass.
1208///
1209bool StrNCmpInliner::optimizeStrNCmp() {
1210 if (StrNCmpInlineThreshold < 2)
1211 return false;
1212
  if (!isOnlyUsedInZeroComparison(CI))
    return false;
1215
1216 Value *Str1P = CI->getArgOperand(0);
1217 Value *Str2P = CI->getArgOperand(1);
1218 // Should be handled elsewhere.
1219 if (Str1P == Str2P)
1220 return false;
1221
1222 StringRef Str1, Str2;
1223 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1224 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
1225 if (HasStr1 == HasStr2)
1226 return false;
1227
1228 // Note that '\0' and characters after it are not trimmed.
1229 StringRef Str = HasStr1 ? Str1 : Str2;
1230 Value *StrP = HasStr1 ? Str2P : Str1P;
1231
1232 size_t Idx = Str.find('\0');
1233 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1234 if (Func == LibFunc_strncmp) {
1235 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1236 N = std::min(N, ConstInt->getZExtValue());
1237 else
1238 return false;
1239 }
1240 // Now N means how many bytes we need to compare at most.
1241 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1242 return false;
1243
1244 // Cases where StrP has two or more dereferenceable bytes might be better
1245 // optimized elsewhere.
1246 bool CanBeNull = false, CanBeFreed = false;
1247 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1248 return false;
1249 inlineCompare(StrP, Str, N, HasStr1);
1250 return true;
1251}
1252
1253/// Convert
1254///
1255/// \code
1256/// ret = compare(s1, s2, N)
1257/// \endcode
1258///
1259/// into
1260///
1261/// \code
1262/// ret = (int)s1[0] - (int)s2[0]
1263/// if (ret != 0)
1264/// goto NE
1265/// ...
1266/// ret = (int)s1[N-2] - (int)s2[N-2]
1267/// if (ret != 0)
1268/// goto NE
1269/// ret = (int)s1[N-1] - (int)s2[N-1]
1270/// NE:
1271/// \endcode
1272///
1273/// CFG before and after the transformation:
1274///
1275/// (before)
1276/// BBCI
1277///
1278/// (after)
1279/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1280/// | ^
1281/// E |
1282/// | |
1283/// BBSubs[1] (sub,icmp) --NE-----+
1284/// ... |
1285/// BBSubs[N-1] (sub) ---------+
1286///
1287void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1288 bool Swapped) {
1289 auto &Ctx = CI->getContext();
1290 IRBuilder<> B(Ctx);
1291 // We want these instructions to be recognized as inlined instructions for the
1292 // compare call, but we don't have a source location for the definition of
1293 // that function, since we're generating that code now. Because the generated
1294 // code is a viable point for a memory access error, we make the pragmatic
1295 // choice here to directly use CI's location so that we have useful
1296 // attribution for the generated code.
1297 B.SetCurrentDebugLocation(CI->getDebugLoc());
1298
1299 BasicBlock *BBCI = CI->getParent();
1300 BasicBlock *BBTail =
1301 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1302
1304 for (uint64_t I = 0; I < N; ++I)
1305 BBSubs.push_back(
1306 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1307 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1308
1309 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1310
1311 B.SetInsertPoint(BBNE);
1312 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1313 B.CreateBr(BBTail);
1314
1315 Value *Base = LHS;
1316 for (uint64_t i = 0; i < N; ++i) {
1317 B.SetInsertPoint(BBSubs[i]);
1318 Value *VL =
1319 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1320 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1321 CI->getType());
1322 Value *VR =
1323 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
1324 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1325 if (i < N - 1) {
1326 BranchInst *CondBrInst = B.CreateCondBr(
1327 B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)), BBNE,
1328 BBSubs[i + 1]);
1329
1330 Function *F = CI->getFunction();
1331 assert(F && "Instruction does not belong to a function!");
1332 std::optional<Function::ProfileCount> EC = F->getEntryCount();
1333 if (EC && EC->getCount() > 0)
1335 } else {
1336 B.CreateBr(BBNE);
1337 }
1338
1339 Phi->addIncoming(Sub, BBSubs[i]);
1340 }
1341
1342 CI->replaceAllUsesWith(Phi);
1343 CI->eraseFromParent();
1344
1345 if (DTU) {
    SmallVector<DominatorTree::UpdateType, 8> Updates;
    Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1348 for (uint64_t i = 0; i < N; ++i) {
1349 if (i < N - 1)
1350 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1351 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1352 }
1353 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1354 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1355 DTU->applyUpdates(Updates);
1356 }
1357}
1358
1359/// Convert memchr with a small constant string into a switch
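/// For illustration (sketch only): a call such as
///   %r = call ptr @memchr(ptr @str, i32 %c, i64 3)
/// where @str holds a known 3-byte constant string becomes a switch on
/// (trunc %c to i8) with one case per distinct character; each case feeds its
/// index into a phi that yields a pointer to the first occurrence, and the
/// default path yields null.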
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
                       const DataLayout &DL) {
1362 if (isa<Constant>(Call->getArgOperand(1)))
1363 return false;
1364
1365 StringRef Str;
1366 Value *Base = Call->getArgOperand(0);
1367 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1368 return false;
1369
1370 uint64_t N = Str.size();
1371 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1372 uint64_t Val = ConstInt->getZExtValue();
1373 // Ignore the case that n is larger than the size of string.
1374 if (Val > N)
1375 return false;
1376 N = Val;
1377 } else
1378 return false;
1379
1381 return false;
1382
1383 BasicBlock *BB = Call->getParent();
1384 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1385 IRBuilder<> IRB(BB);
1386 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
  IntegerType *ByteTy = IRB.getInt8Ty();
  BB->getTerminator()->eraseFromParent();
1389 SwitchInst *SI = IRB.CreateSwitch(
1390 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1391 // We can't know the precise weights here, as they would depend on the value
1392 // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
1394 Type *IndexTy = DL.getIndexType(Call->getType());
  SmallVector<DominatorTree::UpdateType, 8> Updates;

1397 BasicBlock *BBSuccess = BasicBlock::Create(
1398 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1399 IRB.SetInsertPoint(BBSuccess);
1400 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1401 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1402 IRB.CreateBr(BBNext);
1403 if (DTU)
1404 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1405
1407 for (uint64_t I = 0; I < N; ++I) {
1408 ConstantInt *CaseVal =
1409 ConstantInt::get(ByteTy, static_cast<unsigned char>(Str[I]));
1410 if (!Cases.insert(CaseVal).second)
1411 continue;
1412
1413 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1414 BB->getParent(), BBSuccess);
1415 SI->addCase(CaseVal, BBCase);
1416 IRB.SetInsertPoint(BBCase);
1417 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1418 IRB.CreateBr(BBSuccess);
1419 if (DTU) {
1420 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1421 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1422 }
1423 }
1424
1425 PHINode *PHI =
1426 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1427 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1428 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1429
1430 Call->replaceAllUsesWith(PHI);
1431 Call->eraseFromParent();
1432
1433 if (DTU)
1434 DTU->applyUpdates(Updates);
1435
1436 return true;
1437}
1438
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
                         TargetLibraryInfo &TLI, AssumptionCache &AC,
                         DominatorTree &DT, const DataLayout &DL,
                         bool &MadeCFGChange) {
1443
1444 auto *CI = dyn_cast<CallInst>(&I);
1445 if (!CI || CI->isNoBuiltin())
1446 return false;
1447
1448 Function *CalledFunc = CI->getCalledFunction();
1449 if (!CalledFunc)
1450 return false;
1451
1452 LibFunc LF;
1453 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1454 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1455 return false;
1456
1457 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1458
1459 switch (LF) {
1460 case LibFunc_sqrt:
1461 case LibFunc_sqrtf:
1462 case LibFunc_sqrtl:
1463 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1464 case LibFunc_strcmp:
1465 case LibFunc_strncmp:
1466 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1467 MadeCFGChange = true;
1468 return true;
1469 }
1470 break;
1471 case LibFunc_memchr:
1472 if (foldMemChr(CI, &DTU, DL)) {
1473 MadeCFGChange = true;
1474 return true;
1475 }
1476 break;
1477 default:;
1478 }
1479 return false;
1480}
1481
1482/// Match high part of long multiplication.
1483///
1484/// Considering a multiply made up of high and low parts, we can split the
1485/// multiply into:
1486/// x * y == (xh*T + xl) * (yh*T + yl)
1487/// where xh == x>>32 and xl == x & 0xffffffff. T = 2^32.
1488/// This expands to
1489/// xh*yh*T*T + xh*yl*T + xl*yh*T + xl*yl
1490/// which can be drawn as
1491/// [ xh*yh ]
1492/// [ xh*yl ]
1493/// [ xl*yh ]
1494/// [ xl*yl ]
1495/// We are looking for the "high" half, which is xh*yh + xh*yl>>32 + xl*yh>>32 +
/// some carries. The carry makes this difficult and there are multiple ways of
/// representing it. The ones we attempt to support here are:
/// Carry: xh*yh + carry + lowsum>>32
///   carry = lowsum < xh*yl ? 0x100000000 : 0
1500/// lowsum = xh*yl + xl*yh + (xl*yl>>32)
1501/// Ladder: xh*yh + c2>>32 + c3>>32
1502/// c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1503/// or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
1504/// Carry4: xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
1505/// crosssum = xh*yl + xl*yh
///   carry = crosssum < xh*yl ? 0x100000000 : 0
1507/// Ladder4: xh*yh + (xl*yh)>>32 + (xh*yl)>>32 + low>>32;
1508/// low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff
1509///
1510/// They all start by matching xh*yh + 2 or 3 other operands. The bottom of the
1511/// tree is xh*yh, xh*yl, xl*yh and xl*yl.
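/// As a concrete sketch (illustrative only, for a 64-bit type split into
/// 32-bit halves), the "Carry" form above corresponds roughly to:
///   uint64_t xl = x & 0xffffffff, xh = x >> 32;
///   uint64_t yl = y & 0xffffffff, yh = y >> 32;
///   uint64_t lowsum = xh*yl + xl*yh + ((xl*yl) >> 32);
///   uint64_t carry  = lowsum < xh*yl ? 0x100000000 : 0;
///   uint64_t high   = xh*yh + carry + (lowsum >> 32);
/// which this fold replaces with the top half of a double-width multiply.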
static bool foldMulHigh(Instruction &I) {
  Type *Ty = I.getType();
1514 if (!Ty->isIntOrIntVectorTy())
1515 return false;
1516
  unsigned BitWidth = Ty->getScalarSizeInBits();
  APInt LowMask = APInt::getLowBitsSet(BitWidth, BitWidth / 2);
  if (BitWidth % 2 != 0)
1520 return false;
1521
1522 auto CreateMulHigh = [&](Value *X, Value *Y) {
1523 IRBuilder<> Builder(&I);
1524 Type *NTy = Ty->getWithNewBitWidth(BitWidth * 2);
1525 Value *XExt = Builder.CreateZExt(X, NTy);
1526 Value *YExt = Builder.CreateZExt(Y, NTy);
1527 Value *Mul = Builder.CreateMul(XExt, YExt, "", /*HasNUW=*/true);
1528 Value *High = Builder.CreateLShr(Mul, BitWidth);
1529 Value *Res = Builder.CreateTrunc(High, Ty, "", /*HasNUW=*/true);
1530 Res->takeName(&I);
1531 I.replaceAllUsesWith(Res);
1532 LLVM_DEBUG(dbgs() << "Created long multiply from parts of " << *X << " and "
1533 << *Y << "\n");
1534 return true;
1535 };
1536
1537 // Common check routines for X_lo*Y_lo and X_hi*Y_lo
1538 auto CheckLoLo = [&](Value *XlYl, Value *X, Value *Y) {
1539 return match(XlYl, m_c_Mul(m_And(m_Specific(X), m_SpecificInt(LowMask)),
1540 m_And(m_Specific(Y), m_SpecificInt(LowMask))));
1541 };
1542 auto CheckHiLo = [&](Value *XhYl, Value *X, Value *Y) {
    return match(XhYl,
                 m_c_Mul(m_LShr(m_Specific(X), m_SpecificInt(BitWidth / 2)),
                         m_And(m_Specific(Y), m_SpecificInt(LowMask))));
1546 };
1547
1548 auto FoldMulHighCarry = [&](Value *X, Value *Y, Instruction *Carry,
1549 Instruction *B) {
1550 // Looking for LowSum >> 32 and carry (select)
1551 if (Carry->getOpcode() != Instruction::Select)
1552 std::swap(Carry, B);
1553
1554 // Carry = LowSum < XhYl ? 0x100000000 : 0
1555 Value *LowSum, *XhYl;
1556 if (!match(Carry,
1559 m_Value(XhYl))),
1561 m_Zero()))))
1562 return false;
1563
1564 // XhYl can be Xh*Yl or Xl*Yh
1565 if (!CheckHiLo(XhYl, X, Y)) {
1566 if (CheckHiLo(XhYl, Y, X))
1567 std::swap(X, Y);
1568 else
1569 return false;
1570 }
1571 if (XhYl->hasNUsesOrMore(3))
1572 return false;
1573
1574 // B = LowSum >> 32
1575 if (!match(B, m_OneUse(m_LShr(m_Specific(LowSum),
1576 m_SpecificInt(BitWidth / 2)))) ||
1577 LowSum->hasNUsesOrMore(3))
1578 return false;
1579
1580 // LowSum = XhYl + XlYh + XlYl>>32
1581 Value *XlYh, *XlYl;
1582 auto XlYlHi = m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2));
1583 if (!match(LowSum,
1584 m_c_Add(m_Specific(XhYl),
1585 m_OneUse(m_c_Add(m_OneUse(m_Value(XlYh)), XlYlHi)))) &&
1586 !match(LowSum, m_c_Add(m_OneUse(m_Value(XlYh)),
1587 m_OneUse(m_c_Add(m_Specific(XhYl), XlYlHi)))) &&
1588 !match(LowSum,
1589 m_c_Add(XlYlHi, m_OneUse(m_c_Add(m_Specific(XhYl),
1590 m_OneUse(m_Value(XlYh)))))))
1591 return false;
1592
1593 // Check XlYl and XlYh
1594 if (!CheckLoLo(XlYl, X, Y))
1595 return false;
1596 if (!CheckHiLo(XlYh, Y, X))
1597 return false;
1598
1599 return CreateMulHigh(X, Y);
1600 };
1601
1602 auto FoldMulHighLadder = [&](Value *X, Value *Y, Instruction *A,
1603 Instruction *B) {
1604 // xh*yh + c2>>32 + c3>>32
1605 // c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1606 // or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xh*yl
1607 Value *XlYh, *XhYl, *XlYl, *C2, *C3;
1608 // Strip off the two expected shifts.
    if (!match(A, m_LShr(m_Value(C2), m_SpecificInt(BitWidth / 2))) ||
        !match(B, m_LShr(m_Value(C3), m_SpecificInt(BitWidth / 2))))
      return false;
1612
1613 if (match(C3, m_c_Add(m_Add(m_Value(), m_Value()), m_Value())))
1614 std::swap(C2, C3);
1615 // Try to match c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32)
1616 if (match(C2,
1618 m_Value(XlYh)),
1619 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)))) ||
1621 m_LShr(m_Value(XlYl),
1622 m_SpecificInt(BitWidth / 2))),
1623 m_Value(XlYh))) ||
1625 m_SpecificInt(BitWidth / 2)),
1626 m_Value(XlYh)),
1627 m_And(m_Specific(C3), m_SpecificInt(LowMask))))) {
1628 XhYl = C3;
1629 } else {
1630 // Match c3 = c2&0xffffffff + xl*yh
1631 if (!match(C3, m_c_Add(m_And(m_Specific(C2), m_SpecificInt(LowMask)),
1632 m_Value(XlYh))))
1633 std::swap(C2, C3);
1634 if (!match(C3, m_c_Add(m_OneUse(
1635 m_And(m_Specific(C2), m_SpecificInt(LowMask))),
1636 m_Value(XlYh))) ||
1637 !C3->hasOneUse() || C2->hasNUsesOrMore(3))
1638 return false;
1639
1640 // Match c2 = xh*yl + (xl*yl >> 32)
1641 if (!match(C2, m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)),
1642 m_Value(XhYl))))
1643 return false;
1644 }
1645
1646 // Match XhYl and XlYh - they can appear either way around.
1647 if (!CheckHiLo(XlYh, Y, X))
1648 std::swap(XlYh, XhYl);
1649 if (!CheckHiLo(XlYh, Y, X))
1650 return false;
1651 if (!CheckHiLo(XhYl, X, Y))
1652 return false;
1653 if (!CheckLoLo(XlYl, X, Y))
1654 return false;
1655
1656 return CreateMulHigh(X, Y);
1657 };
1658
1659 auto FoldMulHighLadder4 = [&](Value *X, Value *Y, Instruction *A,
                               Instruction *B, Instruction *C) {
    /// Ladder4: xh*yh + (xl*yh)>>32 + (xh*yl)>>32 + low>>32;
1662 /// low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff
1663
1664 // Find A = Low >> 32 and B/C = XhYl>>32, XlYh>>32.
    auto ShiftAdd =
        m_LShr(m_Add(m_Value(), m_Value()), m_SpecificInt(BitWidth / 2));
    if (!match(A, ShiftAdd))
1668 std::swap(A, B);
1669 if (!match(A, ShiftAdd))
1670 std::swap(A, C);
    Value *Low;
    if (!match(A, m_LShr(m_Value(Low), m_SpecificInt(BitWidth / 2))))
      return false;
1674
1675 // Match B == XhYl>>32 and C == XlYh>>32
1676 Value *XhYl, *XlYh;
1677 if (!match(B, m_LShr(m_Value(XhYl), m_SpecificInt(BitWidth / 2))) ||
1678 !match(C, m_LShr(m_Value(XlYh), m_SpecificInt(BitWidth / 2))))
1679 return false;
1680 if (!CheckHiLo(XhYl, X, Y))
1681 std::swap(XhYl, XlYh);
1682 if (!CheckHiLo(XhYl, X, Y) || XhYl->hasNUsesOrMore(3))
1683 return false;
1684 if (!CheckHiLo(XlYh, Y, X) || XlYh->hasNUsesOrMore(3))
1685 return false;
1686
1687 // Match Low as XlYl>>32 + XhYl&0xffffffff + XlYh&0xffffffff
1688 Value *XlYl;
1689 if (!match(
1690 Low,
            m_c_Add(
                m_OneUse(m_c_Add(
1693 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
1694 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))),
1695 m_OneUse(
1696 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))) &&
1697 !match(
1698 Low,
            m_c_Add(
                m_OneUse(m_c_Add(
1701 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
1702 m_OneUse(
1703 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
1704 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))) &&
1705 !match(
1706 Low,
            m_c_Add(
                m_OneUse(m_c_Add(
1709 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))),
1710 m_OneUse(
1711 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
1712 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))))))
1713 return false;
1714 if (!CheckLoLo(XlYl, X, Y))
1715 return false;
1716
1717 return CreateMulHigh(X, Y);
1718 };
1719
  auto FoldMulHighCarry4 = [&](Value *X, Value *Y, Instruction *Carry,
                               Instruction *B, Instruction *C) {
1722 // xh*yh + carry + (crosssum>>32) + (((xl*yl>>32) + (crosssum&0xffffffff)) >> 32)
1723 // crosssum = xh*yl + xl*yh
1724 // carry = crosssum < xh*yl ? 0x100000000 : 0
1725 if (Carry->getOpcode() != Instruction::Select)
1726 std::swap(Carry, B);
1727 if (Carry->getOpcode() != Instruction::Select)
1728 std::swap(Carry, C);
1729
1730 // Carry = CrossSum < XhYl ? 0x100000000 : 0
1731 Value *CrossSum, *XhYl;
1732 if (!match(Carry,
1735 m_Value(CrossSum), m_Value(XhYl))),
1737 m_Zero()))))
1738 return false;
1739
1740 if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
1741 std::swap(B, C);
1742 if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
1743 return false;
1744
1745 Value *XlYl, *LowAccum;
1746 if (!match(C, m_LShr(m_Value(LowAccum), m_SpecificInt(BitWidth / 2))) ||
1747 !match(LowAccum, m_c_Add(m_OneUse(m_LShr(m_Value(XlYl),
1748 m_SpecificInt(BitWidth / 2))),
1749 m_OneUse(m_And(m_Specific(CrossSum),
1750 m_SpecificInt(LowMask))))) ||
1751 LowAccum->hasNUsesOrMore(3))
1752 return false;
1753 if (!CheckLoLo(XlYl, X, Y))
1754 return false;
1755
1756 if (!CheckHiLo(XhYl, X, Y))
1757 std::swap(X, Y);
1758 if (!CheckHiLo(XhYl, X, Y))
1759 return false;
1760 Value *XlYh;
1761 if (!match(CrossSum, m_c_Add(m_Specific(XhYl), m_OneUse(m_Value(XlYh)))) ||
1762 !CheckHiLo(XlYh, Y, X) || CrossSum->hasNUsesOrMore(4) ||
1763 XhYl->hasNUsesOrMore(3))
1764 return false;
1765
1766 return CreateMulHigh(X, Y);
1767 };
1768
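// For reference, assuming BitWidth == 64, the Carry4 shape above adds the
// two cross products first and compensates for a possible wrap with a select
// (illustrative names only):
//
//   uint64_t crosssum = xh * yl + xl * yh;   // may wrap around 2^64
//   uint64_t carry    = crosssum < xh * yl ? 0x100000000 : 0;
//   uint64_t lowaccum = ((xl * yl) >> 32) + (crosssum & 0xffffffff);
//   uint64_t hi       = xh * yh + carry + (crosssum >> 32)
//                               + (lowaccum >> 32);   // high half of x*y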
1769 // X and Y are the two inputs, A, B and C are other parts of the pattern
1770 // (crosssum>>32, carry, etc).
1771 Value *X, *Y;
1772 Instruction *A, *B, *C;
1773 auto HiHi = m_OneUse(m_Mul(m_LShr(m_Value(X), m_SpecificInt(BitWidth / 2)),
1774 m_LShr(m_Value(Y), m_SpecificInt(BitWidth / 2))));
1775 if ((match(&I, m_c_Add(HiHi, m_OneUse(m_Add(m_Instruction(A),
1776 m_Instruction(B))))) ||
1778 m_OneUse(m_c_Add(HiHi, m_Instruction(B)))))) &&
1779 A->hasOneUse() && B->hasOneUse())
1780 if (FoldMulHighCarry(X, Y, A, B) || FoldMulHighLadder(X, Y, A, B))
1781 return true;
1782
1783 if ((match(&I, m_c_Add(HiHi, m_OneUse(m_c_Add(
1786 m_Instruction(C))))))) ||
1790 m_Instruction(C))))))) ||
1794 m_OneUse(m_c_Add(HiHi, m_Instruction(C))))))) ||
1795 match(&I,
1798 A->hasOneUse() && B->hasOneUse() && C->hasOneUse())
1799 return FoldMulHighCarry4(X, Y, A, B, C) ||
1800 FoldMulHighLadder4(X, Y, A, B, C);
1801
1802 return false;
1803}
1804
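// All three shapes recognized above are rearrangements of the same identity
// (writing W for the bit width and splitting x = xh*2^(W/2) + xl,
// y = yh*2^(W/2) + yl):
//
//   high_W(x * y) = xh*yh + ((xh*yl + xl*yh + ((xl*yl) >> W/2)) >> W/2)
//
// They differ only in how the middle sum is split so that every partial sum
// stays within W bits; once matched, the whole tree is handed off to the
// CreateMulHigh helper defined above.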
1805/// This is the entry point for folds that could be implemented in regular
1806/// InstCombine, but they are separated because they are not expected to
1807/// occur frequently and/or have more than a constant-length pattern match.
1808 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1809 TargetTransformInfo &TTI, TargetLibraryInfo &TLI,
1810 AliasAnalysis &AA,
1811 AssumptionCache &AC, bool &MadeCFGChange) {
1812 bool MadeChange = false;
1813 for (BasicBlock &BB : F) {
1814 // Ignore unreachable basic blocks.
1815 if (!DT.isReachableFromEntry(&BB))
1816 continue;
1817
1818 const DataLayout &DL = F.getDataLayout();
1819
1820 // Walk the block backwards for efficiency. We're matching a chain of
1821 // use->defs, so we're more likely to succeed by starting from the bottom.
1822 // Also, we want to avoid matching partial patterns.
1823 // TODO: It would be more efficient if we removed dead instructions
1824 // iteratively in this loop rather than waiting until the end.
1825 for (Instruction &I : make_early_inc_range(reverse(BB))) {
1826 MadeChange |= foldAnyOrAllBitsSet(I);
1827 MadeChange |= foldGuardedFunnelShift(I, DT);
1828 MadeChange |= tryToRecognizePopCount(I);
1829 MadeChange |= tryToFPToSat(I, TTI);
1830 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1831 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1832 MadeChange |= foldPatternedLoads(I, DL);
1833 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1834 MadeChange |= foldMulHigh(I);
1835 // NOTE: This function may erase the instruction `I`, so it needs to be
1836 // called at the end of this sequence; otherwise later folds could end up
1837 // looking at an already-erased instruction.
1838 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1839 }
1840
1841 // Do this separately to avoid redundantly scanning stores multiple times.
1842 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1843 }
1844
1845 // We're done with transforms, so remove dead instructions.
1846 if (MadeChange)
1847 for (BasicBlock &BB : F)
1848 SimplifyInstructionsInBlock(&BB);
1849
1850 return MadeChange;
1851}
1852
1853/// This is the entry point for all transforms. Pass manager differences are
1854/// handled in the callers of this function.
1855 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1856 TargetLibraryInfo &TLI, DominatorTree &DT,
1857 AliasAnalysis &AA, bool &MadeCFGChange) {
1858 bool MadeChange = false;
1859 const DataLayout &DL = F.getDataLayout();
1860 TruncInstCombine TIC(AC, TLI, DL, DT);
1861 MadeChange |= TIC.run(F);
1862 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1863 return MadeChange;
1864}
1865
1866 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1867 FunctionAnalysisManager &AM) {
1868 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1869 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1870 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1871 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1872 auto &AA = AM.getResult<AAManager>(F);
1873 bool MadeCFGChange = false;
1874 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1875 // No changes, all analyses are preserved.
1876 return PreservedAnalyses::all();
1877 }
1878 // Mark all the analyses that instcombine updates as preserved.
1879 PreservedAnalyses PA;
1880 if (MadeCFGChange)
1881 PA.preserve<DominatorTreeAnalysis>();
1882 else
1883 PA.preserveSet<CFGAnalyses>();
1884 return PA;
1885}
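// A minimal way to exercise these folds in isolation (assuming a standard
// LLVM opt build) is to run the pass by name through the new pass manager:
//
//   opt -passes=aggressive-instcombine -S input.ll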