1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
36
37using namespace llvm;
38using namespace PatternMatch;
39
40#define DEBUG_TYPE "aggressive-instcombine"
41
42STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
43STATISTIC(NumGuardedRotates,
44 "Number of guarded rotates transformed into funnel shifts");
45STATISTIC(NumGuardedFunnelShifts,
46 "Number of guarded funnel shifts transformed into funnel shifts");
47STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
48
50 "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
51 cl::desc("Max number of instructions to scan for aggressive instcombine."));
52
54 "strncmp-inline-threshold", cl::init(3), cl::Hidden,
55 cl::desc("The maximum length of a constant string for a builtin string cmp "
56 "call eligible for inlining. The default value is 3."));
57
58static cl::opt<unsigned>
59 MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
60 cl::desc("The maximum length of a constant string to "
61 "inline a memchr call."));
62
63/// Match a pattern for a bitwise funnel/rotate operation that partially guards
64/// against undefined behavior by branching around the funnel-shift/rotation
65/// when the shift amount is 0.
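// Editor's note: an illustrative C-level idiom (not from the original source)
// that produces the guarded-rotate pattern matched below, assuming a 32-bit
// unsigned 'x' and a shift amount 's':
//   unsigned r = (s == 0) ? x : ((x << s) | (x >> (32 - s)));
// The branch exists only to avoid the undefined shift by 32; the fold turns
// the whole diamond into a single @llvm.fshl.i32 call.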
66static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
67 if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
68 return false;
69
70 // As with the one-use checks below, this is not strictly necessary, but we
71 // are being cautious to avoid potential perf regressions on targets that
72 // do not actually have a funnel/rotate instruction (where the funnel shift
73 // would be expanded back into math/shift/logic ops).
74 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
75 return false;
76
77 // Match V to funnel shift left/right and capture the source operands and
78 // shift amount.
79 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
80 Value *&ShAmt) {
81 unsigned Width = V->getType()->getScalarSizeInBits();
82
83 // fshl(ShVal0, ShVal1, ShAmt)
84 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
85 if (match(V, m_OneUse(m_c_Or(
86 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
87 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
88 m_Deferred(ShAmt))))))) {
89 return Intrinsic::fshl;
90 }
91
92 // fshr(ShVal0, ShVal1, ShAmt)
93 // == (ShVal0 >> ShAmt) | (ShVal1 << (Width - ShAmt))
94 if (match(V,
95 m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
96 m_Value(ShAmt))),
97 m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
98 return Intrinsic::fshr;
99 }
100
101 return Intrinsic::not_intrinsic;
102 };
103
104 // One phi operand must be a funnel/rotate operation, and the other phi
105 // operand must be the source value of that funnel/rotate operation:
106 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
107 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
108 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
109 PHINode &Phi = cast<PHINode>(I);
110 unsigned FunnelOp = 0, GuardOp = 1;
111 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
112 Value *ShVal0, *ShVal1, *ShAmt;
113 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
114 if (IID == Intrinsic::not_intrinsic ||
115 (IID == Intrinsic::fshl && ShVal0 != P1) ||
116 (IID == Intrinsic::fshr && ShVal1 != P1)) {
117 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
118 if (IID == Intrinsic::not_intrinsic ||
119 (IID == Intrinsic::fshl && ShVal0 != P0) ||
120 (IID == Intrinsic::fshr && ShVal1 != P0))
121 return false;
122 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
123 "Pattern must match funnel shift left or right");
124 std::swap(FunnelOp, GuardOp);
125 }
126
127 // The incoming block with our source operand must be the "guard" block.
128 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
129 // amount is equal to 0. The other incoming block is the block with the
130 // funnel/rotate.
131 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
132 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
133 Instruction *TermI = GuardBB->getTerminator();
134
135 // Ensure that the shift values dominate each block.
136 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
137 return false;
138
139 BasicBlock *PhiBB = Phi.getParent();
140 if (!match(TermI, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
141 m_ZeroInt()),
142 m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
143 return false;
144
145 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
146
147 if (ShVal0 == ShVal1)
148 ++NumGuardedRotates;
149 else
150 ++NumGuardedFunnelShifts;
151
152 // If this is not a rotate then the select was blocking poison from the
153 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
154 bool IsFshl = IID == Intrinsic::fshl;
155 if (ShVal0 != ShVal1) {
156 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
157 ShVal1 = Builder.CreateFreeze(ShVal1);
158 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
159 ShVal0 = Builder.CreateFreeze(ShVal0);
160 }
161
162 // We matched a variation of this IR pattern:
163 // GuardBB:
164 // %cmp = icmp eq i32 %ShAmt, 0
165 // br i1 %cmp, label %PhiBB, label %FunnelBB
166 // FunnelBB:
167 // %sub = sub i32 32, %ShAmt
168 // %shr = lshr i32 %ShVal1, %sub
169 // %shl = shl i32 %ShVal0, %ShAmt
170 // %fsh = or i32 %shr, %shl
171 // br label %PhiBB
172 // PhiBB:
173 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
174 // -->
175 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
176 Phi.replaceAllUsesWith(
177 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
178 return true;
179}
180
181/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
182/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
183/// of 'and' ops, then we also need to capture the fact that we saw an
184/// "and X, 1", so that's an extra return value for that case.
185namespace {
186struct MaskOps {
187 Value *Root = nullptr;
188 APInt Mask;
189 bool MatchAndChain;
190 bool FoundAnd1 = false;
191
192 MaskOps(unsigned BitWidth, bool MatchAnds)
193 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
194};
195} // namespace
196
197/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
198/// chain of 'and' or 'or' instructions looking for shift ops of a common source
199/// value. Examples:
200/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
201/// returns { X, 0x129 }
202/// and (and (X >> 1), 1), (X >> 4)
203/// returns { X, 0x12 }
204static bool matchAndOrChain(Value *V, MaskOps &MOps) {
205 Value *Op0, *Op1;
206 if (MOps.MatchAndChain) {
207 // Recurse through a chain of 'and' operands. This requires an extra check
208 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
209 // in the chain to know that all of the high bits are cleared.
210 if (match(V, m_And(m_Value(Op0), m_One()))) {
211 MOps.FoundAnd1 = true;
212 return matchAndOrChain(Op0, MOps);
213 }
214 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
215 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
216 } else {
217 // Recurse through a chain of 'or' operands.
218 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
219 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
220 }
221
222 // We need a shift-right or a bare value representing a compare of bit 0 of
223 // the original source operand.
224 Value *Candidate;
225 const APInt *BitIndex = nullptr;
226 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
227 Candidate = V;
228
229 // Initialize result source operand.
230 if (!MOps.Root)
231 MOps.Root = Candidate;
232
233 // The shift constant is out-of-range? This code hasn't been simplified.
234 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
235 return false;
236
237 // Fill in the mask bit derived from the shift constant.
238 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
239 return MOps.Root == Candidate;
240}
241
242/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
243/// These will include a chain of 'or' or 'and'-shifted bits from a
244/// common source value:
245/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
246/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
247/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
248/// that differ only with a final 'not' of the result. We expect that final
249/// 'not' to be folded with the compare that we create here (invert predicate).
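// Editor's note: an illustrative IR instance of the 'any-bits-set' form
// (names and constants chosen for this note, not from the original source):
//   %s3 = lshr i32 %x, 3
//   %o1 = or i32 %x, %s3
//   %s8 = lshr i32 %x, 8
//   %o2 = or i32 %o1, %s8
//   %r  = and i32 %o2, 1
// becomes
//   %m  = and i32 %x, 265        ; bits 0, 3 and 8 -> 0x109
//   %c  = icmp ne i32 %m, 0
//   %r  = zext i1 %c to i32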
250static bool foldAnyOrAllBitsSet(Instruction &I) {
251 // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
252 // final "and X, 1" instruction must be the final op in the sequence.
253 bool MatchAllBitsSet;
254 if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_One())))
255 MatchAllBitsSet = true;
256 else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
257 MatchAllBitsSet = false;
258 else
259 return false;
260
261 MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
262 if (MatchAllBitsSet) {
263 if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
264 return false;
265 } else {
266 if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
267 return false;
268 }
269
270 // The pattern was found. Create a masked compare that replaces all of the
271 // shift and logic ops.
272 IRBuilder<> Builder(&I);
273 Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
274 Value *And = Builder.CreateAnd(MOps.Root, Mask);
275 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
276 : Builder.CreateIsNotNull(And);
277 Value *Zext = Builder.CreateZExt(Cmp, I.getType());
278 I.replaceAllUsesWith(Zext);
279 ++NumAnyOrAllBitsSet;
280 return true;
281}
282
283// Try to recognize below function as popcount intrinsic.
284// This is the "best" algorithm from
285// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
286// Also used in TargetLowering::expandCTPOP().
287//
288// int popcount(unsigned int i) {
289// i = i - ((i >> 1) & 0x55555555);
290// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
291// i = ((i + (i >> 4)) & 0x0F0F0F0F);
292// return (i * 0x01010101) >> 24;
293// }
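// Editor's note: for the common 32-bit case the splatted constants built
// below are Mask55 = 0x55555555, Mask33 = 0x33333333, Mask0F = 0x0F0F0F0F,
// Mask01 = 0x01010101 and MaskShift = 24, and a matched chain is replaced by
// a single "call i32 @llvm.ctpop.i32(i32 %x)". (Illustration added by the
// editor, not part of the original source.)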
294static bool tryToRecognizePopCount(Instruction &I) {
295 if (I.getOpcode() != Instruction::LShr)
296 return false;
297
298 Type *Ty = I.getType();
299 if (!Ty->isIntOrIntVectorTy())
300 return false;
301
302 unsigned Len = Ty->getScalarSizeInBits();
303 // FIXME: fix Len == 8 and other irregular type lengths.
304 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
305 return false;
306
307 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
308 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
309 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
310 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
311 APInt MaskShift = APInt(Len, Len - 8);
312
313 Value *Op0 = I.getOperand(0);
314 Value *Op1 = I.getOperand(1);
315 Value *MulOp0;
316 // Matching "(i * 0x01010101...) >> 24".
317 if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
318 match(Op1, m_SpecificInt(MaskShift))) {
319 Value *ShiftOp0;
320 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
321 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
322 m_Deferred(ShiftOp0)),
323 m_SpecificInt(Mask0F)))) {
324 Value *AndOp0;
325 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
326 if (match(ShiftOp0,
327 m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
328 m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
329 m_SpecificInt(Mask33))))) {
330 Value *Root, *SubOp1;
331 // Matching "i - ((i >> 1) & 0x55555555...)".
332 const APInt *AndMask;
333 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
334 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
335 m_APInt(AndMask)))) {
336 auto CheckAndMask = [&]() {
337 if (*AndMask == Mask55)
338 return true;
339
340 // Exact match failed, see if any bits are known to be 0 where we
341 // expect a 1 in the mask.
342 if (!AndMask->isSubsetOf(Mask55))
343 return false;
344
345 APInt NeededMask = Mask55 & ~*AndMask;
346 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
347 NeededMask,
348 SimplifyQuery(I.getDataLayout()));
349 };
350
351 if (CheckAndMask()) {
352 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
353 IRBuilder<> Builder(&I);
354 I.replaceAllUsesWith(
355 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
356 ++NumPopCountRecognized;
357 return true;
358 }
359 }
360 }
361 }
362 }
363
364 return false;
365}
366
367/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
368/// C2 saturate the value of the fp conversion. The transform is not reversible
369/// as the fptosi.sat is more defined than the input - all values produce a
370/// valid value for the fptosi.sat, whereas some inputs that were out of range
371/// of the integer conversion produce poison in the original. The reversed
372/// pattern may use fmax and fmin instead. As we cannot directly reverse the
373/// transform, and it is not always profitable, we make it conditional on the
374/// cost being reported as lower by TTI.
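// Editor's note: an illustrative instance of the fold (types chosen for this
// note, not from the original source):
//   %c = fptosi float %x to i32
//   %a = call i32 @llvm.smax.i32(i32 %c, i32 -128)
//   %r = call i32 @llvm.smin.i32(i32 %a, i32 127)
// becomes, when TTI reports the saturating conversion as cheaper,
//   %s = call i8 @llvm.fptosi.sat.i8.f32(float %x)
//   %r = sext i8 %s to i32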
375static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
376 // Look for smin(smax(fptosi(x))) patterns, converting to fptosi_sat.
377 Value *In;
378 const APInt *MinC, *MaxC;
379 if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
380 m_APInt(MinC))),
381 m_APInt(MaxC))) &&
382 !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
383 m_APInt(MaxC))),
384 m_APInt(MinC))))
385 return false;
386
387 // Check that the constants clamp a saturate.
388 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
389 return false;
390
391 Type *IntTy = I.getType();
392 Type *FpTy = In->getType();
393 Type *SatTy =
394 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
395 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
396 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
397
398 // Get the cost of the intrinsic, and check that against the cost of
399 // fptosi+smin+smax
400 InstructionCost SatCost = TTI.getIntrinsicInstrCost(
401 IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
402 TTI::TCK_RecipThroughput);
403 SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
404 TTI::CastContextHint::None,
405 TTI::TCK_RecipThroughput);
406
407 InstructionCost MinMaxCost = TTI.getCastInstrCost(
408 Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
409 TTI::TCK_RecipThroughput);
410 MinMaxCost += TTI.getIntrinsicInstrCost(
411 IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
412 TTI::TCK_RecipThroughput);
413 MinMaxCost += TTI.getIntrinsicInstrCost(
414 IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
415 TTI::TCK_RecipThroughput);
416
417 if (SatCost >= MinMaxCost)
418 return false;
419
420 IRBuilder<> Builder(&I);
421 Value *Sat =
422 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
423 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
424 return true;
425}
426
427/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
428/// pessimistic codegen that has to account for setting errno and can enable
429/// vectorization.
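// Editor's note, illustrating the intended rewrite (not from the original
// source):
//   %r = call nnan double @sqrt(double %x)
// becomes
//   %r = call nnan double @llvm.sqrt.f64(double %x)
// and the original libcall is erased explicitly, since a call with side
// effects is not trivially dead.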
430static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
431 TargetLibraryInfo &TLI, AssumptionCache &AC,
432 DominatorTree &DT) {
433 // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
434 // (because NNAN or the operand arg must not be less than -0.0) and (3) we
435 // would not end up lowering to a libcall anyway (which could change the value
436 // of errno), then:
437 // (1) errno won't be set.
438 // (2) it is safe to convert this to an intrinsic call.
439 Type *Ty = Call->getType();
440 Value *Arg = Call->getArgOperand(0);
441 if (TTI.haveFastSqrt(Ty) &&
442 (Call->hasNoNaNs() ||
443 cannotBeOrderedLessThanZero(
444 Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
445 IRBuilder<> Builder(Call);
446 Value *NewSqrt =
447 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
448 Call->replaceAllUsesWith(NewSqrt);
449
450 // Explicitly erase the old call because a call with side effects is not
451 // trivially dead.
452 Call->eraseFromParent();
453 return true;
454 }
455
456 return false;
457}
458
459// Check if this array of constants represents a cttz table.
460// Iterate over the elements from \p Table by trying to find/match all
461// the numbers from 0 to \p InputBits that should represent cttz results.
462static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
463 const APInt &AndMask, Type *AccessTy,
464 unsigned InputBits, const APInt &GEPIdxFactor,
465 const DataLayout &DL) {
466 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
467 APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
468 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
469 ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
470 if (!C || C->getValue() != Idx)
471 return false;
472 }
473
474 return true;
475}
476
477// Try to recognize table-based ctz implementation.
478// E.g., an example in C (for more cases please see the llvm/tests):
479// int f(unsigned x) {
480// static const char table[32] =
481// {0, 1, 28, 2, 29, 14, 24, 3, 30,
482// 22, 20, 15, 25, 17, 4, 8, 31, 27,
483// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
484// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
485// }
486// this can be lowered to `cttz` instruction.
487// There is also a special case when the element is 0.
488//
489// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn
490// sequence that contains each pattern of bits in it. The shift extracts
491// the top bits after the multiply, and that index into the table should
492// represent the number of trailing zeros in the original number.
493//
494// Here are some examples of LLVM IR for a 64-bit target:
495//
496// CASE 1:
497// %sub = sub i32 0, %x
498// %and = and i32 %sub, %x
499// %mul = mul i32 %and, 125613361
500// %shr = lshr i32 %mul, 27
501// %idxprom = zext i32 %shr to i64
502// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
503// i64 %idxprom
504// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
505//
506// CASE 2:
507// %sub = sub i32 0, %x
508// %and = and i32 %sub, %x
509// %mul = mul i32 %and, 72416175
510// %shr = lshr i32 %mul, 26
511// %idxprom = zext i32 %shr to i64
512// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
513// i64 0, i64 %idxprom
514// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
515//
516// CASE 3:
517// %sub = sub i32 0, %x
518// %and = and i32 %sub, %x
519// %mul = mul i32 %and, 81224991
520// %shr = lshr i32 %mul, 27
521// %idxprom = zext i32 %shr to i64
522// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
523// i64 0, i64 %idxprom
524// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
525//
526// CASE 4:
527// %sub = sub i64 0, %x
528// %and = and i64 %sub, %x
529// %mul = mul i64 %and, 283881067100198605
530// %shr = lshr i64 %mul, 58
531// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
532// i64 %shr
533// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
534//
535// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
536static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
537 LoadInst *LI = dyn_cast<LoadInst>(&I);
538 if (!LI)
539 return false;
540
541 Type *AccessType = LI->getType();
542 if (!AccessType->isIntegerTy())
543 return false;
544
545 auto *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
546 if (!GEP || !GEP->hasNoUnsignedSignedWrap())
547 return false;
548
549 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
550 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
551 return false;
552
553 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
554 APInt ModOffset(BW, 0);
555 SmallMapVector<Value *, APInt, 4> VarOffsets;
556 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
557 VarOffsets.size() != 1 || ModOffset != 0)
558 return false;
559 auto [GepIdx, GEPScale] = VarOffsets.front();
560
561 Value *X1;
562 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
563 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
564 // This might be extended to the pointer index type, and if the gep index type
565 // has been replaced with an i8 then a new And (and different ShiftConst) will
566 // be present.
567 auto MatchInner = m_LShr(
568 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
569 m_APInt(ShiftConst));
570 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
571 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
572 return false;
573
574 unsigned InputBits = X1->getType()->getScalarSizeInBits();
575 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
576 return false;
577
578 if (!GEPScale.isIntN(InputBits) ||
579 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
580 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
581 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
582 return false;
583
584 ConstantInt *ZeroTableElem = cast<ConstantInt>(
585 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
586 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
587
588 IRBuilder<> B(LI);
589 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
590 Type *XType = X1->getType();
591 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
592 Value *ZExtOrTrunc = nullptr;
593
594 if (DefinedForZero) {
595 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
596 } else {
597 // If the value in elem 0 isn't the same as InputBits, we still want to
598 // produce the value from the table.
599 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
600 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
601
602 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
603 // it should be handled as: `cttz(x) & (typeSize - 1)`.
604
605 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
606 }
607
608 LI->replaceAllUsesWith(ZExtOrTrunc);
609
610 return true;
611}
612
613/// This is used by foldLoadsRecursive() to capture a Root Load node which is
614/// of type or(load, load) and recursively build the wide load. Also capture the
615/// shift amount, zero extend type and loadSize.
616struct LoadOps {
617 LoadInst *Root = nullptr;
618 LoadInst *RootInsert = nullptr;
619 bool FoundRoot = false;
620 uint64_t LoadSize = 0;
621 uint64_t Shift = 0;
622 Type *ZextType = nullptr;
623 AAMDNodes AATags;
624};
625
626// Identify and Merge consecutive loads recursively which is of the form
627// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
628// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
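// Editor's note: a concrete little-endian instance (names invented for this
// note, not from the original source):
//   %b0 = load i8, ptr %p
//   %p1 = getelementptr i8, ptr %p, i64 1
//   %b1 = load i8, ptr %p1
//   %z0 = zext i8 %b0 to i32
//   %z1 = zext i8 %b1 to i32
//   %hi = shl i32 %z1, 8
//   %r  = or i32 %z0, %hi
// becomes, when the wider access is legal and fast,
//   %w  = load i16, ptr %p
//   %r  = zext i16 %w to i32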
629static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
630 AliasAnalysis &AA) {
631 uint64_t ShAmt2;
632 Value *X;
633 Instruction *L1, *L2;
634
635 // Go to the last node with loads.
636 if (match(V,
637 m_OneUse(m_c_Or(m_Value(X),
638 m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
639 ShAmt2)))))) {
640 if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
641 // Avoid Partial chain merge.
642 return false;
643 } else
644 return false;
645
646 // Check if the pattern has loads
647 LoadInst *LI1 = LOps.Root;
648 uint64_t ShAmt1 = LOps.Shift;
649 if (LOps.FoundRoot == false &&
650 match(X, m_OneUse(
651 m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
652 LI1 = dyn_cast<LoadInst>(L1);
653 }
654 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
655
656 // Check that the loads are distinct, simple, and in the same address space.
657 if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
658 LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
659 return false;
660
661 // Check if Loads come from same BB.
662 if (LI1->getParent() != LI2->getParent())
663 return false;
664
665 // Find the data layout
666 bool IsBigEndian = DL.isBigEndian();
667
668 // Check if loads are consecutive and same size.
669 Value *Load1Ptr = LI1->getPointerOperand();
670 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
671 Load1Ptr =
672 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
673 /* AllowNonInbounds */ true);
674
675 Value *Load2Ptr = LI2->getPointerOperand();
676 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
677 Load2Ptr =
678 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
679 /* AllowNonInbounds */ true);
680
681 // Verify if both loads have same base pointers
682 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
683 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
684 if (Load1Ptr != Load2Ptr)
685 return false;
686
687 // Make sure that there are no padding bits.
688 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
689 !DL.typeSizeEqualsStoreSize(LI2->getType()))
690 return false;
691
692 // Alias Analysis to check for stores b/w the loads.
693 LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
694 MemoryLocation Loc;
695 if (!Start->comesBefore(End)) {
696 std::swap(Start, End);
697 Loc = MemoryLocation::get(End);
698 if (LOps.FoundRoot)
699 Loc = Loc.getWithNewSize(LOps.LoadSize);
700 } else
701 Loc = MemoryLocation::get(End);
702 unsigned NumScanned = 0;
703 for (Instruction &Inst :
704 make_range(Start->getIterator(), End->getIterator())) {
705 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
706 return false;
707
708 if (++NumScanned > MaxInstrsToScan)
709 return false;
710 }
711
712 // Make sure Load with lower Offset is at LI1
713 bool Reverse = false;
714 if (Offset2.slt(Offset1)) {
715 std::swap(LI1, LI2);
716 std::swap(ShAmt1, ShAmt2);
717 std::swap(Offset1, Offset2);
718 std::swap(Load1Ptr, Load2Ptr);
719 std::swap(LoadSize1, LoadSize2);
720 Reverse = true;
721 }
722
723 // Big endian swap the shifts
724 if (IsBigEndian)
725 std::swap(ShAmt1, ShAmt2);
726
727 // First load is always LI1. This is where we put the new load.
728 // Use the merged load size available from LI1 for forward loads.
729 if (LOps.FoundRoot) {
730 if (!Reverse)
731 LoadSize1 = LOps.LoadSize;
732 else
733 LoadSize2 = LOps.LoadSize;
734 }
735
736 // Verify if shift amount and load index aligns and verifies that loads
737 // are consecutive.
738 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
739 uint64_t PrevSize =
740 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
741 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
742 return false;
743
744 // Update LOps
745 AAMDNodes AATags1 = LOps.AATags;
746 AAMDNodes AATags2 = LI2->getAAMetadata();
747 if (LOps.FoundRoot == false) {
748 LOps.FoundRoot = true;
749 AATags1 = LI1->getAAMetadata();
750 }
751 LOps.LoadSize = LoadSize1 + LoadSize2;
752 LOps.RootInsert = Start;
753
754 // Concatenate the AATags of the Merged Loads.
755 LOps.AATags = AATags1.concat(AATags2);
756
757 LOps.Root = LI1;
758 LOps.Shift = ShAmt1;
759 LOps.ZextType = X->getType();
760 return true;
761}
762
763// For a given BB instruction, evaluate all loads in the chain that form a
764// pattern which suggests that the loads can be combined. The one and only use
765// of the loads is to form a wider load.
766static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
767 TargetTransformInfo &TTI, AliasAnalysis &AA,
768 const DominatorTree &DT) {
769 // Only consider load chains of scalar values.
770 if (isa<VectorType>(I.getType()))
771 return false;
772
773 LoadOps LOps;
774 if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
775 return false;
776
777 IRBuilder<> Builder(&I);
778 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
779
780 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
781 // TTI based checks if we want to proceed with wider load
782 bool Allowed = TTI.isTypeLegal(WiderType);
783 if (!Allowed)
784 return false;
785
786 unsigned AS = LI1->getPointerAddressSpace();
787 unsigned Fast = 0;
788 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
789 AS, LI1->getAlign(), &Fast);
790 if (!Allowed || !Fast)
791 return false;
792
793 // Get the Index and Ptr for the new GEP.
794 Value *Load1Ptr = LI1->getPointerOperand();
795 Builder.SetInsertPoint(LOps.RootInsert);
796 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
797 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
798 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
799 DL, Offset1, /* AllowNonInbounds */ true);
800 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
801 }
802 // Generate wider load.
803 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
804 LI1->isVolatile(), "");
805 NewLoad->takeName(LI1);
806 // Set the New Load AATags Metadata.
807 if (LOps.AATags)
808 NewLoad->setAAMetadata(LOps.AATags);
809
810 Value *NewOp = NewLoad;
811 // Check if zero extend needed.
812 if (LOps.ZextType)
813 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
814
815 // Check if shift needed. We need to shift with the amount of load1
816 // shift if not zero.
817 if (LOps.Shift)
818 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
819 I.replaceAllUsesWith(NewOp);
820
821 return true;
822}
823
824/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
825struct PartStore {
826 Value *PtrBase;
827 APInt PtrOffset;
828 Value *Val;
829 uint64_t ValOffset;
830 uint64_t ValWidth;
831 StoreInst *Store;
832
833 bool isCompatibleWith(const PartStore &Other) const {
834 return PtrBase == Other.PtrBase && Val == Other.Val;
835 }
836
837 bool operator<(const PartStore &Other) const {
838 return PtrOffset.slt(Other.PtrOffset);
839 }
840};
841
842static std::optional<PartStore> matchPartStore(Instruction &I,
843 const DataLayout &DL) {
844 auto *Store = dyn_cast<StoreInst>(&I);
845 if (!Store || !Store->isSimple())
846 return std::nullopt;
847
848 Value *StoredVal = Store->getValueOperand();
849 Type *StoredTy = StoredVal->getType();
850 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
851 return std::nullopt;
852
853 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
854 uint64_t ValOffset;
855 Value *Val;
856 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
857 return std::nullopt;
858
859 Value *Ptr = Store->getPointerOperand();
860 APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
861 Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
862 DL, PtrOffset, /*AllowNonInbounds=*/true);
863 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
864}
865
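// Editor's note: the kind of pattern the part-store merging below targets,
// shown on invented names (little-endian; not from the original source):
//   %lo = trunc i32 %v to i16
//   store i16 %lo, ptr %p
//   %sh = lshr i32 %v, 16
//   %hi = trunc i32 %sh to i16
//   %p2 = getelementptr i8, ptr %p, i64 2
//   store i16 %hi, ptr %p2
// becomes, when a single wide store is legal and fast,
//   store i32 %v, ptr %p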
866static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
867 unsigned Width, const DataLayout &DL,
868 TargetTransformInfo &TTI) {
869 if (Parts.size() < 2)
870 return false;
871
872 // Check whether combining the stores is profitable.
873 // FIXME: We could generate smaller stores if we can't produce a large one.
874 const PartStore &First = Parts.front();
875 LLVMContext &Ctx = First.Store->getContext();
876 Type *NewTy = Type::getIntNTy(Ctx, Width);
877 unsigned Fast = 0;
878 if (!TTI.isTypeLegal(NewTy) ||
879 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
880 First.Store->getPointerAddressSpace(),
881 First.Store->getAlign(), &Fast) ||
882 !Fast)
883 return false;
884
885 // Generate the combined store.
886 IRBuilder<> Builder(First.Store);
887 Value *Val = First.Val;
888 if (First.ValOffset != 0)
889 Val = Builder.CreateLShr(Val, First.ValOffset);
890 Val = Builder.CreateTrunc(Val, NewTy);
891 StoreInst *Store = Builder.CreateAlignedStore(
892 Val, First.Store->getPointerOperand(), First.Store->getAlign());
893
894 AAMDNodes AATags = First.Store->getAAMetadata();
895 for (const PartStore &Part : drop_begin(Parts))
896 AATags = AATags.concat(Part.Store->getAAMetadata());
897 Store->setAAMetadata(AATags);
898
899 // Remove the old stores.
900 for (const PartStore &Part : Parts)
901 Part.Store->eraseFromParent();
902
903 return true;
904}
905
906static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
907 const DataLayout &DL, TargetTransformInfo &TTI) {
908 if (Parts.size() < 2)
909 return false;
910
911 // We now have multiple parts of the same value stored to the same pointer.
912 // Sort the parts by pointer offset, and make sure they are consistent with
913 // the value offsets. Also check that the value is fully covered without
914 // overlaps.
915 bool Changed = false;
916 llvm::sort(Parts);
917 int64_t LastEndOffsetFromFirst = 0;
918 const PartStore *First = &Parts[0];
919 for (const PartStore &Part : Parts) {
920 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
921 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
922 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
923 LastEndOffsetFromFirst != ValOffsetFromFirst) {
924 Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
925 LastEndOffsetFromFirst, DL, TTI);
926 First = &Part;
927 LastEndOffsetFromFirst = Part.ValWidth;
928 continue;
929 }
930
931 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
932 }
933
934 Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
935 LastEndOffsetFromFirst, DL, TTI);
936 return Changed;
937}
938
939static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
940 TargetTransformInfo &TTI, AliasAnalysis &AA) {
941 // FIXME: Add big endian support.
942 if (DL.isBigEndian())
943 return false;
944
945 BatchAAResults BatchAA(AA);
946 SmallVector<PartStore, 8> Parts;
947 bool MadeChange = false;
948 for (Instruction &I : make_early_inc_range(BB)) {
949 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
950 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
951 Parts.push_back(std::move(*Part));
952 continue;
953 }
954
955 MadeChange |= mergePartStores(Parts, DL, TTI);
956 Parts.clear();
957 Parts.push_back(std::move(*Part));
958 continue;
959 }
960
961 if (Parts.empty())
962 continue;
963
964 if (I.mayThrow() ||
965 (I.mayReadOrWriteMemory() &&
966 isModOrRefSet(BatchAA.getModRefInfo(
967 &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
968 MadeChange |= mergePartStores(Parts, DL, TTI);
969 Parts.clear();
970 continue;
971 }
972 }
973
974 MadeChange |= mergePartStores(Parts, DL, TTI);
975 return MadeChange;
976}
977
978/// Combine away instructions providing they are still equivalent when compared
979/// against 0, i.e. do they have any bits set.
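// Editor's note, an illustrative use from foldICmpOrChain below (names are
// invented for this note, not from the original source):
//   %s = shl nuw i32 %a, 3
//   %o = or i32 %s, %b
//   %c = icmp eq i32 %o, 0
// can drop the shift, since a nuw/nsw shl is zero exactly when its first
// operand is zero:
//   %o = or i32 %a, %b
//   %c = icmp eq i32 %o, 0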
980static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
981 auto *I = dyn_cast<Instruction>(V);
982 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
983 return nullptr;
984
985 Value *A;
986
987 // Look deeper into the chain of or's, combining away shl (so long as they are
988 // nuw or nsw).
989 Value *Op0 = I->getOperand(0);
990 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
991 m_NUWShl(m_Value(A), m_Value()))))
992 Op0 = A;
993 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
994 Op0 = NOp;
995
996 Value *Op1 = I->getOperand(1);
997 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
998 m_NUWShl(m_Value(A), m_Value()))))
999 Op1 = A;
1000 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1001 Op1 = NOp;
1002
1003 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1004 return Builder.CreateOr(Op0, Op1);
1005 return nullptr;
1006}
1007
1008static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
1009 TargetTransformInfo &TTI, AliasAnalysis &AA,
1010 const DominatorTree &DT) {
1011 CmpPredicate Pred;
1012 Value *Op0;
1013 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1014 !ICmpInst::isEquality(Pred))
1015 return false;
1016
1017 // If the chain of or's matches a load, combine to that before attempting to
1018 // remove shifts.
1019 if (auto OpI = dyn_cast<Instruction>(Op0))
1020 if (OpI->getOpcode() == Instruction::Or)
1021 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1022 return true;
1023
1024 IRBuilder<> Builder(&I);
1025 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1026 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1027 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1028 return true;
1029 }
1030
1031 return false;
1032}
1033
1034// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1035// ModOffset
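// Editor's note: e.g. for a load from "gep inbounds [256 x i32], ptr @tbl,
// i64 0, i64 %i" the variable index contributes a scale of 4, so the helper
// returns Stride = 4 and ModOffset = 0; any constant byte offset is reduced
// modulo the stride (the GCD of all index scales). (Illustration added by
// the editor.)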
1036static std::pair<APInt, APInt>
1037getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
1038 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1039 std::optional<APInt> Stride;
1040 APInt ModOffset(BW, 0);
1041 // Return a minimum gep stride, greatest common divisor of consecutive gep
1042 // index scales(c.f. Bézout's identity).
1043 while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
1044 SmallMapVector<Value *, APInt, 4> VarOffsets;
1045 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1046 break;
1047
1048 for (auto [V, Scale] : VarOffsets) {
1049 // Only keep a power of two factor for non-inbounds
1050 if (!GEP->hasNoUnsignedSignedWrap())
1051 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1052
1053 if (!Stride)
1054 Stride = Scale;
1055 else
1056 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1057 }
1058
1059 PtrOp = GEP->getPointerOperand();
1060 }
1061
1062 // Check whether pointer arrives back at Global Variable via at least one GEP.
1063 // Even if it doesn't, we can check by alignment.
1064 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1065 return {APInt(BW, 1), APInt(BW, 0)};
1066
1067 // Because GEP indices may be signed, normalize the accumulated offset to
1068 // the non-negative remainder of division by the minimum GEP stride.
1069 ModOffset = ModOffset.srem(*Stride);
1070 if (ModOffset.isNegative())
1071 ModOffset += *Stride;
1072
1073 return {*Stride, ModOffset};
1074}
1075
1076/// If C is a constant patterned array and all valid loaded results for the
1077/// given alignment are the same constant, return that constant.
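// Editor's note: e.g. with "@g = constant [8 x i16] [i16 7, i16 7, i16 7,
// i16 7, i16 7, i16 7, i16 7, i16 7]", a "load i16" through a GEP into @g
// with an unknown index folds to "i16 7", because every in-bounds, suitably
// aligned offset yields the same value. (Illustration added by the editor.)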
1078static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
1079 auto *LI = dyn_cast<LoadInst>(&I);
1080 if (!LI || LI->isVolatile())
1081 return false;
1082
1083 // We can only fold the load if it is from a constant global with definitive
1084 // initializer. Skip expensive logic if this is not the case.
1085 auto *PtrOp = LI->getPointerOperand();
1086 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
1087 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1088 return false;
1089
1090 // Bail for large initializers in excess of 4K to avoid too many scans.
1091 Constant *C = GV->getInitializer();
1092 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1093 if (!GVSize || 4096 < GVSize)
1094 return false;
1095
1096 Type *LoadTy = LI->getType();
1097 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1098 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1099
1100 // Any possible offset could be multiple of GEP stride. And any valid
1101 // offset is multiple of load alignment, so checking only multiples of bigger
1102 // one is sufficient to say results' equality.
1103 if (auto LA = LI->getAlign();
1104 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1105 ConstOffset = APInt(BW, 0);
1106 Stride = APInt(BW, LA.value());
1107 }
1108
1109 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1110 if (!Ca)
1111 return false;
1112
1113 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1114 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1115 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1116 return false;
1117
1118 I.replaceAllUsesWith(Ca);
1119
1120 return true;
1121}
1122
1123namespace {
1124class StrNCmpInliner {
1125public:
1126 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1127 const DataLayout &DL)
1128 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1129
1130 bool optimizeStrNCmp();
1131
1132private:
1133 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1134
1135 CallInst *CI;
1136 LibFunc Func;
1137 DomTreeUpdater *DTU;
1138 const DataLayout &DL;
1139};
1140
1141} // namespace
1142
1143/// First we normalize calls to strncmp/strcmp to the form of
1144/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1145/// (without considering '\0').
1146///
1147/// Examples:
1148///
1149/// \code
1150/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1151/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1152/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1153/// strcmp(s, "a") -> compare(s, "a", 2)
1154///
1155/// char s2[] = {'a'}
1156/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1157///
1158/// char s2[] = {'a', 'b', 'c', 'd'}
1159/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1160/// \endcode
1161///
1162/// We only handle cases where N and exactly one of s1 and s2 are constant.
1163/// Cases that s1 and s2 are both constant are already handled by the
1164/// instcombine pass.
1165///
1166/// We do not handle cases where N > StrNCmpInlineThreshold.
1167///
1168/// We also do not handle cases where N < 2, which are already
1169/// handled by the instcombine pass.
1170///
1171bool StrNCmpInliner::optimizeStrNCmp() {
1172 if (StrNCmpInlineThreshold < 2)
1173 return false;
1174
1175 if (!isOnlyUsedInZeroComparison(CI))
1176 return false;
1177
1178 Value *Str1P = CI->getArgOperand(0);
1179 Value *Str2P = CI->getArgOperand(1);
1180 // Should be handled elsewhere.
1181 if (Str1P == Str2P)
1182 return false;
1183
1184 StringRef Str1, Str2;
1185 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1186 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
1187 if (HasStr1 == HasStr2)
1188 return false;
1189
1190 // Note that '\0' and characters after it are not trimmed.
1191 StringRef Str = HasStr1 ? Str1 : Str2;
1192 Value *StrP = HasStr1 ? Str2P : Str1P;
1193
1194 size_t Idx = Str.find('\0');
1195 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1196 if (Func == LibFunc_strncmp) {
1197 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1198 N = std::min(N, ConstInt->getZExtValue());
1199 else
1200 return false;
1201 }
1202 // Now N means how many bytes we need to compare at most.
1203 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1204 return false;
1205
1206 // Cases where StrP has two or more dereferenceable bytes might be better
1207 // optimized elsewhere.
1208 bool CanBeNull = false, CanBeFreed = false;
1209 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1210 return false;
1211 inlineCompare(StrP, Str, N, HasStr1);
1212 return true;
1213}
1214
1215/// Convert
1216///
1217/// \code
1218/// ret = compare(s1, s2, N)
1219/// \endcode
1220///
1221/// into
1222///
1223/// \code
1224/// ret = (int)s1[0] - (int)s2[0]
1225/// if (ret != 0)
1226/// goto NE
1227/// ...
1228/// ret = (int)s1[N-2] - (int)s2[N-2]
1229/// if (ret != 0)
1230/// goto NE
1231/// ret = (int)s1[N-1] - (int)s2[N-1]
1232/// NE:
1233/// \endcode
1234///
1235/// CFG before and after the transformation:
1236///
1237/// (before)
1238/// BBCI
1239///
1240/// (after)
1241/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1242/// | ^
1243/// E |
1244/// | |
1245/// BBSubs[1] (sub,icmp) --NE-----+
1246/// ... |
1247/// BBSubs[N-1] (sub) ---------+
1248///
1249void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1250 bool Swapped) {
1251 auto &Ctx = CI->getContext();
1252 IRBuilder<> B(Ctx);
1253 // We want these instructions to be recognized as inlined instructions for the
1254 // compare call, but we don't have a source location for the definition of
1255 // that function, since we're generating that code now. Because the generated
1256 // code is a viable point for a memory access error, we make the pragmatic
1257 // choice here to directly use CI's location so that we have useful
1258 // attribution for the generated code.
1259 B.SetCurrentDebugLocation(CI->getDebugLoc());
1260
1261 BasicBlock *BBCI = CI->getParent();
1262 BasicBlock *BBTail =
1263 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1264
1265 SmallVector<BasicBlock *> BBSubs;
1266 for (uint64_t I = 0; I < N; ++I)
1267 BBSubs.push_back(
1268 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1269 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1270
1271 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1272
1273 B.SetInsertPoint(BBNE);
1274 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1275 B.CreateBr(BBTail);
1276
1277 Value *Base = LHS;
1278 for (uint64_t i = 0; i < N; ++i) {
1279 B.SetInsertPoint(BBSubs[i]);
1280 Value *VL =
1281 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1282 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1283 CI->getType());
1284 Value *VR =
1285 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
1286 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1287 if (i < N - 1) {
1288 BranchInst *CondBrInst = B.CreateCondBr(
1289 B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)), BBNE,
1290 BBSubs[i + 1]);
1291
1292 Function *F = CI->getFunction();
1293 assert(F && "Instruction does not belong to a function!");
1294 std::optional<Function::ProfileCount> EC = F->getEntryCount();
1295 if (EC && EC->getCount() > 0)
1296 setExplicitlyUnknownBranchWeightsIfProfiled(*CondBrInst, *F, DEBUG_TYPE);
1297 } else {
1298 B.CreateBr(BBNE);
1299 }
1300
1301 Phi->addIncoming(Sub, BBSubs[i]);
1302 }
1303
1304 CI->replaceAllUsesWith(Phi);
1305 CI->eraseFromParent();
1306
1307 if (DTU) {
1308 SmallVector<DominatorTree::UpdateType, 8> Updates;
1309 Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1310 for (uint64_t i = 0; i < N; ++i) {
1311 if (i < N - 1)
1312 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1313 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1314 }
1315 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1316 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1317 DTU->applyUpdates(Updates);
1318 }
1319}
1320
1321/// Convert memchr with a small constant string into a switch
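// Editor's note: for a call like "memchr("ab", %c, 2)" the fold produces,
// roughly, a switch on "trunc %c to i8" with one case per distinct string
// byte ('a' -> index 0, 'b' -> index 1), a success block that computes the
// matching pointer, and a phi merging it with null for the default case.
// (Illustration added by the editor.)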
1322static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
1323 const DataLayout &DL) {
1324 if (isa<Constant>(Call->getArgOperand(1)))
1325 return false;
1326
1327 StringRef Str;
1328 Value *Base = Call->getArgOperand(0);
1329 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1330 return false;
1331
1332 uint64_t N = Str.size();
1333 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1334 uint64_t Val = ConstInt->getZExtValue();
1335 // Ignore the case that n is larger than the size of string.
1336 if (Val > N)
1337 return false;
1338 N = Val;
1339 } else
1340 return false;
1341
1342 if (N > MemChrInlineThreshold)
1343 return false;
1344
1345 BasicBlock *BB = Call->getParent();
1346 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1347 IRBuilder<> IRB(BB);
1348 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
1349 IntegerType *ByteTy = IRB.getInt8Ty();
1350 BB->getTerminator()->eraseFromParent();
1351 SwitchInst *SI = IRB.CreateSwitch(
1352 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1353 Type *IndexTy = DL.getIndexType(Call->getType());
1354 SmallVector<DominatorTree::UpdateType, 8> Updates;
1355
1356 BasicBlock *BBSuccess = BasicBlock::Create(
1357 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1358 IRB.SetInsertPoint(BBSuccess);
1359 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1360 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1361 IRB.CreateBr(BBNext);
1362 if (DTU)
1363 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1364
1365 SmallPtrSet<ConstantInt *, 4> Cases;
1366 for (uint64_t I = 0; I < N; ++I) {
1367 ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]);
1368 if (!Cases.insert(CaseVal).second)
1369 continue;
1370
1371 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1372 BB->getParent(), BBSuccess);
1373 SI->addCase(CaseVal, BBCase);
1374 IRB.SetInsertPoint(BBCase);
1375 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1376 IRB.CreateBr(BBSuccess);
1377 if (DTU) {
1378 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1379 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1380 }
1381 }
1382
1383 PHINode *PHI =
1384 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1385 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1386 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1387
1388 Call->replaceAllUsesWith(PHI);
1389 Call->eraseFromParent();
1390
1391 if (DTU)
1392 DTU->applyUpdates(Updates);
1393
1394 return true;
1395}
1396
1397static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
1398 TargetLibraryInfo &TLI, AssumptionCache &AC,
1399 DominatorTree &DT, const DataLayout &DL,
1400 bool &MadeCFGChange) {
1401
1402 auto *CI = dyn_cast<CallInst>(&I);
1403 if (!CI || CI->isNoBuiltin())
1404 return false;
1405
1406 Function *CalledFunc = CI->getCalledFunction();
1407 if (!CalledFunc)
1408 return false;
1409
1410 LibFunc LF;
1411 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1412 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1413 return false;
1414
1415 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1416
1417 switch (LF) {
1418 case LibFunc_sqrt:
1419 case LibFunc_sqrtf:
1420 case LibFunc_sqrtl:
1421 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1422 case LibFunc_strcmp:
1423 case LibFunc_strncmp:
1424 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1425 MadeCFGChange = true;
1426 return true;
1427 }
1428 break;
1429 case LibFunc_memchr:
1430 if (foldMemChr(CI, &DTU, DL)) {
1431 MadeCFGChange = true;
1432 return true;
1433 }
1434 break;
1435 default:;
1436 }
1437 return false;
1438}
1439
1440/// This is the entry point for folds that could be implemented in regular
1441/// InstCombine, but they are separated because they are not expected to
1442/// occur frequently and/or have more than a constant-length pattern match.
1443static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1444 TargetTransformInfo &TTI, TargetLibraryInfo &TLI,
1445 AliasAnalysis &AA,
1446 AssumptionCache &AC, bool &MadeCFGChange) {
1447 bool MadeChange = false;
1448 for (BasicBlock &BB : F) {
1449 // Ignore unreachable basic blocks.
1450 if (!DT.isReachableFromEntry(&BB))
1451 continue;
1452
1453 const DataLayout &DL = F.getDataLayout();
1454
1455 // Walk the block backwards for efficiency. We're matching a chain of
1456 // use->defs, so we're more likely to succeed by starting from the bottom.
1457 // Also, we want to avoid matching partial patterns.
1458 // TODO: It would be more efficient if we removed dead instructions
1459 // iteratively in this loop rather than waiting until the end.
1460 for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
1461 MadeChange |= foldAnyOrAllBitsSet(I);
1462 MadeChange |= foldGuardedFunnelShift(I, DT);
1463 MadeChange |= tryToRecognizePopCount(I);
1464 MadeChange |= tryToFPToSat(I, TTI);
1465 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1466 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1467 MadeChange |= foldPatternedLoads(I, DL);
1468 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1469 // NOTE: This call may erase the instruction `I`, so it needs to be the
1470 // last fold in this sequence; otherwise we may introduce use-after-free
1471 // bugs.
1472 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1473 }
1474
1475 // Do this separately to avoid redundantly scanning stores multiple times.
1476 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1477 }
1478
1479 // We're done with transforms, so remove dead instructions.
1480 if (MadeChange)
1481 for (BasicBlock &BB : F)
1482 SimplifyInstructionsInBlock(&BB);
1483
1484 return MadeChange;
1485}
1486
1487/// This is the entry point for all transforms. Pass manager differences are
1488/// handled in the callers of this function.
1489static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1490 TargetLibraryInfo &TLI, DominatorTree &DT,
1491 AliasAnalysis &AA, bool &MadeCFGChange) {
1492 bool MadeChange = false;
1493 const DataLayout &DL = F.getDataLayout();
1494 TruncInstCombine TIC(AC, TLI, DL, DT);
1495 MadeChange |= TIC.run(F);
1496 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1497 return MadeChange;
1498}
1499
1500PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1501 FunctionAnalysisManager &AM) {
1502 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1503 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1504 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1505 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1506 auto &AA = AM.getResult<AAManager>(F);
1507 bool MadeCFGChange = false;
1508 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1509 // No changes, all analyses are preserved.
1510 return PreservedAnalyses::all();
1511 }
1512 // Mark all the analyses that instcombine updates as preserved.
1513 PreservedAnalyses PA;
1514 if (MadeCFGChange)
1515 PA.preserve<DominatorTreeAnalysis>();
1516 else
1517 PA.preserveSet<CFGAnalyses>();
1518 return PA;
1519}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:992
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for profiling metadata utility functions.
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
BinaryOperator * Mul
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR changes inside it.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
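A minimal sketch of the update-submission idiom documented above, assuming a DomTreeUpdater is already available; the helper name and block names are illustrative.

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// After redirecting the edge Pred->OldSucc to Pred->NewSucc, report both CFG
// changes so the dominator tree(s) stay consistent.
static void notifyRedirectedEdge(DomTreeUpdater &DTU, BasicBlock *Pred,
                                 BasicBlock *OldSucc, BasicBlock *NewSucc) {
  DTU.applyUpdates({{DominatorTree::Delete, Pred, OldSucc},
                    {DominatorTree::Insert, Pred, NewSucc}});
}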
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the program.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2494
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of cases that it will be switched on.
Definition IRBuilder.h:1220
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2041
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition IRBuilder.h:2780
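A minimal sketch of the IRBuilder calls listed above, assuming a caller that already has a loop header block and a byte pointer; the function and value names are illustrative.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Create a block that advances a byte pointer by one and branches back to
// Header. The advanced pointer would typically feed a PHI in Header.
static Value *emitAdvanceBlock(Function &F, BasicBlock *Header, Value *Ptr) {
  LLVMContext &Ctx = F.getContext();
  BasicBlock *BB = BasicBlock::Create(Ctx, "advance", &F);
  IRBuilder<> B(Ctx);
  B.SetInsertPoint(BB);                               // append to the new block
  Value *Next = B.CreateInBoundsPtrAdd(Ptr, B.getInt64(1), "next");
  B.CreateBr(Header);                                 // unconditional back-edge
  return Next;
}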
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
size_type size() const
Definition MapVector.h:56
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory referenced by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underlying object.
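A minimal sketch of the alias query these classes support, assuming a BatchAAResults is already set up; the helper name is illustrative. This is the kind of mod/ref check a load-merging scan issues between two candidate loads.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Return true if instruction I may modify the memory read by LI.
static bool mayClobberLoad(BatchAAResults &BAA, Instruction &I, LoadInst &LI) {
  return isModSet(BAA.getModRefInfo(&I, MemoryLocation::get(&LI)));
}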
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
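A minimal sketch of how a function pass typically reports its effect through the PreservedAnalyses API above; the helper name and the particular analyses preserved are illustrative assumptions, not this pass's exact behavior.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Translate "what changed" into a PreservedAnalyses result.
static PreservedAnalyses reportResult(bool MadeChange, bool MadeCFGChange) {
  if (!MadeChange)
    return PreservedAnalyses::all();      // nothing changed: keep everything
  PreservedAnalyses PA;
  if (!MadeCFGChange)
    PA.preserveSet<CFGAnalyses>();        // CFG untouched: keep CFG analyses
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}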
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_RecipThroughput
Reciprocal throughput.
@ None
The cast is not used with a load/store of any kind.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
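A minimal sketch of the width bookkeeping these Type queries support, e.g. when a fold wants an integer type twice as wide as an existing scalar; the helper name is illustrative.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Return an integer type with twice the scalar bit width of Ty.
static IntegerType *getDoubleWidthIntTy(LLVMContext &Ctx, Type *Ty) {
  return Type::getIntNTy(Ctx, 2 * Ty->getScalarSizeInBits());
}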
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
#define UINT64_MAX
Definition DataTypes.h:77
Abstract Attribute helper functions.
Definition Attributor.h:165
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
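A minimal sketch of how the PatternMatch matchers above compose; the helper name is illustrative. It recognizes an equality compare of a masked value against zero, binding the operand and the constant mask.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

// Match "(X & C) == 0" where C is a constant mask; bind X and the mask.
static bool matchMaskTestIsZero(Value *V, Value *&X, const APInt *&Mask) {
  return match(V, m_SpecificICmp(CmpInst::ICMP_EQ,
                                 m_c_And(m_Value(X), m_APInt(Mask)),
                                 m_Zero()));
}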
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
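A minimal sketch of how a string-library fold can use getConstantStringInfo before deciding whether to expand a call inline; the helper name and the Threshold parameter (standing in for a pass option) are assumptions of this example.

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Return true if Ptr points at a constant string no longer than Threshold.
static bool isShortConstantString(const Value *Ptr, uint64_t Threshold) {
  StringRef Str;
  if (!getConstantStringInfo(Ptr, Str, /*TrimAtNul=*/false))
    return false;
  return Str.size() <= Threshold;
}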
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:634
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead instructions.
Definition Local.cpp:721
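A minimal sketch of the early-increment iteration idiom documented above; the helper name and the simplified deadness test are illustrative, not LLVM's full isInstructionTriviallyDead logic.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Erase unused, side-effect-free, non-terminator instructions in a block; the
// early-increment range keeps iteration valid across eraseFromParent().
static void dropTriviallyDead(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (!I.isTerminator() && I.use_empty() && !I.mayHaveSideEffects())
      I.eraseFromParent();
}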
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target and that, in the current Module, it is a Function with the right type.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
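A minimal sketch of the constant-folding query behind the patterned-load fold, assuming the caller already has the global's initializer; the helper name and the fixed i32 result type are illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Read an i32 at byte offset ByteOff out of a constant initializer.
// Returns null if the value cannot be determined.
static Constant *readI32At(Constant *Init, uint64_t ByteOff,
                           const DataLayout &DL) {
  Type *I32 = Type::getInt32Ty(Init->getContext());
  APInt Offset(/*numBits=*/64, ByteOff);
  return ConstantFoldLoadFromConst(Init, I32, Offset, DL);
}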
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts, or llvm.threadlocal.address intrinsics from the specified value, returning the original object being addressed.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load, load) and to build the combined wide load recursively.
LoadInst * RootInsert
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
bool operator<(const PartStore &Other) const
bool isCompatibleWith(const PartStore &Other) const
A collection of metadata nodes that might be associated with a memory access, used by the alias-analysis infrastructure.
Definition Metadata.h:760
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257