AggressiveInstCombine.cpp
1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//
14
15 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
16 #include "AggressiveInstCombineInternal.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/BasicAliasAnalysis.h"
21 #include "llvm/Analysis/ConstantFolding.h"
22 #include "llvm/Analysis/DomTreeUpdater.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/ValueTracking.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/BuildLibCalls.h"
37 #include "llvm/Transforms/Utils/Local.h"
40
41using namespace llvm;
42using namespace PatternMatch;
43
44#define DEBUG_TYPE "aggressive-instcombine"
45
46namespace llvm {
48}
49
50STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
51STATISTIC(NumGuardedRotates,
52 "Number of guarded rotates transformed into funnel shifts");
53STATISTIC(NumGuardedFunnelShifts,
54 "Number of guarded funnel shifts transformed into funnel shifts");
55STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
56
58 "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
59 cl::desc("Max number of instructions to scan for aggressive instcombine."));
60
62 "strncmp-inline-threshold", cl::init(3), cl::Hidden,
63 cl::desc("The maximum length of a constant string for a builtin string cmp "
64 "call eligible for inlining. The default value is 3."));
65
66 static cl::opt<unsigned>
67 MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
68 cl::desc("The maximum length of a constant string to "
69 "inline a memchr call."));
70
71/// Match a pattern for a bitwise funnel/rotate operation that partially guards
72/// against undefined behavior by branching around the funnel-shift/rotation
73/// when the shift amount is 0.
74 static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
75 if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
76 return false;
77
78 // As with the one-use checks below, this is not strictly necessary, but we
79 // are being cautious to avoid potential perf regressions on targets that
80 // do not actually have a funnel/rotate instruction (where the funnel shift
81 // would be expanded back into math/shift/logic ops).
82 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
83 return false;
84
85 // Match V to funnel shift left/right and capture the source operands and
86 // shift amount.
87 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
88 Value *&ShAmt) {
89 unsigned Width = V->getType()->getScalarSizeInBits();
90
91 // fshl(ShVal0, ShVal1, ShAmt)
92 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
93 if (match(V, m_OneUse(m_c_Or(
94 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
95 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
96 m_Deferred(ShAmt))))))) {
97 return Intrinsic::fshl;
98 }
99
100 // fshr(ShVal0, ShVal1, ShAmt)
101 // == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
102 if (match(V,
103 m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
104 m_Value(ShAmt))),
105 m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
106 return Intrinsic::fshr;
107 }
108
109 return Intrinsic::not_intrinsic;
110 };
111
112 // One phi operand must be a funnel/rotate operation, and the other phi
113 // operand must be the source value of that funnel/rotate operation:
114 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
115 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
116 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
117 PHINode &Phi = cast<PHINode>(I);
118 unsigned FunnelOp = 0, GuardOp = 1;
119 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
120 Value *ShVal0, *ShVal1, *ShAmt;
121 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
122 if (IID == Intrinsic::not_intrinsic ||
123 (IID == Intrinsic::fshl && ShVal0 != P1) ||
124 (IID == Intrinsic::fshr && ShVal1 != P1)) {
125 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
126 if (IID == Intrinsic::not_intrinsic ||
127 (IID == Intrinsic::fshl && ShVal0 != P0) ||
128 (IID == Intrinsic::fshr && ShVal1 != P0))
129 return false;
130 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
131 "Pattern must match funnel shift left or right");
132 std::swap(FunnelOp, GuardOp);
133 }
134
135 // The incoming block with our source operand must be the "guard" block.
136 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
137 // amount is equal to 0. The other incoming block is the block with the
138 // funnel/rotate.
139 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
140 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
141 Instruction *TermI = GuardBB->getTerminator();
142
143 // Ensure that the shift values dominate each block.
144 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
145 return false;
146
147 BasicBlock *PhiBB = Phi.getParent();
148 if (!match(TermI, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
149 m_ZeroInt()),
150 m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
151 return false;
152
153 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
154
155 if (ShVal0 == ShVal1)
156 ++NumGuardedRotates;
157 else
158 ++NumGuardedFunnelShifts;
159
160 // If this is not a rotate then the select was blocking poison from the
161 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
162 bool IsFshl = IID == Intrinsic::fshl;
163 if (ShVal0 != ShVal1) {
164 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
165 ShVal1 = Builder.CreateFreeze(ShVal1);
166 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
167 ShVal0 = Builder.CreateFreeze(ShVal0);
168 }
169
170 // We matched a variation of this IR pattern:
171 // GuardBB:
172 // %cmp = icmp eq i32 %ShAmt, 0
173 // br i1 %cmp, label %PhiBB, label %FunnelBB
174 // FunnelBB:
175 // %sub = sub i32 32, %ShAmt
176 // %shr = lshr i32 %ShVal1, %sub
177 // %shl = shl i32 %ShVal0, %ShAmt
178 // %fsh = or i32 %shr, %shl
179 // br label %PhiBB
180 // PhiBB:
181 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
182 // -->
183 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
184 Phi.replaceAllUsesWith(
185 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
186 return true;
187}
188
189/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
190/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
191/// of 'and' ops, then we also need to capture the fact that we saw an
192/// "and X, 1", so that's an extra return value for that case.
193namespace {
194struct MaskOps {
195 Value *Root = nullptr;
196 APInt Mask;
197 bool MatchAndChain;
198 bool FoundAnd1 = false;
199
200 MaskOps(unsigned BitWidth, bool MatchAnds)
201 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
202};
203} // namespace
204
205/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
206/// chain of 'and' or 'or' instructions looking for shift ops of a common source
207/// value. Examples:
208/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
209/// returns { X, 0x129 }
210/// and (and (X >> 1), 1), (X >> 4)
211/// returns { X, 0x12 }
212static bool matchAndOrChain(Value *V, MaskOps &MOps) {
213 Value *Op0, *Op1;
214 if (MOps.MatchAndChain) {
215 // Recurse through a chain of 'and' operands. This requires an extra check
216 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
217 // in the chain to know that all of the high bits are cleared.
218 if (match(V, m_And(m_Value(Op0), m_One()))) {
219 MOps.FoundAnd1 = true;
220 return matchAndOrChain(Op0, MOps);
221 }
222 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
223 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
224 } else {
225 // Recurse through a chain of 'or' operands.
226 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
227 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
228 }
229
230 // We need a shift-right or a bare value representing a compare of bit 0 of
231 // the original source operand.
232 Value *Candidate;
233 const APInt *BitIndex = nullptr;
234 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
235 Candidate = V;
236
237 // Initialize result source operand.
238 if (!MOps.Root)
239 MOps.Root = Candidate;
240
241 // The shift constant is out-of-range? This code hasn't been simplified.
242 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
243 return false;
244
245 // Fill in the mask bit derived from the shift constant.
246 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
247 return MOps.Root == Candidate;
248}
249
250/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
251/// These will include a chain of 'or' or 'and'-shifted bits from a
252/// common source value:
253/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
254/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
255/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
256/// that differ only with a final 'not' of the result. We expect that final
257/// 'not' to be folded with the compare that we create here (invert predicate).
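/// Illustrative example (sketch, not from the original source) for the
/// 'any-bits-set' form:
///   %s3 = lshr i32 %x, 3
///   %s5 = lshr i32 %x, 5
///   %o  = or i32 %s3, %s5
///   %r  = and i32 %o, 1
/// becomes
///   %m = and i32 %x, 40        ; CMask = (1 << 3) | (1 << 5)
///   %c = icmp ne i32 %m, 0
///   %r = zext i1 %c to i32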
258 static bool foldAnyOrAllBitsSet(Instruction &I) {
259 // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
260 // final "and X, 1" instruction must be the final op in the sequence.
261 bool MatchAllBitsSet;
262 if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_One())))
263 MatchAllBitsSet = true;
264 else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
265 MatchAllBitsSet = false;
266 else
267 return false;
268
269 MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
270 if (MatchAllBitsSet) {
271 if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
272 return false;
273 } else {
274 if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
275 return false;
276 }
277
278 // The pattern was found. Create a masked compare that replaces all of the
279 // shift and logic ops.
280 IRBuilder<> Builder(&I);
281 Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
282 Value *And = Builder.CreateAnd(MOps.Root, Mask);
283 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
284 : Builder.CreateIsNotNull(And);
285 Value *Zext = Builder.CreateZExt(Cmp, I.getType());
286 I.replaceAllUsesWith(Zext);
287 ++NumAnyOrAllBitsSet;
288 return true;
289}
290
291 // Try to recognize the function below as a popcount intrinsic.
292// This is the "best" algorithm from
293// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
294// Also used in TargetLowering::expandCTPOP().
295//
296// int popcount(unsigned int i) {
297// i = i - ((i >> 1) & 0x55555555);
298// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
299// i = ((i + (i >> 4)) & 0x0F0F0F0F);
300// return (i * 0x01010101) >> 24;
301// }
302 static bool tryToRecognizePopCount(Instruction &I) {
303 if (I.getOpcode() != Instruction::LShr)
304 return false;
305
306 Type *Ty = I.getType();
307 if (!Ty->isIntOrIntVectorTy())
308 return false;
309
310 unsigned Len = Ty->getScalarSizeInBits();
311 // FIXME: fix Len == 8 and other irregular type lengths.
312 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
313 return false;
314
315 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
316 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
317 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
318 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
319 APInt MaskShift = APInt(Len, Len - 8);
320
321 Value *Op0 = I.getOperand(0);
322 Value *Op1 = I.getOperand(1);
323 Value *MulOp0;
324 // Matching "(i * 0x01010101...) >> 24".
325 if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
326 match(Op1, m_SpecificInt(MaskShift))) {
327 Value *ShiftOp0;
328 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
329 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
330 m_Deferred(ShiftOp0)),
331 m_SpecificInt(Mask0F)))) {
332 Value *AndOp0;
333 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
334 if (match(ShiftOp0,
335 m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
336 m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
337 m_SpecificInt(Mask33))))) {
338 Value *Root, *SubOp1;
339 // Matching "i - ((i >> 1) & 0x55555555...)".
340 const APInt *AndMask;
341 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
342 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
343 m_APInt(AndMask)))) {
344 auto CheckAndMask = [&]() {
345 if (*AndMask == Mask55)
346 return true;
347
348 // Exact match failed, see if any bits are known to be 0 where we
349 // expect a 1 in the mask.
350 if (!AndMask->isSubsetOf(Mask55))
351 return false;
352
353 APInt NeededMask = Mask55 & ~*AndMask;
354 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
355 NeededMask,
356 SimplifyQuery(I.getDataLayout()));
357 };
358
359 if (CheckAndMask()) {
360 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
361 IRBuilder<> Builder(&I);
362 I.replaceAllUsesWith(
363 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
364 ++NumPopCountRecognized;
365 return true;
366 }
367 }
368 }
369 }
370 }
371
372 return false;
373}
374
375 /// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
376 /// C2 saturate the value of the fp conversion. The transform is not reversible
377 /// as the fptosi.sat is more defined than the input - all values produce a
378 /// valid result for the fptosi.sat, whereas inputs that were out of range of
379 /// the integer conversion produce poison in the original. The reversed pattern
380 /// may use fmax and fmin instead. As we cannot directly reverse the transform,
381 /// and it is not always profitable, we make it conditional on the cost being
382 /// reported as lower by TTI.
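/// Illustrative example (sketch, not from the original source), with clamp
/// constants -128/127 implying an i8 saturation width:
///   %t  = fptosi float %f to i32
///   %lo = call i32 @llvm.smax.i32(i32 %t, i32 -128)
///   %r  = call i32 @llvm.smin.i32(i32 %lo, i32 127)
/// can become
///   %s = call i8 @llvm.fptosi.sat.i8.f32(float %f)
///   %r = sext i8 %s to i32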
383 static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
384 // Look for smin(smax(fptosi(x))) or smax(smin(fptosi(x))) to convert to fptosi_sat.
385 Value *In;
386 const APInt *MinC, *MaxC;
387 if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
388 m_APInt(MinC))),
389 m_APInt(MaxC))) &&
390 !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
391 m_APInt(MaxC))),
392 m_APInt(MinC))))
393 return false;
394
395 // Check that the constants clamp a saturate.
396 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
397 return false;
398
399 Type *IntTy = I.getType();
400 Type *FpTy = In->getType();
401 Type *SatTy =
402 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
403 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
404 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
405
406 // Get the cost of the intrinsic, and check that against the cost of
407 // fptosi+smin+smax
408 InstructionCost SatCost = TTI.getIntrinsicInstrCost(
409 IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
410 TTI::TCK_RecipThroughput);
411 SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
412 TTI::CastContextHint::None,
413 TTI::TCK_RecipThroughput);
414
415 InstructionCost MinMaxCost = TTI.getCastInstrCost(
416 Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
417 TTI::TCK_RecipThroughput);
418 MinMaxCost += TTI.getIntrinsicInstrCost(
419 IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
420 TTI::TCK_RecipThroughput);
421 MinMaxCost += TTI.getIntrinsicInstrCost(
422 IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
423 TTI::TCK_RecipThroughput);
424
425 if (SatCost >= MinMaxCost)
426 return false;
427
428 IRBuilder<> Builder(&I);
429 Value *Sat =
430 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
431 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
432 return true;
433}
434
435/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
436/// pessimistic codegen that has to account for setting errno and can enable
437/// vectorization.
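/// Illustrative example (sketch, not from the original source): with the
/// nnan fast-math flag present,
///   %r = call nnan double @sqrt(double %x)
/// can be replaced by
///   %r = call double @llvm.sqrt.f64(double %x)
/// because the intrinsic is not required to set errno.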
438 static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
439 TargetLibraryInfo &TLI, AssumptionCache &AC,
440 DominatorTree &DT) {
441 // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
443 // (because NNAN or the operand arg must not be less than -0.0) and (3) we
443 // would not end up lowering to a libcall anyway (which could change the value
444 // of errno), then:
445 // (1) errno won't be set.
446 // (2) it is safe to convert this to an intrinsic call.
447 Type *Ty = Call->getType();
448 Value *Arg = Call->getArgOperand(0);
449 if (TTI.haveFastSqrt(Ty) &&
450 (Call->hasNoNaNs() ||
451 cannotBeOrderedLessThanZero(
452 Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
453 IRBuilder<> Builder(Call);
454 Value *NewSqrt =
455 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
456 Call->replaceAllUsesWith(NewSqrt);
457
458 // Explicitly erase the old call because a call with side effects is not
459 // trivially dead.
460 Call->eraseFromParent();
461 return true;
462 }
463
464 return false;
465}
466
467// Check if this array of constants represents a cttz table.
468// Iterate over the elements from \p Table by trying to find/match all
469// the numbers from 0 to \p InputBits that should represent cttz results.
470static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
471 const APInt &AndMask, Type *AccessTy,
472 unsigned InputBits, const APInt &GEPIdxFactor,
473 const DataLayout &DL) {
474 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
475 APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
476 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
477 ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
478 if (!C || C->getValue() != Idx)
479 return false;
480 }
481
482 return true;
483}
484
485// Try to recognize table-based ctz implementation.
486// E.g., an example in C (for more cases please see the llvm/tests):
487// int f(unsigned x) {
488// static const char table[32] =
489// {0, 1, 28, 2, 29, 14, 24, 3, 30,
490// 22, 20, 15, 25, 17, 4, 8, 31, 27,
491// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
492// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
493// }
494// this can be lowered to `cttz` instruction.
495// There is also a special case when the element is 0.
496//
497// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn
498// sequence that contains each pattern of bits in it. The shift extracts
499// the top bits after the multiply, and that index into the table should
500// represent the number of trailing zeros in the original number.
501//
502 // Here are some examples of LLVM IR for a 64-bit target:
503//
504// CASE 1:
505// %sub = sub i32 0, %x
506// %and = and i32 %sub, %x
507// %mul = mul i32 %and, 125613361
508// %shr = lshr i32 %mul, 27
509// %idxprom = zext i32 %shr to i64
510// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
511// i64 %idxprom
512// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
513//
514// CASE 2:
515// %sub = sub i32 0, %x
516// %and = and i32 %sub, %x
517// %mul = mul i32 %and, 72416175
518// %shr = lshr i32 %mul, 26
519// %idxprom = zext i32 %shr to i64
520// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
521// i64 0, i64 %idxprom
522// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
523//
524// CASE 3:
525// %sub = sub i32 0, %x
526// %and = and i32 %sub, %x
527// %mul = mul i32 %and, 81224991
528// %shr = lshr i32 %mul, 27
529// %idxprom = zext i32 %shr to i64
530// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
531// i64 0, i64 %idxprom
532// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
533//
534// CASE 4:
535// %sub = sub i64 0, %x
536// %and = and i64 %sub, %x
537// %mul = mul i64 %and, 283881067100198605
538// %shr = lshr i64 %mul, 58
539// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
540// i64 %shr
541// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
542//
543// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
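// For CASE 1 above, the replacement is roughly (illustrative sketch):
//   %ctz  = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %zero = icmp eq i32 %x, 0
//   %res  = select i1 %zero, i32 <table[0]>, i32 %ctz
// and when table[0] == 32 the select is unnecessary because the cttz can be
// marked as defined for a zero input.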
544 static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
545 auto *LI = dyn_cast<LoadInst>(&I);
546 if (!LI)
547 return false;
548
549 Type *AccessType = LI->getType();
550 if (!AccessType->isIntegerTy())
551 return false;
552
553 auto *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
554 if (!GEP || !GEP->hasNoUnsignedSignedWrap())
555 return false;
556
557 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
558 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
559 return false;
560
561 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
562 APInt ModOffset(BW, 0);
563 SmallMapVector<Value *, APInt, 4> VarOffsets;
564 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
565 VarOffsets.size() != 1 || ModOffset != 0)
566 return false;
567 auto [GepIdx, GEPScale] = VarOffsets.front();
568
569 Value *X1;
570 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
571 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
572 // This might be extended to the pointer index type, and if the gep index type
573 // has been replaced with an i8 then a new And (and different ShiftConst) will
574 // be present.
575 auto MatchInner = m_LShr(
576 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
577 m_APInt(ShiftConst));
578 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
579 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
580 return false;
581
582 unsigned InputBits = X1->getType()->getScalarSizeInBits();
583 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
584 return false;
585
586 if (!GEPScale.isIntN(InputBits) ||
587 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
588 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
589 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
590 return false;
591
592 ConstantInt *ZeroTableElem = cast<ConstantInt>(
593 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
594 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
595
596 IRBuilder<> B(LI);
597 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
598 Type *XType = X1->getType();
599 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
600 Value *ZExtOrTrunc = nullptr;
601
602 if (DefinedForZero) {
603 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
604 } else {
605 // If the value in elem 0 isn't the same as InputBits, we still want to
606 // produce the value from the table.
607 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
608 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
609
610 // The true branch of select handles the cttz(0) case, which is rare.
612 if (auto *SelectI = dyn_cast<Instruction>(Select))
613 SelectI->setMetadata(
614 LLVMContext::MD_prof,
615 MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
616 }
617
618 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
619 // it should be handled as: `cttz(x) & (typeSize - 1)`.
620
621 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
622 }
623
624 LI->replaceAllUsesWith(ZExtOrTrunc);
625
626 return true;
627}
628
629/// This is used by foldLoadsRecursive() to capture a Root Load node which is
630/// of type or(load, load) and recursively build the wide load. Also capture the
631/// shift amount, zero extend type and loadSize.
632 struct LoadOps {
633 LoadInst *Root = nullptr;
634 LoadInst *RootInsert = nullptr;
635 bool FoundRoot = false;
636 uint64_t LoadSize = 0;
637 uint64_t Shift = 0;
638 Type *ZextType;
639 AAMDNodes AATags;
640 };
641
642 // Identify and merge consecutive loads recursively, which are of the form
643// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
644// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
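// For example, on a little-endian target (illustrative sketch):
//   %b0 = load i8, ptr %p
//   %b1 = load i8, ptr %p1        ; %p1 = getelementptr i8, ptr %p, i64 1
//   %z0 = zext i8 %b0 to i16
//   %z1 = zext i8 %b1 to i16
//   %hi = shl i16 %z1, 8
//   %r  = or i16 %z0, %hi
// can be served by a single "load i16, ptr %p" when the target allows it.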
645static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
646 AliasAnalysis &AA) {
647 uint64_t ShAmt2;
648 Value *X;
649 Instruction *L1, *L2;
650
651 // Go to the last node with loads.
652 if (match(V,
653 m_OneUse(m_c_Or(m_Value(X),
654 m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
655 ShAmt2)))))) {
656 if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
657 // Avoid Partial chain merge.
658 return false;
659 } else
660 return false;
661
662 // Check if the pattern has loads
663 LoadInst *LI1 = LOps.Root;
664 uint64_t ShAmt1 = LOps.Shift;
665 if (LOps.FoundRoot == false &&
666 match(X, m_OneUse(
667 m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
668 LI1 = dyn_cast<LoadInst>(L1);
669 }
670 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
671
672 // Reject identical loads, non-simple (atomic/volatile) loads, and loads from different address spaces.
673 if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
674 LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
675 return false;
676
677 // Check if Loads come from same BB.
678 if (LI1->getParent() != LI2->getParent())
679 return false;
680
681 // Find the data layout
682 bool IsBigEndian = DL.isBigEndian();
683
684 // Check if loads are consecutive and same size.
685 Value *Load1Ptr = LI1->getPointerOperand();
686 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
687 Load1Ptr =
688 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
689 /* AllowNonInbounds */ true);
690
691 Value *Load2Ptr = LI2->getPointerOperand();
692 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
693 Load2Ptr =
694 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
695 /* AllowNonInbounds */ true);
696
697 // Verify if both loads have same base pointers
698 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
699 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
700 if (Load1Ptr != Load2Ptr)
701 return false;
702
703 // Make sure that there are no padding bits.
704 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
705 !DL.typeSizeEqualsStoreSize(LI2->getType()))
706 return false;
707
708 // Alias Analysis to check for stores b/w the loads.
709 LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
710 MemoryLocation Loc;
711 if (!Start->comesBefore(End)) {
712 std::swap(Start, End);
713 Loc = MemoryLocation::get(End);
714 if (LOps.FoundRoot)
715 Loc = Loc.getWithNewSize(LOps.LoadSize);
716 } else
717 Loc = MemoryLocation::get(End);
718 unsigned NumScanned = 0;
719 for (Instruction &Inst :
720 make_range(Start->getIterator(), End->getIterator())) {
721 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
722 return false;
723
724 if (++NumScanned > MaxInstrsToScan)
725 return false;
726 }
727
728 // Make sure Load with lower Offset is at LI1
729 bool Reverse = false;
730 if (Offset2.slt(Offset1)) {
731 std::swap(LI1, LI2);
732 std::swap(ShAmt1, ShAmt2);
733 std::swap(Offset1, Offset2);
734 std::swap(Load1Ptr, Load2Ptr);
735 std::swap(LoadSize1, LoadSize2);
736 Reverse = true;
737 }
738
739 // Big endian swap the shifts
740 if (IsBigEndian)
741 std::swap(ShAmt1, ShAmt2);
742
743 // First load is always LI1. This is where we put the new load.
744 // Use the merged load size available from LI1 for forward loads.
745 if (LOps.FoundRoot) {
746 if (!Reverse)
747 LoadSize1 = LOps.LoadSize;
748 else
749 LoadSize2 = LOps.LoadSize;
750 }
751
752 // Verify that the shift amount and load index line up, which also verifies
753 // that the loads are consecutive.
754 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
755 uint64_t PrevSize =
756 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
757 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
758 return false;
759
760 // Update LOps
761 AAMDNodes AATags1 = LOps.AATags;
762 AAMDNodes AATags2 = LI2->getAAMetadata();
763 if (LOps.FoundRoot == false) {
764 LOps.FoundRoot = true;
765 AATags1 = LI1->getAAMetadata();
766 }
767 LOps.LoadSize = LoadSize1 + LoadSize2;
768 LOps.RootInsert = Start;
769
770 // Concatenate the AATags of the Merged Loads.
771 LOps.AATags = AATags1.concat(AATags2);
772
773 LOps.Root = LI1;
774 LOps.Shift = ShAmt1;
775 LOps.ZextType = X->getType();
776 return true;
777}
778
779// For a given BB instruction, evaluate all loads in the chain that form a
780// pattern which suggests that the loads can be combined. The one and only use
781// of the loads is to form a wider load.
782 static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
783 TargetTransformInfo &TTI, AliasAnalysis &AA,
784 const DominatorTree &DT) {
785 // Only consider load chains of scalar values.
786 if (isa<VectorType>(I.getType()))
787 return false;
788
789 LoadOps LOps;
790 if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
791 return false;
792
793 IRBuilder<> Builder(&I);
794 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
795
796 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
797 // TTI based checks if we want to proceed with wider load
798 bool Allowed = TTI.isTypeLegal(WiderType);
799 if (!Allowed)
800 return false;
801
802 unsigned AS = LI1->getPointerAddressSpace();
803 unsigned Fast = 0;
804 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
805 AS, LI1->getAlign(), &Fast);
806 if (!Allowed || !Fast)
807 return false;
808
809 // Get the Index and Ptr for the new GEP.
810 Value *Load1Ptr = LI1->getPointerOperand();
811 Builder.SetInsertPoint(LOps.RootInsert);
812 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
813 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
814 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
815 DL, Offset1, /* AllowNonInbounds */ true);
816 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
817 }
818 // Generate wider load.
819 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
820 LI1->isVolatile(), "");
821 NewLoad->takeName(LI1);
822 // Set the New Load AATags Metadata.
823 if (LOps.AATags)
824 NewLoad->setAAMetadata(LOps.AATags);
825
826 Value *NewOp = NewLoad;
827 // Check if zero extend needed.
828 if (LOps.ZextType)
829 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
830
831 // Check if shift needed. We need to shift with the amount of load1
832 // shift if not zero.
833 if (LOps.Shift)
834 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
835 I.replaceAllUsesWith(NewOp);
836
837 return true;
838}
839
840/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
841struct PartStore {
842 Value *PtrBase;
843 APInt PtrOffset;
844 Value *Val;
845 uint64_t ValOffset;
846 uint64_t ValWidth;
847 StoreInst *Store;
848
849 bool isCompatibleWith(const PartStore &Other) const {
850 return PtrBase == Other.PtrBase && Val == Other.Val;
851 }
852
853 bool operator<(const PartStore &Other) const {
854 return PtrOffset.slt(Other.PtrOffset);
855 }
856};
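// Illustrative sketch (not from the original source): on a little-endian
// target, four single-byte parts of the same i32 value %v
//   store i8 %b0, ptr %p        ; %b0 = trunc i32 %v to i8
//   store i8 %b1, ptr %p.1      ; %b1 = trunc of (lshr i32 %v, 8)
//   store i8 %b2, ptr %p.2      ; %b2 = trunc of (lshr i32 %v, 16)
//   store i8 %b3, ptr %p.3      ; %b3 = trunc of (lshr i32 %v, 24)
// can be merged into a single "store i32 %v, ptr %p" when the target allows
// a possibly-misaligned i32 store.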
857
858static std::optional<PartStore> matchPartStore(Instruction &I,
859 const DataLayout &DL) {
860 auto *Store = dyn_cast<StoreInst>(&I);
861 if (!Store || !Store->isSimple())
862 return std::nullopt;
863
864 Value *StoredVal = Store->getValueOperand();
865 Type *StoredTy = StoredVal->getType();
866 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
867 return std::nullopt;
868
869 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
870 uint64_t ValOffset;
871 Value *Val;
872 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
873 return std::nullopt;
874
875 Value *Ptr = Store->getPointerOperand();
876 APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
877 Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
878 DL, PtrOffset, /*AllowNonInbounds=*/true);
879 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
880}
881
882 static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
883 unsigned Width, const DataLayout &DL,
884 TargetTransformInfo &TTI) {
885 if (Parts.size() < 2)
886 return false;
887
888 // Check whether combining the stores is profitable.
889 // FIXME: We could generate smaller stores if we can't produce a large one.
890 const PartStore &First = Parts.front();
891 LLVMContext &Ctx = First.Store->getContext();
892 Type *NewTy = Type::getIntNTy(Ctx, Width);
893 unsigned Fast = 0;
894 if (!TTI.isTypeLegal(NewTy) ||
895 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
896 First.Store->getPointerAddressSpace(),
897 First.Store->getAlign(), &Fast) ||
898 !Fast)
899 return false;
900
901 // Generate the combined store.
902 IRBuilder<> Builder(First.Store);
903 Value *Val = First.Val;
904 if (First.ValOffset != 0)
905 Val = Builder.CreateLShr(Val, First.ValOffset);
906 Val = Builder.CreateTrunc(Val, NewTy);
907 StoreInst *Store = Builder.CreateAlignedStore(
908 Val, First.Store->getPointerOperand(), First.Store->getAlign());
909
910 // Merge various metadata onto the new store.
911 AAMDNodes AATags = First.Store->getAAMetadata();
912 SmallVector<Instruction *> Stores = {First.Store};
913 Stores.reserve(Parts.size());
914 SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()};
915 DbgLocs.reserve(Parts.size());
916 for (const PartStore &Part : drop_begin(Parts)) {
917 AATags = AATags.concat(Part.Store->getAAMetadata());
918 Stores.push_back(Part.Store);
919 DbgLocs.push_back(Part.Store->getDebugLoc());
920 }
921 Store->setAAMetadata(AATags);
922 Store->mergeDIAssignID(Stores);
923 Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs));
924
925 // Remove the old stores.
926 for (const PartStore &Part : Parts)
927 Part.Store->eraseFromParent();
928
929 return true;
930}
931
932 static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
933 const DataLayout &DL, TargetTransformInfo &TTI) {
934 if (Parts.size() < 2)
935 return false;
936
937 // We now have multiple parts of the same value stored to the same pointer.
938 // Sort the parts by pointer offset, and make sure they are consistent with
939 // the value offsets. Also check that the value is fully covered without
940 // overlaps.
941 bool Changed = false;
942 llvm::sort(Parts);
943 int64_t LastEndOffsetFromFirst = 0;
944 const PartStore *First = &Parts[0];
945 for (const PartStore &Part : Parts) {
946 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
947 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
948 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
949 LastEndOffsetFromFirst != ValOffsetFromFirst) {
950 Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
951 LastEndOffsetFromFirst, DL, TTI);
952 First = &Part;
953 LastEndOffsetFromFirst = Part.ValWidth;
954 continue;
955 }
956
957 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
958 }
959
960 Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
961 LastEndOffsetFromFirst, DL, TTI);
962 return Changed;
963}
964
965 static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
966 TargetTransformInfo &TTI, AliasAnalysis &AA) {
967 // FIXME: Add big endian support.
968 if (DL.isBigEndian())
969 return false;
970
971 BatchAAResults BatchAA(AA);
972 SmallVector<PartStore, 8> Parts;
973 bool MadeChange = false;
974 for (Instruction &I : make_early_inc_range(BB)) {
975 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
976 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
977 Parts.push_back(std::move(*Part));
978 continue;
979 }
980
981 MadeChange |= mergePartStores(Parts, DL, TTI);
982 Parts.clear();
983 Parts.push_back(std::move(*Part));
984 continue;
985 }
986
987 if (Parts.empty())
988 continue;
989
990 if (I.mayThrow() ||
991 (I.mayReadOrWriteMemory() &&
992 isModOrRefSet(BatchAA.getModRefInfo(
993 &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
994 MadeChange |= mergePartStores(Parts, DL, TTI);
995 Parts.clear();
996 continue;
997 }
998 }
999
1000 MadeChange |= mergePartStores(Parts, DL, TTI);
1001 return MadeChange;
1002}
1003
1004 /// Combine away instructions provided they are still equivalent when compared
1005 /// against 0, i.e. do they have any bits set.
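/// Illustrative example (sketch, not from the original source):
///   %s = shl nuw i32 %a, 3
///   %o = or i32 %s, %b
///   %c = icmp eq i32 %o, 0
/// is equivalent to comparing "or i32 %a, %b" against zero, because a
/// nuw/nsw shift of a non-zero value stays non-zero.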
1006 static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
1007 auto *I = dyn_cast<Instruction>(V);
1008 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
1009 return nullptr;
1010
1011 Value *A;
1012
1013 // Look deeper into the chain of or's, combining away shl (so long as they are
1014 // nuw or nsw).
1015 Value *Op0 = I->getOperand(0);
1016 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1017 m_NUWShl(m_Value(A), m_Value()))))
1018 Op0 = A;
1019 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
1020 Op0 = NOp;
1021
1022 Value *Op1 = I->getOperand(1);
1023 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1024 m_NUWShl(m_Value(A), m_Value()))))
1025 Op1 = A;
1026 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1027 Op1 = NOp;
1028
1029 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1030 return Builder.CreateOr(Op0, Op1);
1031 return nullptr;
1032}
1033
1034 static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
1035 TargetTransformInfo &TTI, AliasAnalysis &AA,
1036 const DominatorTree &DT) {
1037 CmpPredicate Pred;
1038 Value *Op0;
1039 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1040 !ICmpInst::isEquality(Pred))
1041 return false;
1042
1043 // If the chain of or's matches a load, combine to that before attempting to
1044 // remove shifts.
1045 if (auto OpI = dyn_cast<Instruction>(Op0))
1046 if (OpI->getOpcode() == Instruction::Or)
1047 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1048 return true;
1049
1050 IRBuilder<> Builder(&I);
1051 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1052 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1053 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1054 return true;
1055 }
1056
1057 return false;
1058}
1059
1060// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1061// ModOffset
1062static std::pair<APInt, APInt>
1063 getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
1064 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1065 std::optional<APInt> Stride;
1066 APInt ModOffset(BW, 0);
1067 // Return a minimum gep stride, greatest common divisor of consecutive gep
1068 // index scales (c.f. Bézout's identity).
1069 while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
1070 SmallMapVector<Value *, APInt, 4> VarOffsets;
1071 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1072 break;
1073
1074 for (auto [V, Scale] : VarOffsets) {
1075 // Only keep a power of two factor for non-inbounds
1076 if (!GEP->hasNoUnsignedSignedWrap())
1077 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1078
1079 if (!Stride)
1080 Stride = Scale;
1081 else
1082 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1083 }
1084
1085 PtrOp = GEP->getPointerOperand();
1086 }
1087
1088 // Check whether pointer arrives back at Global Variable via at least one GEP.
1089 // Even if it doesn't, we can check by alignment.
1090 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1091 return {APInt(BW, 1), APInt(BW, 0)};
1092
1093 // In consideration of signed GEP indices, a non-negligible offset becomes the
1094 // remainder of division by the minimum GEP stride.
1095 ModOffset = ModOffset.srem(*Stride);
1096 if (ModOffset.isNegative())
1097 ModOffset += *Stride;
1098
1099 return {*Stride, ModOffset};
1100}
1101
1102 /// If C is a constant patterned array and all valid loaded results for the
1103 /// given alignment are the same constant, fold the load to that constant.
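/// Illustrative example (sketch, not from the original source):
///   @g = constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 2
///   %v = load i8, ptr %gep.into.g, align 2
/// Every 2-byte-aligned offset into @g loads the value 1, so the load can be
/// folded to the constant 1.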
1104 static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
1105 auto *LI = dyn_cast<LoadInst>(&I);
1106 if (!LI || LI->isVolatile())
1107 return false;
1108
1109 // We can only fold the load if it is from a constant global with definitive
1110 // initializer. Skip expensive logic if this is not the case.
1111 auto *PtrOp = LI->getPointerOperand();
1112 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
1113 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1114 return false;
1115
1116 // Bail for large initializers in excess of 4K to avoid too many scans.
1117 Constant *C = GV->getInitializer();
1118 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1119 if (!GVSize || 4096 < GVSize)
1120 return false;
1121
1122 Type *LoadTy = LI->getType();
1123 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1124 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1125
1126 // Any possible offset is a multiple of the GEP stride, and any valid offset
1127 // is a multiple of the load alignment, so checking only multiples of the
1128 // bigger of the two is sufficient to establish that all results are equal.
1129 if (auto LA = LI->getAlign();
1130 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1131 ConstOffset = APInt(BW, 0);
1132 Stride = APInt(BW, LA.value());
1133 }
1134
1135 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1136 if (!Ca)
1137 return false;
1138
1139 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1140 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1141 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1142 return false;
1143
1144 I.replaceAllUsesWith(Ca);
1145
1146 return true;
1147}
1148
1149namespace {
1150class StrNCmpInliner {
1151public:
1152 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1153 const DataLayout &DL)
1154 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1155
1156 bool optimizeStrNCmp();
1157
1158private:
1159 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1160
1161 CallInst *CI;
1162 LibFunc Func;
1163 DomTreeUpdater *DTU;
1164 const DataLayout &DL;
1165};
1166
1167} // namespace
1168
1169/// First we normalize calls to strncmp/strcmp to the form of
1170/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1171/// (without considering '\0').
1172///
1173/// Examples:
1174///
1175/// \code
1176/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1177/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1178/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1179/// strcmp(s, "a") -> compare(s, "a", 2)
1180///
1181/// char s2[] = {'a'}
1182/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1183///
1184/// char s2[] = {'a', 'b', 'c', 'd'}
1185/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1186/// \endcode
1187///
1188/// We only handle cases where N and exactly one of s1 and s2 are constant.
1189/// Cases that s1 and s2 are both constant are already handled by the
1190/// instcombine pass.
1191///
1192/// We do not handle cases where N > StrNCmpInlineThreshold.
1193///
1194 /// We also do not handle cases where N < 2, which are already
1195/// handled by the instcombine pass.
1196///
1197bool StrNCmpInliner::optimizeStrNCmp() {
1198 if (StrNCmpInlineThreshold < 2)
1199 return false;
1200
1201 if (!isOnlyUsedInZeroComparison(CI))
1202 return false;
1203
1204 Value *Str1P = CI->getArgOperand(0);
1205 Value *Str2P = CI->getArgOperand(1);
1206 // Should be handled elsewhere.
1207 if (Str1P == Str2P)
1208 return false;
1209
1210 StringRef Str1, Str2;
1211 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1212 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
1213 if (HasStr1 == HasStr2)
1214 return false;
1215
1216 // Note that '\0' and characters after it are not trimmed.
1217 StringRef Str = HasStr1 ? Str1 : Str2;
1218 Value *StrP = HasStr1 ? Str2P : Str1P;
1219
1220 size_t Idx = Str.find('\0');
1221 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1222 if (Func == LibFunc_strncmp) {
1223 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1224 N = std::min(N, ConstInt->getZExtValue());
1225 else
1226 return false;
1227 }
1228 // Now N means how many bytes we need to compare at most.
1229 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1230 return false;
1231
1232 // Cases where StrP has two or more dereferenceable bytes might be better
1233 // optimized elsewhere.
1234 bool CanBeNull = false, CanBeFreed = false;
1235 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1236 return false;
1237 inlineCompare(StrP, Str, N, HasStr1);
1238 return true;
1239}
1240
1241/// Convert
1242///
1243/// \code
1244/// ret = compare(s1, s2, N)
1245/// \endcode
1246///
1247/// into
1248///
1249/// \code
1250/// ret = (int)s1[0] - (int)s2[0]
1251/// if (ret != 0)
1252/// goto NE
1253/// ...
1254/// ret = (int)s1[N-2] - (int)s2[N-2]
1255/// if (ret != 0)
1256/// goto NE
1257/// ret = (int)s1[N-1] - (int)s2[N-1]
1258/// NE:
1259/// \endcode
1260///
1261/// CFG before and after the transformation:
1262///
1263/// (before)
1264/// BBCI
1265///
1266/// (after)
1267/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1268/// | ^
1269/// E |
1270/// | |
1271/// BBSubs[1] (sub,icmp) --NE-----+
1272/// ... |
1273/// BBSubs[N-1] (sub) ---------+
1274///
1275void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1276 bool Swapped) {
1277 auto &Ctx = CI->getContext();
1278 IRBuilder<> B(Ctx);
1279 // We want these instructions to be recognized as inlined instructions for the
1280 // compare call, but we don't have a source location for the definition of
1281 // that function, since we're generating that code now. Because the generated
1282 // code is a viable point for a memory access error, we make the pragmatic
1283 // choice here to directly use CI's location so that we have useful
1284 // attribution for the generated code.
1285 B.SetCurrentDebugLocation(CI->getDebugLoc());
1286
1287 BasicBlock *BBCI = CI->getParent();
1288 BasicBlock *BBTail =
1289 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1290
1291 SmallVector<BasicBlock *> BBSubs;
1292 for (uint64_t I = 0; I < N; ++I)
1293 BBSubs.push_back(
1294 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1295 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1296
1297 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1298
1299 B.SetInsertPoint(BBNE);
1300 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1301 B.CreateBr(BBTail);
1302
1303 Value *Base = LHS;
1304 for (uint64_t i = 0; i < N; ++i) {
1305 B.SetInsertPoint(BBSubs[i]);
1306 Value *VL =
1307 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1308 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1309 CI->getType());
1310 Value *VR =
1311 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
1312 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1313 if (i < N - 1) {
1314 BranchInst *CondBrInst = B.CreateCondBr(
1315 B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)), BBNE,
1316 BBSubs[i + 1]);
1317
1318 Function *F = CI->getFunction();
1319 assert(F && "Instruction does not belong to a function!");
1320 std::optional<Function::ProfileCount> EC = F->getEntryCount();
1321 if (EC && EC->getCount() > 0)
1322 setExplicitlyUnknownBranchWeightsIfProfiled(*CondBrInst, *F, DEBUG_TYPE);
1323 } else {
1324 B.CreateBr(BBNE);
1325 }
1326
1327 Phi->addIncoming(Sub, BBSubs[i]);
1328 }
1329
1330 CI->replaceAllUsesWith(Phi);
1331 CI->eraseFromParent();
1332
1333 if (DTU) {
1334 SmallVector<DominatorTree::UpdateType, 8> Updates;
1335 Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1336 for (uint64_t i = 0; i < N; ++i) {
1337 if (i < N - 1)
1338 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1339 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1340 }
1341 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1342 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1343 DTU->applyUpdates(Updates);
1344 }
1345}
1346
1347/// Convert memchr with a small constant string into a switch
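/// Illustrative example (sketch, not from the original source):
/// memchr("ab", %c, 2) becomes roughly
///   %b = trunc i32 %c to i8
///   switch i8 %b, label %next [ i8 97, label %case.a
///                               i8 98, label %case.b ]
/// where each case block feeds its index into a PHI that produces the result
/// pointer, and the default edge yields null.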
1348 static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
1349 const DataLayout &DL) {
1350 if (isa<Constant>(Call->getArgOperand(1)))
1351 return false;
1352
1353 StringRef Str;
1354 Value *Base = Call->getArgOperand(0);
1355 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1356 return false;
1357
1358 uint64_t N = Str.size();
1359 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1360 uint64_t Val = ConstInt->getZExtValue();
1361 // Ignore the case that n is larger than the size of string.
1362 if (Val > N)
1363 return false;
1364 N = Val;
1365 } else
1366 return false;
1367
1369 return false;
1370
1371 BasicBlock *BB = Call->getParent();
1372 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1373 IRBuilder<> IRB(BB);
1374 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
1375 IntegerType *ByteTy = IRB.getInt8Ty();
1376 BB->getTerminator()->eraseFromParent();
1377 SwitchInst *SI = IRB.CreateSwitch(
1378 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1379 // We can't know the precise weights here, as they would depend on the value
1380 // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
1381 setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
1382 DEBUG_TYPE);
1383 Type *IndexTy = DL.getIndexType(Call->getType());
1384 SmallVector<DominatorTree::UpdateType, 8> Updates;
1385
1386 BasicBlock *BBSuccess = BasicBlock::Create(
1387 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1388 IRB.SetInsertPoint(BBSuccess);
1389 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1390 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1391 IRB.CreateBr(BBNext);
1392 if (DTU)
1393 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1394
1396 for (uint64_t I = 0; I < N; ++I) {
1397 ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]);
1398 if (!Cases.insert(CaseVal).second)
1399 continue;
1400
1401 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1402 BB->getParent(), BBSuccess);
1403 SI->addCase(CaseVal, BBCase);
1404 IRB.SetInsertPoint(BBCase);
1405 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1406 IRB.CreateBr(BBSuccess);
1407 if (DTU) {
1408 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1409 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1410 }
1411 }
1412
1413 PHINode *PHI =
1414 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1415 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1416 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1417
1418 Call->replaceAllUsesWith(PHI);
1419 Call->eraseFromParent();
1420
1421 if (DTU)
1422 DTU->applyUpdates(Updates);
1423
1424 return true;
1425}
1426
1427 static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
1428 TargetLibraryInfo &TLI, AssumptionCache &AC,
1429 DominatorTree &DT, const DataLayout &DL,
1430 bool &MadeCFGChange) {
1431
1432 auto *CI = dyn_cast<CallInst>(&I);
1433 if (!CI || CI->isNoBuiltin())
1434 return false;
1435
1436 Function *CalledFunc = CI->getCalledFunction();
1437 if (!CalledFunc)
1438 return false;
1439
1440 LibFunc LF;
1441 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1442 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1443 return false;
1444
1445 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1446
1447 switch (LF) {
1448 case LibFunc_sqrt:
1449 case LibFunc_sqrtf:
1450 case LibFunc_sqrtl:
1451 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1452 case LibFunc_strcmp:
1453 case LibFunc_strncmp:
1454 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1455 MadeCFGChange = true;
1456 return true;
1457 }
1458 break;
1459 case LibFunc_memchr:
1460 if (foldMemChr(CI, &DTU, DL)) {
1461 MadeCFGChange = true;
1462 return true;
1463 }
1464 break;
1465 default:;
1466 }
1467 return false;
1468}
1469
1470/// This is the entry point for folds that could be implemented in regular
1471/// InstCombine, but they are separated because they are not expected to
1472/// occur frequently and/or have more than a constant-length pattern match.
1473 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1474 TargetTransformInfo &TTI, TargetLibraryInfo &TLI,
1475 AliasAnalysis &AA,
1476 AssumptionCache &AC, bool &MadeCFGChange) {
1477 bool MadeChange = false;
1478 for (BasicBlock &BB : F) {
1479 // Ignore unreachable basic blocks.
1480 if (!DT.isReachableFromEntry(&BB))
1481 continue;
1482
1483 const DataLayout &DL = F.getDataLayout();
1484
1485 // Walk the block backwards for efficiency. We're matching a chain of
1486 // use->defs, so we're more likely to succeed by starting from the bottom.
1487 // Also, we want to avoid matching partial patterns.
1488 // TODO: It would be more efficient if we removed dead instructions
1489 // iteratively in this loop rather than waiting until the end.
1490 for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
1491 MadeChange |= foldAnyOrAllBitsSet(I);
1492 MadeChange |= foldGuardedFunnelShift(I, DT);
1493 MadeChange |= tryToRecognizePopCount(I);
1494 MadeChange |= tryToFPToSat(I, TTI);
1495 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1496 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1497 MadeChange |= foldPatternedLoads(I, DL);
1498 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1499 // NOTE: This function introduces erasing of the instruction `I`, so it
1500 // needs to be called at the end of this sequence, otherwise we may make
1501 // bugs.
1502 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1503 }
1504
1505 // Do this separately to avoid redundantly scanning stores multiple times.
1506 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1507 }
1508
1509 // We're done with transforms, so remove dead instructions.
1510 if (MadeChange)
1511 for (BasicBlock &BB : F)
1512 SimplifyInstructionsInBlock(&BB);
1513
1514 return MadeChange;
1515}
1516
1517/// This is the entry point for all transforms. Pass manager differences are
1518/// handled in the callers of this function.
1519 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1520 TargetLibraryInfo &TLI, DominatorTree &DT,
1521 AliasAnalysis &AA, bool &MadeCFGChange) {
1522 bool MadeChange = false;
1523 const DataLayout &DL = F.getDataLayout();
1524 TruncInstCombine TIC(AC, TLI, DL, DT);
1525 MadeChange |= TIC.run(F);
1526 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1527 return MadeChange;
1528}
1529
1530 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1531 FunctionAnalysisManager &AM) {
1532 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1533 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1534 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1535 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1536 auto &AA = AM.getResult<AAManager>(F);
1537 bool MadeCFGChange = false;
1538 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1539 // No changes, all analyses are preserved.
1540 return PreservedAnalyses::all();
1541 }
1542 // Mark all the analyses that instcombine updates as preserved.
1543 PreservedAnalyses PA;
1544 if (MadeCFGChange)
1545 PA.preserve<DominatorTreeAnalysis>();
1546 else
1547 PA.preserveSet<CFGAnalyses>();
1548 return PA;
1549}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for profiling metadata utility functions.
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
BinaryOperator * Mul
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
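For illustration, a minimal sketch of how getSplat broadcasts a narrow pattern (the concrete values are hypothetical):
  APInt Pattern(8, 0xAB);                     // 8-bit pattern 0xAB
  APInt Wide = APInt::getSplat(32, Pattern);  // 0xABABABAB over 32 bits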
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
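A small sketch combining the bit-level helpers above (the widths are chosen arbitrarily):
  APInt OneBit = APInt::getOneBitSet(32, 5);  // only bit 5 set
  APInt Mask   = APInt::getAllOnes(32);       // all 32 bits set
  bool Covered = OneBit.isSubsetOf(Mask);     // true: every set bit of OneBit is also set in Mask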
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
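As a sketch (assuming a StoreInst *SI, a LoadInst *LI, and an AAResults &AA in scope), a typical clobber query through the batch wrapper looks like:
  BatchAAResults BAA(AA);
  MemoryLocation Loc = MemoryLocation::get(LI);   // memory read by the load
  if (isModSet(BAA.getModRefInfo(SI, Loc))) {
    // SI may modify the bytes LI reads, so the two cannot be reordered freely.
  }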
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
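For example (S1, S2 and NewI are assumed instructions), an instruction that replaces two others commonly receives a merged location:
  NewI->setDebugLoc(
      DebugLoc::getMergedLocations({S1->getDebugLoc(), S2->getDebugLoc()}));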
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
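A sketch of keeping the tree current after adding a CFG edge (DT, From, and To assumed in scope):
  DT.applyUpdates({{DominatorTree::Insert, From, To}});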
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
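Taken together, these let a fold read a constant global's data; a sketch, assuming Ptr is a pointer Value:
  if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Ptr))) {
    if (GV->isConstant() && GV->hasInitializer()) {
      const Constant *Init = GV->getInitializer();  // immutable data, safe to fold loads from
      (void)Init;
    }
  }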
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2497
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2044
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
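A short sketch tying the builder calls above together (BB, NextBB, Wide, Ptr, and Offset are assumed to exist; the names are illustrative only):
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);                               // append at the end of BB
  Value *Lo   = Builder.CreateTrunc(Wide, Builder.getInt8Ty());
  Value *Addr = Builder.CreateInBoundsPtrAdd(Ptr, Offset);  // inbounds i8 GEP
  (void)Lo; (void)Addr;
  Builder.CreateBr(NextBB);                                 // unconditional terminator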
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with a significant bias towards the false destination.
Definition MDBuilder.cpp:48
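A sketch of attaching such weights to a conditional branch Br (assumed in scope):
  MDBuilder MDB(Br->getContext());
  Br->setMetadata(LLVMContext::MD_prof, MDB.createUnlikelyBranchWeights());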
size_type size() const
Definition MapVector.h:56
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
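For example (Ty, BB0, BB1, V0, and V1 assumed), a two-input merge phi is built as:
  PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "merge");
  PN->addIncoming(V0, BB0);
  PN->addIncoming(V1, BB1);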
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
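A sketch of the usual return value from a pass that changed instructions but not the CFG (the helper name is hypothetical):
  PreservedAnalyses reportPreserved() {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();         // control flow untouched
    PA.preserve<DominatorTreeAnalysis>();  // analysis known to still be valid
    return PA;
  }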
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_RecipThroughput
Reciprocal throughput.
@ None
The cast is not used with a load/store of any kind.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
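A sketch of the usual call pattern (Ptr and DL assumed in scope):
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  const Value *Base = Ptr->stripAndAccumulateConstantOffsets(
      DL, Offset, /*AllowNonInbounds=*/true);
  // Base plus Offset bytes is equivalent to Ptr for constant-offset GEP chains.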
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
#define UINT64_MAX
Definition DataTypes.h:77
Abstract Attribute helper functions.
Definition Attributor.h:165
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
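For example (assuming the llvm::APIntOps qualification):
  APInt A12(32, 12), B18(32, 18);
  APInt G = APIntOps::GreatestCommonDivisor(A12, B18);  // 32-bit value 6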
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
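As a small usage sketch of these combinators (V is assumed to be a Value*):
  using namespace llvm::PatternMatch;
  Value *X, *Y;
  const APInt *C;
  if (match(V, m_c_Or(m_OneUse(m_Shl(m_Value(X), m_APInt(C))), m_Value(Y)))) {
    // V is (X << C) | Y with the 'or' operands in either order, and the shl has one use.
  }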
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
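A sketch of pulling the bytes out of a constant string pointer V (assumed in scope):
  StringRef Str;
  if (getConstantStringInfo(V, Str)) {
    // Str now refers to the string's contents, trimmed at the first NUL.
  }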
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
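A common use is deleting instructions while walking a block; a sketch (BB assumed, and isInstructionTriviallyDead is a separate utility from Local.h):
  for (Instruction &Inst : make_early_inc_range(BB)) {
    if (isInstructionTriviallyDead(&Inst))
      Inst.eraseFromParent();  // safe: the iterator has already moved past Inst
  }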
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:721
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
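A sketch of folding a 32-bit load at byte offset 8 out of an initializer (Init is an assumed Constant*, DL an assumed DataLayout; the offset width follows the address-space-0 index size):
  APInt Off(DL.getIndexSizeInBits(/*AS=*/0), 8);
  Type *I32Ty = Type::getInt32Ty(Init->getContext());
  if (Constant *Val = ConstantFoldLoadFromConst(Init, I32Ty, Off, DL)) {
    // Val is what an i32 load at that offset would produce.
  }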
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
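A sketch of splitting a block before an instruction while keeping the dominator tree valid (BB, SplitBefore, and DT assumed in scope):
  BasicBlock *Tail = SplitBlock(BB, SplitBefore->getIterator(), &DT);
  // BB now ends in an unconditional branch to Tail, which begins at SplitBefore.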
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
LoadInst * RootInsert
ValWidth bits of Val, starting at ValOffset, stored at PtrBase+PtrOffset.
bool operator<(const PartStore &Other) const
bool isCompatibleWith(const PartStore &Other) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
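A sketch of carrying alias metadata onto an instruction that replaces two others (NewStore, S1, and S2 assumed in scope):
  NewStore->setAAMetadata(S1->getAAMetadata().concat(S2->getAAMetadata()));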
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257