1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
35
36using namespace llvm;
37using namespace PatternMatch;
38
39#define DEBUG_TYPE "aggressive-instcombine"
40
41STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
42STATISTIC(NumGuardedRotates,
43 "Number of guarded rotates transformed into funnel shifts");
44STATISTIC(NumGuardedFunnelShifts,
45 "Number of guarded funnel shifts transformed into funnel shifts");
46STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
47
49 "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
50 cl::desc("Max number of instructions to scan for aggressive instcombine."));
51
53 "strncmp-inline-threshold", cl::init(3), cl::Hidden,
54 cl::desc("The maximum length of a constant string for a builtin string cmp "
55 "call eligible for inlining. The default value is 3."));
56
57 static cl::opt<unsigned>
58 MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
59 cl::desc("The maximum length of a constant string to "
60 "inline a memchr call."));
61
62/// Match a pattern for a bitwise funnel/rotate operation that partially guards
63/// against undefined behavior by branching around the funnel-shift/rotation
64/// when the shift amount is 0.
65 static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
66 if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
67 return false;
68
69 // As with the one-use checks below, this is not strictly necessary, but we
70 // are being cautious to avoid potential perf regressions on targets that
71 // do not actually have a funnel/rotate instruction (where the funnel shift
72 // would be expanded back into math/shift/logic ops).
73 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
74 return false;
75
76 // Match V to funnel shift left/right and capture the source operands and
77 // shift amount.
78 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
79 Value *&ShAmt) {
80 unsigned Width = V->getType()->getScalarSizeInBits();
81
82 // fshl(ShVal0, ShVal1, ShAmt)
83 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
84 if (match(V, m_OneUse(m_c_Or(
85 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
86 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
87 m_Deferred(ShAmt))))))) {
88 return Intrinsic::fshl;
89 }
90
91 // fshr(ShVal0, ShVal1, ShAmt)
92 // == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
93 if (match(V,
94 m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
95 m_Value(ShAmt))),
96 m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
97 return Intrinsic::fshr;
98 }
99
100 return Intrinsic::not_intrinsic;
101 };
102
103 // One phi operand must be a funnel/rotate operation, and the other phi
104 // operand must be the source value of that funnel/rotate operation:
105 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
106 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
107 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
108 PHINode &Phi = cast<PHINode>(I);
109 unsigned FunnelOp = 0, GuardOp = 1;
110 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
111 Value *ShVal0, *ShVal1, *ShAmt;
112 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
113 if (IID == Intrinsic::not_intrinsic ||
114 (IID == Intrinsic::fshl && ShVal0 != P1) ||
115 (IID == Intrinsic::fshr && ShVal1 != P1)) {
116 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
117 if (IID == Intrinsic::not_intrinsic ||
118 (IID == Intrinsic::fshl && ShVal0 != P0) ||
119 (IID == Intrinsic::fshr && ShVal1 != P0))
120 return false;
121 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
122 "Pattern must match funnel shift left or right");
123 std::swap(FunnelOp, GuardOp);
124 }
125
126 // The incoming block with our source operand must be the "guard" block.
127 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
128 // amount is equal to 0. The other incoming block is the block with the
129 // funnel/rotate.
130 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
131 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
132 Instruction *TermI = GuardBB->getTerminator();
133
134 // Ensure that the shift values dominate each block.
135 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
136 return false;
137
138 BasicBlock *PhiBB = Phi.getParent();
139 if (!match(TermI, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
140 m_ZeroInt()),
141 m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
142 return false;
143
144 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
145
146 if (ShVal0 == ShVal1)
147 ++NumGuardedRotates;
148 else
149 ++NumGuardedFunnelShifts;
150
151 // If this is not a rotate then the select was blocking poison from the
152 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
153 bool IsFshl = IID == Intrinsic::fshl;
154 if (ShVal0 != ShVal1) {
155 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
156 ShVal1 = Builder.CreateFreeze(ShVal1);
157 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
158 ShVal0 = Builder.CreateFreeze(ShVal0);
159 }
160
161 // We matched a variation of this IR pattern:
162 // GuardBB:
163 // %cmp = icmp eq i32 %ShAmt, 0
164 // br i1 %cmp, label %PhiBB, label %FunnelBB
165 // FunnelBB:
166 // %sub = sub i32 32, %ShAmt
167 // %shr = lshr i32 %ShVal1, %sub
168 // %shl = shl i32 %ShVal0, %ShAmt
169 // %fsh = or i32 %shr, %shl
170 // br label %PhiBB
171 // PhiBB:
172 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
173 // -->
174 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
175 Phi.replaceAllUsesWith(
176 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
177 return true;
178}
179
180/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
181/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
182/// of 'and' ops, then we also need to capture the fact that we saw an
183/// "and X, 1", so that's an extra return value for that case.
184namespace {
185struct MaskOps {
186 Value *Root = nullptr;
187 APInt Mask;
188 bool MatchAndChain;
189 bool FoundAnd1 = false;
190
191 MaskOps(unsigned BitWidth, bool MatchAnds)
192 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
193};
194} // namespace
195
196/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
197/// chain of 'and' or 'or' instructions looking for shift ops of a common source
198/// value. Examples:
199/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
200/// returns { X, 0x129 }
201/// and (and (X >> 1), 1), (X >> 4)
202/// returns { X, 0x12 }
203static bool matchAndOrChain(Value *V, MaskOps &MOps) {
204 Value *Op0, *Op1;
205 if (MOps.MatchAndChain) {
206 // Recurse through a chain of 'and' operands. This requires an extra check
207 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
208 // in the chain to know that all of the high bits are cleared.
209 if (match(V, m_And(m_Value(Op0), m_One()))) {
210 MOps.FoundAnd1 = true;
211 return matchAndOrChain(Op0, MOps);
212 }
213 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
214 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
215 } else {
216 // Recurse through a chain of 'or' operands.
217 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
218 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
219 }
220
221 // We need a shift-right or a bare value representing a compare of bit 0 of
222 // the original source operand.
223 Value *Candidate;
224 const APInt *BitIndex = nullptr;
225 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
226 Candidate = V;
227
228 // Initialize result source operand.
229 if (!MOps.Root)
230 MOps.Root = Candidate;
231
232 // The shift constant is out-of-range? This code hasn't been simplified.
233 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
234 return false;
235
236 // Fill in the mask bit derived from the shift constant.
237 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
238 return MOps.Root == Candidate;
239}
240
241/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
242/// These will include a chain of 'or' or 'and'-shifted bits from a
243/// common source value:
244/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
245/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
246/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
247/// that differ only with a final 'not' of the result. We expect that final
248/// 'not' to be folded with the compare that we create here (invert predicate).
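/// Illustrative example (editor's sketch, not from the upstream source) of the
/// "any-bits-set" form for bits 0, 3 and 5 of %x:
///   %s3 = lshr i32 %x, 3
///   %o1 = or i32 %x, %s3
///   %s5 = lshr i32 %x, 5
///   %o2 = or i32 %o1, %s5
///   %r  = and i32 %o2, 1
///     -->
///   %m  = and i32 %x, 41        ; mask 0b101001
///   %c  = icmp ne i32 %m, 0
///   %r  = zext i1 %c to i32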
249 static bool foldAnyOrAllBitsSet(Instruction &I) {
250 // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
251 // final "and X, 1" instruction must be the final op in the sequence.
252 bool MatchAllBitsSet;
253 if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_One())))
254 MatchAllBitsSet = true;
255 else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
256 MatchAllBitsSet = false;
257 else
258 return false;
259
260 MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
261 if (MatchAllBitsSet) {
262 if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
263 return false;
264 } else {
265 if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
266 return false;
267 }
268
269 // The pattern was found. Create a masked compare that replaces all of the
270 // shift and logic ops.
271 IRBuilder<> Builder(&I);
272 Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
273 Value *And = Builder.CreateAnd(MOps.Root, Mask);
274 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
275 : Builder.CreateIsNotNull(And);
276 Value *Zext = Builder.CreateZExt(Cmp, I.getType());
277 I.replaceAllUsesWith(Zext);
278 ++NumAnyOrAllBitsSet;
279 return true;
280}
281
282// Try to recognize below function as popcount intrinsic.
283// This is the "best" algorithm from
284// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
285// Also used in TargetLowering::expandCTPOP().
286//
287// int popcount(unsigned int i) {
288// i = i - ((i >> 1) & 0x55555555);
289// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
290// i = ((i + (i >> 4)) & 0x0F0F0F0F);
291// return (i * 0x01010101) >> 24;
292// }
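// Illustrative example (editor's sketch, not from the upstream source) of the
// i32 IR that gets recognized:
//   %v1 = lshr i32 %x, 1
//   %a1 = and i32 %v1, 1431655765    ; 0x55555555
//   %s1 = sub i32 %x, %a1
//   %a2 = and i32 %s1, 858993459     ; 0x33333333
//   %v2 = lshr i32 %s1, 2
//   %a3 = and i32 %v2, 858993459
//   %s2 = add i32 %a2, %a3
//   %v4 = lshr i32 %s2, 4
//   %s3 = add i32 %s2, %v4
//   %a4 = and i32 %s3, 252645135     ; 0x0F0F0F0F
//   %m  = mul i32 %a4, 16843009      ; 0x01010101
//   %r  = lshr i32 %m, 24
//     -->
//   %r  = call i32 @llvm.ctpop.i32(i32 %x)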
293 static bool tryToRecognizePopCount(Instruction &I) {
294 if (I.getOpcode() != Instruction::LShr)
295 return false;
296
297 Type *Ty = I.getType();
298 if (!Ty->isIntOrIntVectorTy())
299 return false;
300
301 unsigned Len = Ty->getScalarSizeInBits();
302 // FIXME: fix Len == 8 and other irregular type lengths.
303 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
304 return false;
305
306 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
307 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
308 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
309 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
310 APInt MaskShift = APInt(Len, Len - 8);
311
312 Value *Op0 = I.getOperand(0);
313 Value *Op1 = I.getOperand(1);
314 Value *MulOp0;
315 // Matching "(i * 0x01010101...) >> 24".
316 if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
317 match(Op1, m_SpecificInt(MaskShift))) {
318 Value *ShiftOp0;
319 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
320 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
321 m_Deferred(ShiftOp0)),
322 m_SpecificInt(Mask0F)))) {
323 Value *AndOp0;
324 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
325 if (match(ShiftOp0,
326 m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
327 m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
328 m_SpecificInt(Mask33))))) {
329 Value *Root, *SubOp1;
330 // Matching "i - ((i >> 1) & 0x55555555...)".
331 const APInt *AndMask;
332 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
333 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
334 m_APInt(AndMask)))) {
335 auto CheckAndMask = [&]() {
336 if (*AndMask == Mask55)
337 return true;
338
339 // Exact match failed, see if any bits are known to be 0 where we
340 // expect a 1 in the mask.
341 if (!AndMask->isSubsetOf(Mask55))
342 return false;
343
344 APInt NeededMask = Mask55 & ~*AndMask;
345 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
346 NeededMask,
347 SimplifyQuery(I.getDataLayout()));
348 };
349
350 if (CheckAndMask()) {
351 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
352 IRBuilder<> Builder(&I);
353 I.replaceAllUsesWith(
354 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
355 ++NumPopCountRecognized;
356 return true;
357 }
358 }
359 }
360 }
361 }
362
363 return false;
364}
365
366/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
367/// C2 saturate the value of the fp conversion. The transform is not reversible,
368/// as the fptosi.sat is more defined than the input - all values produce a
369/// valid value for the fptosi.sat, whereas some inputs that were out of range
370/// of the integer conversion produce poison for the original. The reversed
371/// pattern may use fmax and fmin instead. As we cannot directly reverse the
372/// transform, and it is not always profitable, we make it conditional on the
373/// cost being reported as lower by TTI.
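/// Illustrative example (editor's sketch, not from the upstream source):
///   %f = fptosi float %x to i32
///   %a = call i32 @llvm.smax.i32(i32 %f, i32 -128)
///   %b = call i32 @llvm.smin.i32(i32 %a, i32 127)
///     -->
///   %s = call i8 @llvm.fptosi.sat.i8.f32(float %x)
///   %b = sext i8 %s to i32
/// (Only performed when TTI reports the saturating form as cheaper.)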
374 static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
375 // Look for smin(smax(fptosi(x), C1), C2), converting to fptosi_sat.
376 Value *In;
377 const APInt *MinC, *MaxC;
378 if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
379 m_APInt(MinC))),
380 m_APInt(MaxC))) &&
381 !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
382 m_APInt(MaxC))),
383 m_APInt(MinC))))
384 return false;
385
386 // Check that the constants clamp a saturate.
387 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
388 return false;
389
390 Type *IntTy = I.getType();
391 Type *FpTy = In->getType();
392 Type *SatTy =
393 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
394 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
395 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
396
397 // Get the cost of the intrinsic, and check that against the cost of
398 // fptosi+smin+smax
399 InstructionCost SatCost = TTI.getIntrinsicInstrCost(
400 IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
401 TTI::TCK_RecipThroughput);
402 SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
403 TTI::CastContextHint::None,
404 TTI::TCK_RecipThroughput);
405
406 InstructionCost MinMaxCost = TTI.getCastInstrCost(
407 Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
408 TTI::TCK_RecipThroughput);
409 MinMaxCost += TTI.getIntrinsicInstrCost(
410 IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
411 TTI::TCK_RecipThroughput);
412 MinMaxCost += TTI.getIntrinsicInstrCost(
413 IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
414 TTI::TCK_RecipThroughput);
415
416 if (SatCost >= MinMaxCost)
417 return false;
418
419 IRBuilder<> Builder(&I);
420 Value *Sat =
421 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
422 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
423 return true;
424}
425
426/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
427/// pessimistic codegen that has to account for setting errno and can enable
428/// vectorization.
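/// Illustrative example (editor's sketch, not from the upstream source):
///   %r = call nnan float @sqrtf(float %x)
///     -->
///   %r = call nnan float @llvm.sqrt.f32(float %x)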
429 static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
430 TargetLibraryInfo &TLI, AssumptionCache &AC,
431 DominatorTree &DT) {
432 // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
433 // (because NNAN or the operand arg must not be less than -0.0) and (3) we
434 // would not end up lowering to a libcall anyway (which could change the value
435 // of errno), then:
436 // (1) errno won't be set.
437 // (2) it is safe to convert this to an intrinsic call.
438 Type *Ty = Call->getType();
439 Value *Arg = Call->getArgOperand(0);
440 if (TTI.haveFastSqrt(Ty) &&
441 (Call->hasNoNaNs() ||
442 cannotBeOrderedLessThanZero(
443 Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
444 IRBuilder<> Builder(Call);
445 Value *NewSqrt =
446 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
447 Call->replaceAllUsesWith(NewSqrt);
448
449 // Explicitly erase the old call because a call with side effects is not
450 // trivially dead.
451 Call->eraseFromParent();
452 return true;
453 }
454
455 return false;
456}
457
458// Check if this array of constants represents a cttz table.
459// Iterate over the elements from \p Table by trying to find/match all
460// the numbers from 0 to \p InputBits that should represent cttz results.
461static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
462 const APInt &AndMask, Type *AccessTy,
463 unsigned InputBits, const APInt &GEPIdxFactor,
464 const DataLayout &DL) {
465 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
466 APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
467 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
468 ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
469 if (!C || C->getValue() != Idx)
470 return false;
471 }
472
473 return true;
474}
475
476// Try to recognize table-based ctz implementation.
477// E.g., an example in C (for more cases please see the llvm/tests):
478// int f(unsigned x) {
479// static const char table[32] =
480// {0, 1, 28, 2, 29, 14, 24, 3, 30,
481// 22, 20, 15, 25, 17, 4, 8, 31, 27,
482// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
483// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
484// }
485// this can be lowered to `cttz` instruction.
486// There is also a special case when the element is 0.
487//
488// The (x & -x) isolates the lowest set bit of x. The multiply is by a de
489// Bruijn sequence that contains each pattern of bits in it. The shift
490// extracts the top bits after the multiply, and that index into the table
491// should represent the number of trailing zeros in the original number.
492//
493// Here are some examples of LLVM IR for a 64-bit target:
494//
495// CASE 1:
496// %sub = sub i32 0, %x
497// %and = and i32 %sub, %x
498// %mul = mul i32 %and, 125613361
499// %shr = lshr i32 %mul, 27
500// %idxprom = zext i32 %shr to i64
501// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
502// i64 %idxprom
503// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
504//
505// CASE 2:
506// %sub = sub i32 0, %x
507// %and = and i32 %sub, %x
508// %mul = mul i32 %and, 72416175
509// %shr = lshr i32 %mul, 26
510// %idxprom = zext i32 %shr to i64
511// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
512// i64 0, i64 %idxprom
513// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
514//
515// CASE 3:
516// %sub = sub i32 0, %x
517// %and = and i32 %sub, %x
518// %mul = mul i32 %and, 81224991
519// %shr = lshr i32 %mul, 27
520// %idxprom = zext i32 %shr to i64
521// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
522// i64 0, i64 %idxprom
523// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
524//
525// CASE 4:
526// %sub = sub i64 0, %x
527// %and = and i64 %sub, %x
528// %mul = mul i64 %and, 283881067100198605
529// %shr = lshr i64 %mul, 58
530// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
531// i64 %shr
532// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
533//
534// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
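// For illustration (editor's sketch, not from the upstream source), CASE 1
// above becomes:
//   %ct  = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %isz = icmp eq i32 %x, 0
//   %sel = select i1 %isz, i32 0, i32 %ct     ; 0 is ctz1.table[0]
//   %res = trunc i32 %sel to i8               ; back to the loaded type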
535 static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
536 LoadInst *LI = dyn_cast<LoadInst>(&I);
537 if (!LI)
538 return false;
539
540 Type *AccessType = LI->getType();
541 if (!AccessType->isIntegerTy())
542 return false;
543
544 auto *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
545 if (!GEP || !GEP->hasNoUnsignedSignedWrap())
546 return false;
547
548 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
549 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
550 return false;
551
552 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
553 APInt ModOffset(BW, 0);
554 SmallMapVector<Value *, APInt, 4> VarOffsets;
555 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
556 VarOffsets.size() != 1 || ModOffset != 0)
557 return false;
558 auto [GepIdx, GEPScale] = VarOffsets.front();
559
560 Value *X1;
561 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
562 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
563 // This might be extended to the pointer index type, and if the gep index type
564 // has been replaced with an i8 then a new And (and different ShiftConst) will
565 // be present.
566 auto MatchInner = m_LShr(
567 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
568 m_APInt(ShiftConst));
569 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
570 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
571 return false;
572
573 unsigned InputBits = X1->getType()->getScalarSizeInBits();
574 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
575 return false;
576
577 if (!GEPScale.isIntN(InputBits) ||
578 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
579 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
580 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
581 return false;
582
583 ConstantInt *ZeroTableElem = cast<ConstantInt>(
584 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
585 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
586
587 IRBuilder<> B(LI);
588 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
589 Type *XType = X1->getType();
590 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
591 Value *ZExtOrTrunc = nullptr;
592
593 if (DefinedForZero) {
594 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
595 } else {
596 // If the value in elem 0 isn't the same as InputBits, we still want to
597 // produce the value from the table.
598 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
599 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
600
601 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
602 // it should be handled as: `cttz(x) & (typeSize - 1)`.
603
604 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
605 }
606
607 LI->replaceAllUsesWith(ZExtOrTrunc);
608
609 return true;
610}
611
612/// This is used by foldLoadsRecursive() to capture a Root Load node which is
613/// of type or(load, load) and recursively build the wide load. Also capture the
614/// shift amount, zero extend type and loadSize.
615 struct LoadOps {
616 LoadInst *Root = nullptr;
617 LoadInst *RootInsert = nullptr;
618 bool FoundRoot = false;
619 uint64_t LoadSize = 0;
620 uint64_t Shift = 0;
621 Type *ZextType;
622 AAMDNodes AATags;
623 };
624
625// Identify and Merge consecutive loads recursively which is of the form
626// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
627// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
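// Illustrative example (editor's sketch, not from the upstream source), on a
// little-endian target where the wider load is legal and fast:
//   %b0 = load i8, ptr %p
//   %p1 = getelementptr i8, ptr %p, i64 1
//   %b1 = load i8, ptr %p1
//   %z0 = zext i8 %b0 to i16
//   %z1 = zext i8 %b1 to i16
//   %sh = shl i16 %z1, 8
//   %or = or i16 %z0, %sh
//     -->
//   %or = load i16, ptr %p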
628static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
629 AliasAnalysis &AA) {
630 uint64_t ShAmt2;
631 Value *X;
632 Instruction *L1, *L2;
633
634 // Go to the last node with loads.
635 if (match(V,
636 m_OneUse(m_c_Or(m_Value(X),
637 m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
638 ShAmt2)))))) {
639 if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
640 // Avoid Partial chain merge.
641 return false;
642 } else
643 return false;
644
645 // Check if the pattern has loads
646 LoadInst *LI1 = LOps.Root;
647 uint64_t ShAmt1 = LOps.Shift;
648 if (LOps.FoundRoot == false &&
649 match(X, m_OneUse(
650 m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
651 LI1 = dyn_cast<LoadInst>(L1);
652 }
653 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
654
655 // Check if loads are same, atomic, volatile and having same address space.
656 if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
657 LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
658 return false;
659
660 // Check if Loads come from same BB.
661 if (LI1->getParent() != LI2->getParent())
662 return false;
663
664 // Find the data layout
665 bool IsBigEndian = DL.isBigEndian();
666
667 // Check if loads are consecutive and same size.
668 Value *Load1Ptr = LI1->getPointerOperand();
669 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
670 Load1Ptr =
671 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
672 /* AllowNonInbounds */ true);
673
674 Value *Load2Ptr = LI2->getPointerOperand();
675 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
676 Load2Ptr =
677 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
678 /* AllowNonInbounds */ true);
679
680 // Verify if both loads have same base pointers
681 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
682 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
683 if (Load1Ptr != Load2Ptr)
684 return false;
685
686 // Make sure that there are no padding bits.
687 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
688 !DL.typeSizeEqualsStoreSize(LI2->getType()))
689 return false;
690
691 // Alias Analysis to check for stores b/w the loads.
692 LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
693 MemoryLocation Loc;
694 if (!Start->comesBefore(End)) {
695 std::swap(Start, End);
696 Loc = MemoryLocation::get(End);
697 if (LOps.FoundRoot)
698 Loc = Loc.getWithNewSize(LOps.LoadSize);
699 } else
700 Loc = MemoryLocation::get(End);
701 unsigned NumScanned = 0;
702 for (Instruction &Inst :
703 make_range(Start->getIterator(), End->getIterator())) {
704 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
705 return false;
706
707 if (++NumScanned > MaxInstrsToScan)
708 return false;
709 }
710
711 // Make sure Load with lower Offset is at LI1
712 bool Reverse = false;
713 if (Offset2.slt(Offset1)) {
714 std::swap(LI1, LI2);
715 std::swap(ShAmt1, ShAmt2);
716 std::swap(Offset1, Offset2);
717 std::swap(Load1Ptr, Load2Ptr);
718 std::swap(LoadSize1, LoadSize2);
719 Reverse = true;
720 }
721
722 // Big endian swap the shifts
723 if (IsBigEndian)
724 std::swap(ShAmt1, ShAmt2);
725
726 // First load is always LI1. This is where we put the new load.
727 // Use the merged load size available from LI1 for forward loads.
728 if (LOps.FoundRoot) {
729 if (!Reverse)
730 LoadSize1 = LOps.LoadSize;
731 else
732 LoadSize2 = LOps.LoadSize;
733 }
734
735 // Verify that the shift amounts and load offsets line up, i.e. that the
736 // loads are consecutive.
737 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
738 uint64_t PrevSize =
739 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
740 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
741 return false;
742
743 // Update LOps
744 AAMDNodes AATags1 = LOps.AATags;
745 AAMDNodes AATags2 = LI2->getAAMetadata();
746 if (LOps.FoundRoot == false) {
747 LOps.FoundRoot = true;
748 AATags1 = LI1->getAAMetadata();
749 }
750 LOps.LoadSize = LoadSize1 + LoadSize2;
751 LOps.RootInsert = Start;
752
753 // Concatenate the AATags of the Merged Loads.
754 LOps.AATags = AATags1.concat(AATags2);
755
756 LOps.Root = LI1;
757 LOps.Shift = ShAmt1;
758 LOps.ZextType = X->getType();
759 return true;
760}
761
762// For a given BB instruction, evaluate all loads in the chain that form a
763// pattern which suggests that the loads can be combined. The one and only use
764// of the loads is to form a wider load.
765 static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
766 TargetTransformInfo &TTI, AliasAnalysis &AA,
767 const DominatorTree &DT) {
768 // Only consider load chains of scalar values.
769 if (isa<VectorType>(I.getType()))
770 return false;
771
772 LoadOps LOps;
773 if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
774 return false;
775
776 IRBuilder<> Builder(&I);
777 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
778
779 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
780 // TTI based checks if we want to proceed with wider load
781 bool Allowed = TTI.isTypeLegal(WiderType);
782 if (!Allowed)
783 return false;
784
785 unsigned AS = LI1->getPointerAddressSpace();
786 unsigned Fast = 0;
787 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
788 AS, LI1->getAlign(), &Fast);
789 if (!Allowed || !Fast)
790 return false;
791
792 // Get the Index and Ptr for the new GEP.
793 Value *Load1Ptr = LI1->getPointerOperand();
794 Builder.SetInsertPoint(LOps.RootInsert);
795 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
796 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
797 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
798 DL, Offset1, /* AllowNonInbounds */ true);
799 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
800 }
801 // Generate wider load.
802 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
803 LI1->isVolatile(), "");
804 NewLoad->takeName(LI1);
805 // Set the New Load AATags Metadata.
806 if (LOps.AATags)
807 NewLoad->setAAMetadata(LOps.AATags);
808
809 Value *NewOp = NewLoad;
810 // Check if zero extend needed.
811 if (LOps.ZextType)
812 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
813
814 // Check if shift needed. We need to shift with the amount of load1
815 // shift if not zero.
816 if (LOps.Shift)
817 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
818 I.replaceAllUsesWith(NewOp);
819
820 return true;
821}
822
823/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
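/// Illustrative example (editor's sketch, not from the upstream source), on a
/// little-endian target:
///   %lo = trunc i16 %v to i8
///   store i8 %lo, ptr %p
///   %sh = lshr i16 %v, 8
///   %hi = trunc i16 %sh to i8
///   %p1 = getelementptr i8, ptr %p, i64 1
///   store i8 %hi, ptr %p1
///     -->
///   store i16 %v, ptr %p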
824struct PartStore {
825 Value *PtrBase;
826 APInt PtrOffset;
827 Value *Val;
828 uint64_t ValOffset;
829 uint64_t ValWidth;
830 StoreInst *Store;
831
832 bool isCompatibleWith(const PartStore &Other) const {
833 return PtrBase == Other.PtrBase && Val == Other.Val;
834 }
835
836 bool operator<(const PartStore &Other) const {
837 return PtrOffset.slt(Other.PtrOffset);
838 }
839};
840
841static std::optional<PartStore> matchPartStore(Instruction &I,
842 const DataLayout &DL) {
843 auto *Store = dyn_cast<StoreInst>(&I);
844 if (!Store || !Store->isSimple())
845 return std::nullopt;
846
847 Value *StoredVal = Store->getValueOperand();
848 Type *StoredTy = StoredVal->getType();
849 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
850 return std::nullopt;
851
852 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
853 uint64_t ValOffset;
854 Value *Val;
855 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
856 return std::nullopt;
857
858 Value *Ptr = Store->getPointerOperand();
859 APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
860 Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
861 DL, PtrOffset, /*AllowNonInbounds=*/true);
862 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
863}
864
865 static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
866 unsigned Width, const DataLayout &DL,
867 TargetTransformInfo &TTI) {
868 if (Parts.size() < 2)
869 return false;
870
871 // Check whether combining the stores is profitable.
872 // FIXME: We could generate smaller stores if we can't produce a large one.
873 const PartStore &First = Parts.front();
874 LLVMContext &Ctx = First.Store->getContext();
875 Type *NewTy = Type::getIntNTy(Ctx, Width);
876 unsigned Fast = 0;
877 if (!TTI.isTypeLegal(NewTy) ||
878 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
879 First.Store->getPointerAddressSpace(),
880 First.Store->getAlign(), &Fast) ||
881 !Fast)
882 return false;
883
884 // Generate the combined store.
885 IRBuilder<> Builder(First.Store);
886 Value *Val = First.Val;
887 if (First.ValOffset != 0)
888 Val = Builder.CreateLShr(Val, First.ValOffset);
889 Val = Builder.CreateTrunc(Val, NewTy);
890 StoreInst *Store = Builder.CreateAlignedStore(
891 Val, First.Store->getPointerOperand(), First.Store->getAlign());
892
893 AAMDNodes AATags = First.Store->getAAMetadata();
894 for (const PartStore &Part : drop_begin(Parts))
895 AATags = AATags.concat(Part.Store->getAAMetadata());
896 Store->setAAMetadata(AATags);
897
898 // Remove the old stores.
899 for (const PartStore &Part : Parts)
900 Part.Store->eraseFromParent();
901
902 return true;
903}
904
905 static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
906 const DataLayout &DL, TargetTransformInfo &TTI) {
907 if (Parts.size() < 2)
908 return false;
909
910 // We now have multiple parts of the same value stored to the same pointer.
911 // Sort the parts by pointer offset, and make sure they are consistent with
912 // the value offsets. Also check that the value is fully covered without
913 // overlaps.
914 bool Changed = false;
915 llvm::sort(Parts);
916 int64_t LastEndOffsetFromFirst = 0;
917 const PartStore *First = &Parts[0];
918 for (const PartStore &Part : Parts) {
919 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
920 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
921 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
922 LastEndOffsetFromFirst != ValOffsetFromFirst) {
923 Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
924 LastEndOffsetFromFirst, DL, TTI);
925 First = &Part;
926 LastEndOffsetFromFirst = Part.ValWidth;
927 continue;
928 }
929
930 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
931 }
932
933 Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
934 LastEndOffsetFromFirst, DL, TTI);
935 return Changed;
936}
937
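// Scan a basic block for stores of truncated/shifted pieces of the same value
// and merge runs of consecutive, non-overlapping pieces into a single wider
// store (little-endian layouts only for now).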
938 static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
939 TargetTransformInfo &TTI, AliasAnalysis &AA) {
940 // FIXME: Add big endian support.
941 if (DL.isBigEndian())
942 return false;
943
944 BatchAAResults BatchAA(AA);
946 bool MadeChange = false;
947 for (Instruction &I : make_early_inc_range(BB)) {
948 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
949 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
950 Parts.push_back(std::move(*Part));
951 continue;
952 }
953
954 MadeChange |= mergePartStores(Parts, DL, TTI);
955 Parts.clear();
956 Parts.push_back(std::move(*Part));
957 continue;
958 }
959
960 if (Parts.empty())
961 continue;
962
963 if (I.mayThrow() ||
964 (I.mayReadOrWriteMemory() &&
965 isModOrRefSet(BatchAA.getModRefInfo(
966 &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
967 MadeChange |= mergePartStores(Parts, DL, TTI);
968 Parts.clear();
969 continue;
970 }
971 }
972
973 MadeChange |= mergePartStores(Parts, DL, TTI);
974 return MadeChange;
975}
976
977/// Combine away instructions provided they are still equivalent when compared
978/// against 0, i.e. do they have any bits set.
979 static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
980 auto *I = dyn_cast<Instruction>(V);
981 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
982 return nullptr;
983
984 Value *A;
985
986 // Look deeper into the chain of or's, combining away shl (so long as they are
987 // nuw or nsw).
988 Value *Op0 = I->getOperand(0);
989 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
990 m_NUWShl(m_Value(A), m_Value()))))
991 Op0 = A;
992 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
993 Op0 = NOp;
994
995 Value *Op1 = I->getOperand(1);
996 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
997 m_NUWShl(m_Value(A), m_Value()))))
998 Op1 = A;
999 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1000 Op1 = NOp;
1001
1002 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1003 return Builder.CreateOr(Op0, Op1);
1004 return nullptr;
1005}
1006
1007 static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
1008 TargetTransformInfo &TTI, AliasAnalysis &AA,
1009 const DominatorTree &DT) {
1010 CmpPredicate Pred;
1011 Value *Op0;
1012 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1013 !ICmpInst::isEquality(Pred))
1014 return false;
1015
1016 // If the chain of or's matches a load, combine to that before attempting to
1017 // remove shifts.
1018 if (auto OpI = dyn_cast<Instruction>(Op0))
1019 if (OpI->getOpcode() == Instruction::Or)
1020 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1021 return true;
1022
1023 IRBuilder<> Builder(&I);
1024 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1025 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1026 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1027 return true;
1028 }
1029
1030 return false;
1031}
1032
1033// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1034// ModOffset
1035static std::pair<APInt, APInt>
1036 getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
1037 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1038 std::optional<APInt> Stride;
1039 APInt ModOffset(BW, 0);
1040 // Return a minimum gep stride, the greatest common divisor of consecutive gep
1041 // index scales (c.f. Bézout's identity).
1042 while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
1043 SmallMapVector<Value *, APInt, 4> VarOffsets;
1044 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1045 break;
1046
1047 for (auto [V, Scale] : VarOffsets) {
1048 // Only keep a power of two factor for non-inbounds
1049 if (!GEP->hasNoUnsignedSignedWrap())
1050 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1051
1052 if (!Stride)
1053 Stride = Scale;
1054 else
1055 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1056 }
1057
1058 PtrOp = GEP->getPointerOperand();
1059 }
1060
1061 // Check whether pointer arrives back at Global Variable via at least one GEP.
1062 // Even if it doesn't, we can check by alignment.
1063 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1064 return {APInt(BW, 1), APInt(BW, 0)};
1065
1066 // Because GEP indices may be signed, normalize the accumulated offset to a
1067 // non-negative remainder of division by the minimum GEP stride.
1068 ModOffset = ModOffset.srem(*Stride);
1069 if (ModOffset.isNegative())
1070 ModOffset += *Stride;
1071
1072 return {*Stride, ModOffset};
1073}
1074
1075/// If C is a constant patterned array and all valid loaded results for the
1076/// given alignment are the same constant, fold the load to that constant.
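/// Illustrative example (editor's sketch, not from the upstream source):
///   @g = constant [4 x i16] [i16 7, i16 7, i16 7, i16 7], align 2
///   %v = load i16, ptr %p, align 2   ; %p is a GEP into @g
///     -->
///   every 2-byte-aligned offset into @g loads 7, so %v is replaced by i16 7.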
1077 static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
1078 auto *LI = dyn_cast<LoadInst>(&I);
1079 if (!LI || LI->isVolatile())
1080 return false;
1081
1082 // We can only fold the load if it is from a constant global with definitive
1083 // initializer. Skip expensive logic if this is not the case.
1084 auto *PtrOp = LI->getPointerOperand();
1085 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
1086 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1087 return false;
1088
1089 // Bail for large initializers in excess of 4K to avoid too many scans.
1090 Constant *C = GV->getInitializer();
1091 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1092 if (!GVSize || 4096 < GVSize)
1093 return false;
1094
1095 Type *LoadTy = LI->getType();
1096 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1097 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1098
1099 // Any possible offset could be multiple of GEP stride. And any valid
1100 // offset is multiple of load alignment, so checking only multiples of bigger
1101 // one is sufficient to say results' equality.
1102 if (auto LA = LI->getAlign();
1103 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1104 ConstOffset = APInt(BW, 0);
1105 Stride = APInt(BW, LA.value());
1106 }
1107
1108 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1109 if (!Ca)
1110 return false;
1111
1112 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1113 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1114 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1115 return false;
1116
1117 I.replaceAllUsesWith(Ca);
1118
1119 return true;
1120}
1121
1122namespace {
1123class StrNCmpInliner {
1124public:
1125 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1126 const DataLayout &DL)
1127 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1128
1129 bool optimizeStrNCmp();
1130
1131private:
1132 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1133
1134 CallInst *CI;
1135 LibFunc Func;
1136 DomTreeUpdater *DTU;
1137 const DataLayout &DL;
1138};
1139
1140} // namespace
1141
1142/// First we normalize calls to strncmp/strcmp to the form of
1143/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1144/// (without considering '\0').
1145///
1146/// Examples:
1147///
1148/// \code
1149/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1150/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1151/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1152/// strcmp(s, "a") -> compare(s, "a", 2)
1153///
1154/// char s2[] = {'a'}
1155/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1156///
1157/// char s2[] = {'a', 'b', 'c', 'd'}
1158/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1159/// \endcode
1160///
1161/// We only handle cases where N and exactly one of s1 and s2 are constant.
1162/// Cases that s1 and s2 are both constant are already handled by the
1163/// instcombine pass.
1164///
1165/// We do not handle cases where N > StrNCmpInlineThreshold.
1166///
1167/// We also do not handle cases where N < 2, which are already
1168/// handled by the instcombine pass.
1169///
1170bool StrNCmpInliner::optimizeStrNCmp() {
1171 if (StrNCmpInlineThreshold < 2)
1172 return false;
1173
1174 if (!isOnlyUsedInZeroComparison(CI))
1175 return false;
1176
1177 Value *Str1P = CI->getArgOperand(0);
1178 Value *Str2P = CI->getArgOperand(1);
1179 // Should be handled elsewhere.
1180 if (Str1P == Str2P)
1181 return false;
1182
1183 StringRef Str1, Str2;
1184 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1185 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
1186 if (HasStr1 == HasStr2)
1187 return false;
1188
1189 // Note that '\0' and characters after it are not trimmed.
1190 StringRef Str = HasStr1 ? Str1 : Str2;
1191 Value *StrP = HasStr1 ? Str2P : Str1P;
1192
1193 size_t Idx = Str.find('\0');
1194 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1195 if (Func == LibFunc_strncmp) {
1196 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1197 N = std::min(N, ConstInt->getZExtValue());
1198 else
1199 return false;
1200 }
1201 // Now N means how many bytes we need to compare at most.
1202 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1203 return false;
1204
1205 // Cases where StrP has two or more dereferenceable bytes might be better
1206 // optimized elsewhere.
1207 bool CanBeNull = false, CanBeFreed = false;
1208 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1209 return false;
1210 inlineCompare(StrP, Str, N, HasStr1);
1211 return true;
1212}
1213
1214/// Convert
1215///
1216/// \code
1217/// ret = compare(s1, s2, N)
1218/// \endcode
1219///
1220/// into
1221///
1222/// \code
1223/// ret = (int)s1[0] - (int)s2[0]
1224/// if (ret != 0)
1225/// goto NE
1226/// ...
1227/// ret = (int)s1[N-2] - (int)s2[N-2]
1228/// if (ret != 0)
1229/// goto NE
1230/// ret = (int)s1[N-1] - (int)s2[N-1]
1231/// NE:
1232/// \endcode
1233///
1234/// CFG before and after the transformation:
1235///
1236/// (before)
1237/// BBCI
1238///
1239/// (after)
1240/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1241/// | ^
1242/// E |
1243/// | |
1244/// BBSubs[1] (sub,icmp) --NE-----+
1245/// ... |
1246/// BBSubs[N-1] (sub) ---------+
1247///
1248void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1249 bool Swapped) {
1250 auto &Ctx = CI->getContext();
1251 IRBuilder<> B(Ctx);
1252 // We want these instructions to be recognized as inlined instructions for the
1253 // compare call, but we don't have a source location for the definition of
1254 // that function, since we're generating that code now. Because the generated
1255 // code is a viable point for a memory access error, we make the pragmatic
1256 // choice here to directly use CI's location so that we have useful
1257 // attribution for the generated code.
1258 B.SetCurrentDebugLocation(CI->getDebugLoc());
1259
1260 BasicBlock *BBCI = CI->getParent();
1261 BasicBlock *BBTail =
1262 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1263
1265 for (uint64_t I = 0; I < N; ++I)
1266 BBSubs.push_back(
1267 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1268 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1269
1270 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1271
1272 B.SetInsertPoint(BBNE);
1273 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1274 B.CreateBr(BBTail);
1275
1276 Value *Base = LHS;
1277 for (uint64_t i = 0; i < N; ++i) {
1278 B.SetInsertPoint(BBSubs[i]);
1279 Value *VL =
1280 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1281 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1282 CI->getType());
1283 Value *VR =
1284 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
1285 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1286 if (i < N - 1)
1287 B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
1288 BBNE, BBSubs[i + 1]);
1289 else
1290 B.CreateBr(BBNE);
1291
1292 Phi->addIncoming(Sub, BBSubs[i]);
1293 }
1294
1295 CI->replaceAllUsesWith(Phi);
1296 CI->eraseFromParent();
1297
1298 if (DTU) {
1300 Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1301 for (uint64_t i = 0; i < N; ++i) {
1302 if (i < N - 1)
1303 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1304 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1305 }
1306 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1307 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1308 DTU->applyUpdates(Updates);
1309 }
1310}
1311
1312/// Convert memchr with a small constant string into a switch
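/// Illustrative example (editor's sketch, not from the upstream source):
///   %r = call ptr @memchr(ptr @str, i32 %c, i64 2)   ; @str = c"ab"
/// becomes a switch on `trunc %c to i8` with one case per distinct constant
/// byte ('a' and 'b'); each case yields the pointer to that byte's position
/// and the default result is null.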
1313 static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
1314 const DataLayout &DL) {
1315 if (isa<Constant>(Call->getArgOperand(1)))
1316 return false;
1317
1318 StringRef Str;
1319 Value *Base = Call->getArgOperand(0);
1320 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1321 return false;
1322
1323 uint64_t N = Str.size();
1324 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1325 uint64_t Val = ConstInt->getZExtValue();
1326 // Ignore the case that n is larger than the size of string.
1327 if (Val > N)
1328 return false;
1329 N = Val;
1330 } else
1331 return false;
1332
1334 return false;
1335
1336 BasicBlock *BB = Call->getParent();
1337 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1338 IRBuilder<> IRB(BB);
1339 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
1340 IntegerType *ByteTy = IRB.getInt8Ty();
1342 SwitchInst *SI = IRB.CreateSwitch(
1343 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1344 Type *IndexTy = DL.getIndexType(Call->getType());
1346
1347 BasicBlock *BBSuccess = BasicBlock::Create(
1348 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1349 IRB.SetInsertPoint(BBSuccess);
1350 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1351 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1352 IRB.CreateBr(BBNext);
1353 if (DTU)
1354 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1355
1357 for (uint64_t I = 0; I < N; ++I) {
1358 ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]);
1359 if (!Cases.insert(CaseVal).second)
1360 continue;
1361
1362 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1363 BB->getParent(), BBSuccess);
1364 SI->addCase(CaseVal, BBCase);
1365 IRB.SetInsertPoint(BBCase);
1366 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1367 IRB.CreateBr(BBSuccess);
1368 if (DTU) {
1369 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1370 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1371 }
1372 }
1373
1374 PHINode *PHI =
1375 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1376 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1377 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1378
1379 Call->replaceAllUsesWith(PHI);
1380 Call->eraseFromParent();
1381
1382 if (DTU)
1383 DTU->applyUpdates(Updates);
1384
1385 return true;
1386}
1387
1388 static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
1389 TargetLibraryInfo &TLI, AssumptionCache &AC,
1390 DominatorTree &DT, const DataLayout &DL,
1391 bool &MadeCFGChange) {
1392
1393 auto *CI = dyn_cast<CallInst>(&I);
1394 if (!CI || CI->isNoBuiltin())
1395 return false;
1396
1397 Function *CalledFunc = CI->getCalledFunction();
1398 if (!CalledFunc)
1399 return false;
1400
1401 LibFunc LF;
1402 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1403 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1404 return false;
1405
1406 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1407
1408 switch (LF) {
1409 case LibFunc_sqrt:
1410 case LibFunc_sqrtf:
1411 case LibFunc_sqrtl:
1412 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1413 case LibFunc_strcmp:
1414 case LibFunc_strncmp:
1415 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1416 MadeCFGChange = true;
1417 return true;
1418 }
1419 break;
1420 case LibFunc_memchr:
1421 if (foldMemChr(CI, &DTU, DL)) {
1422 MadeCFGChange = true;
1423 return true;
1424 }
1425 break;
1426 default:;
1427 }
1428 return false;
1429}
1430
1431/// This is the entry point for folds that could be implemented in regular
1432/// InstCombine, but they are separated because they are not expected to
1433/// occur frequently and/or have more than a constant-length pattern match.
1434 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1435 TargetTransformInfo &TTI, TargetLibraryInfo &TLI,
1436 AliasAnalysis &AA,
1437 AssumptionCache &AC, bool &MadeCFGChange) {
1438 bool MadeChange = false;
1439 for (BasicBlock &BB : F) {
1440 // Ignore unreachable basic blocks.
1441 if (!DT.isReachableFromEntry(&BB))
1442 continue;
1443
1444 const DataLayout &DL = F.getDataLayout();
1445
1446 // Walk the block backwards for efficiency. We're matching a chain of
1447 // use->defs, so we're more likely to succeed by starting from the bottom.
1448 // Also, we want to avoid matching partial patterns.
1449 // TODO: It would be more efficient if we removed dead instructions
1450 // iteratively in this loop rather than waiting until the end.
1451 for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
1452 MadeChange |= foldAnyOrAllBitsSet(I);
1453 MadeChange |= foldGuardedFunnelShift(I, DT);
1454 MadeChange |= tryToRecognizePopCount(I);
1455 MadeChange |= tryToFPToSat(I, TTI);
1456 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1457 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1458 MadeChange |= foldPatternedLoads(I, DL);
1459 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1460 // NOTE: This call may erase the instruction `I`, so it needs to be the
1461 // last one in this sequence; otherwise we may introduce use-after-free
1462 // bugs.
1463 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1464 }
1465
1466 // Do this separately to avoid redundantly scanning stores multiple times.
1467 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1468 }
1469
1470 // We're done with transforms, so remove dead instructions.
1471 if (MadeChange)
1472 for (BasicBlock &BB : F)
1473 SimplifyInstructionsInBlock(&BB);
1474
1475 return MadeChange;
1476}
1477
1478/// This is the entry point for all transforms. Pass manager differences are
1479/// handled in the callers of this function.
1480 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1481 TargetLibraryInfo &TLI, DominatorTree &DT,
1482 AliasAnalysis &AA, bool &MadeCFGChange) {
1483 bool MadeChange = false;
1484 const DataLayout &DL = F.getDataLayout();
1485 TruncInstCombine TIC(AC, TLI, DL, DT);
1486 MadeChange |= TIC.run(F);
1487 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1488 return MadeChange;
1489}
1490
1491 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1492 FunctionAnalysisManager &AM) {
1493 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1494 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1495 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1496 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1497 auto &AA = AM.getResult<AAManager>(F);
1498 bool MadeCFGChange = false;
1499 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1500 // No changes, all analyses are preserved.
1501 return PreservedAnalyses::all();
1502 }
1503 // Mark all the analyses that instcombine updates as preserved.
1504 PreservedAnalyses PA;
1505 if (MadeCFGChange)
1506 PA.preserve<DominatorTreeAnalysis>();
1507 else
1508 PA.preserveSet<CFGAnalyses>();
1509 return PA;
1510}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:992
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
BinaryOperator * Mul
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2494
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2041
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
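The IRBuilder entries above are the creation routines this file leans on. A rough sketch of the call style, inserting new instructions right before an existing one; the function and value names are placeholders, not identifiers from the pass:
// Sketch: truncate a wide value to i8 and use it as a pointer offset.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
static Value *emitLowByteGEP(Instruction &InsertPt, Value *Wide, Value *Base) {
  IRBuilder<> Builder(&InsertPt);                    // insert before InsertPt
  Builder.SetCurrentDebugLocation(InsertPt.getDebugLoc()); // keep its debug loc
  Value *Low = Builder.CreateTrunc(Wide, Builder.getInt8Ty(), "low8");
  return Builder.CreateInBoundsPtrAdd(Base, Low, "byteptr");
}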
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
size_type size() const
Definition MapVector.h:56
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
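BatchAAResults (listed further up) and MemoryLocation usually appear together when asking whether an instruction can write the bytes a load reads. A hedged sketch of that query; the helper name is invented for the example:
// Sketch: may I modify the memory that LI loads from?
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
static bool mayClobberLoad(BatchAAResults &BAA, const Instruction *I,
                           const LoadInst *LI) {
  MemoryLocation Loc = MemoryLocation::get(LI);  // size + AA metadata of LI
  return isModSet(BAA.getModRefInfo(I, Loc));    // Mod bit => possible clobber
}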
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
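Together, the PreservedAnalyses entries above describe the standard new-pass-manager contract: return everything preserved when nothing changed, otherwise report at least which analysis sets survive. A generic sketch; MyPass is a placeholder, not a pass defined in this file:
// Sketch: the usual shape of a function pass under the new pass manager.
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;
struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = false;
    // ... transform F and set Changed accordingly ...
    if (!Changed)
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>(); // only instructions changed, not the CFG
    return PA;
  }
};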
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
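getLibFunc is the usual gate before folding a known C library call. The sketch below checks for memchr purely as an example of the lookup; the helper name is made up:
// Sketch: is CI a direct call to a memchr the target actually provides?
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
static bool isRecognizedMemChr(const CallInst &CI,
                               const TargetLibraryInfo &TLI) {
  Function *Callee = CI.getCalledFunction();
  if (!Callee)
    return false;                                 // indirect call
  LibFunc LF;
  return TLI.getLibFunc(Callee->getName(), LF) && // name -> LibFunc enum
         TLI.has(LF) && LF == LibFunc_memchr;     // available on this target
}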
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_RecipThroughput
Reciprocal throughput.
@ None
The cast is not used with a load/store of any kind.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
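The Type queries above typically gate folds to plain integer types and compute replacement widths. A small sketch; rounding narrow types up to 32 bits is an arbitrary choice for illustration:
// Sketch: map an integer type to one of at least 32 bits, else bail out.
#include "llvm/IR/Type.h"
using namespace llvm;
static Type *atLeast32Bits(Type *Ty) {
  if (!Ty->isIntegerTy())
    return nullptr;                               // only handle integers
  unsigned Bits = Ty->getScalarSizeInBits();
  return Type::getIntNTy(Ty->getContext(), Bits < 32 ? 32 : Bits);
}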
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
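replaceAllUsesWith and takeName are the standard pair when one value replaces another; a sketch of the hand-off (this is the general idiom, not a copy of this file's code):
// Sketch: make New stand in for Old everywhere, then delete Old.
#include "llvm/IR/Instruction.h"
using namespace llvm;
static void replaceAndErase(Instruction *Old, Value *New) {
  New->takeName(Old);             // keep the readable name on the new value
  Old->replaceAllUsesWith(New);   // rewrite every use of Old
  Old->eraseFromParent();         // Old is now dead; unlink and delete it
}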
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
#define UINT64_MAX
Definition DataTypes.h:77
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
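The PatternMatch helpers above compose into declarative matchers over the IR. As a self-contained illustration of the style (this particular pattern is not one the pass handles), the sketch below recognizes an is-even test of the form (X & 1) == 0:
// Sketch: match "(X & 1) == 0" and capture X.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
static bool matchIsEven(Value *V, Value *&X) {
  return match(V, m_SpecificICmp(ICmpInst::ICMP_EQ,
                                 m_c_And(m_Value(X), m_One()), m_Zero()));
}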
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:646
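make_early_inc_range is what allows a transform to erase the instruction it is currently visiting without invalidating the walk; a sketch of that idiom over a single block, using the generic triviality check rather than anything specific to this pass:
// Sketch: delete trivially dead instructions while iterating a block.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
static bool dropDeadInBlock(BasicBlock &BB) {
  bool Changed = false;
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I)) {
      I.eraseFromParent();        // safe: the iterator has already advanced
      Changed = true;
    }
  return Changed;
}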
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:721
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
auto reverse(ContainerTy &&C)
Definition STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1652
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
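getUnderlyingObject and Value::stripAndAccumulateConstantOffsets (listed earlier) answer related questions at different precision: the former only finds the base object, the latter also accumulates the constant byte offset. A sketch of the offset form; sizing the APInt from the pointer's index width is the important detail:
// Sketch: strip GEPs with constant offsets, accumulating them into Offset.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
using namespace llvm;
static const Value *baseAndConstOffset(const Value *Ptr, const DataLayout &DL,
                                       APInt &Offset) {
  Offset = APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  return Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                /*AllowNonInbounds=*/true);
}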
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
LoadInst * RootInsert
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
bool operator<(const PartStore &Other) const
bool isCompatibleWith(const PartStore &Other) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:760
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:249