//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

static cl::opt<bool>
    VerifyKnownBits("instcombine-verify-known-bits",
                    cl::desc("Verify that computeKnownBits() and "
                             "SimplifyDemandedBits() are consistent"),
                    cl::Hidden, cl::init(false));

static cl::opt<unsigned> SimplifyDemandedVectorEltsDepthLimit(
    "instcombine-simplify-vector-elts-depth",
    cl::desc(
        "Depth limit when simplifying vector instructions and their operands"),
    cl::Hidden, cl::init(10));

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
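/// For example, with Demanded = 0xFF and a constant operand of 0x1FF, the
/// extra high bit cannot affect any user, so the constant shrinks to 0xFF.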
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}

/// Let N = 2 * M.
/// Given an N-bit integer representing a pack of two M-bit integers,
/// we can select one of the packed integers by right-shifting by either
/// zero or M (which is the most straightforward to check if M is a power
/// of 2), and then isolating the lower M bits. In this case, we can
/// represent the shift as a select on whether the shr amount is nonzero.
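/// For example, with M = 16 and only the low 16 bits demanded,
///   lshr (or (shl %upper, 16), %lower), %amt
/// yields %upper when %amt == 16 and %lower when %amt == 0, so it can be
/// rewritten as: select (%amt == 0), %lower, %upper.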
static Value *simplifyShiftSelectingPackedElement(Instruction *I,
                                                  const APInt &DemandedMask,
                                                  InstCombinerImpl &IC,
                                                  unsigned Depth) {
  assert(I->getOpcode() == Instruction::LShr &&
         "Only lshr instruction supported");

  uint64_t ShlAmt;
  Value *Upper, *Lower;
  if (!match(I->getOperand(0),
             m_OneUse(m_c_DisjointOr(
                 m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
                 m_Value(Lower)))))
    return nullptr;

  if (!isPowerOf2_64(ShlAmt))
    return nullptr;

  const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
  if (DemandedBitWidth > ShlAmt)
    return nullptr;

  // Check that upper demanded bits are not lost from lshift.
  if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
    return nullptr;

  KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth);
  if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
    return nullptr;

  Value *ShrAmt = I->getOperand(1);
  KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth);

  // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
  // zero.
  if (~KnownShrBits.Zero != ShlAmt)
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(I);
  Value *ShrAmtZ =
      IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
                              ShrAmt->getName() + ".z");
  // There is no existing !prof metadata from which we could derive the !prof
  // metadata for this select.
  Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower,
                                                            Upper, DEBUG_TYPE);
  Select->takeName(I);
  return Select;
}

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
                                                       KnownBits &Known) {
  APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     SQ.getWithInstruction(&Inst));
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
  KnownBits Known(getBitWidth(Inst.getType(), DL));
  return SimplifyDemandedInstructionBits(Inst, Known);
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                            const APInt &DemandedMask,
                                            KnownBits &Known,
                                            const SimplifyQuery &Q,
                                            unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *V = U.get();
  if (isa<Constant>(V)) {
    llvm::computeKnownBits(V, Known, Q, Depth);
    return false;
  }

  Known.resetAll();
  if (DemandedMask.isZero()) {
    // Not demanding any bits from V.
    replaceUse(U, UndefValue::get(V->getType()));
    return true;
  }

  Instruction *VInst = dyn_cast<Instruction>(V);
  if (!VInst) {
    llvm::computeKnownBits(V, Known, Q, Depth);
    return false;
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return false;

  Value *NewVal;
  if (VInst->hasOneUse()) {
    // If the instruction has one use, we can directly simplify it.
    NewVal = SimplifyDemandedUseBits(VInst, DemandedMask, Known, Q, Depth);
  } else {
    // If there are multiple uses of this instruction, then we can simplify
    // VInst to some other value, but not modify the instruction.
    NewVal =
        SimplifyMultipleUseDemandedBits(VInst, DemandedMask, Known, Q, Depth);
  }
  if (!NewVal) return false;
  if (Instruction *OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}

/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false after
/// analyzing the expression: Known.One contains all the bits that are known to
/// be one in the expression, and Known.Zero contains all the bits that are
/// known to be zero in the expression. These are provided to potentially allow
/// the caller (which might recursively be SimplifyDemandedBits itself) to
/// simplify the expression.
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
/// are accurate even for bits not in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
/// be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This returns
/// some other non-null value if it found out that V is equal to another value
/// in the context where the specified bits are demanded, but not for all users.
Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
                                                 const APInt &DemandedMask,
                                                 KnownBits &Known,
                                                 const SimplifyQuery &Q,
                                                 unsigned Depth) {
  assert(I != nullptr && "Null pointer of Value???");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = I->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // Update flags after simplifying an operand based on the fact that some high
  // order bits are not demanded.
  auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I,
                                                  unsigned NLZ) {
    if (NLZ > 0) {
      // Disable the nsw and nuw flags here: We can no longer guarantee that
      // we won't wrap after simplification. Removing the nsw/nuw flags is
      // legal here because the top bit is not demanded.
      I->setHasNoSignedWrap(false);
      I->setHasNoUnsignedWrap(false);
    }
    return I;
  };

  // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
  // about the high bits of the operands.
  auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
    unsigned NLZ = DemandedMask.countl_zero();
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Q, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1)) {
      disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
      return true;
    }
    return false;
  };

  switch (I->getOpcode()) {
  default:
    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, Q,
                             Depth + 1))
      return I;

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    break;
  }
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, Q,
                             Depth + 1)) {
      // The disjoint flag may no longer hold.
      I->dropPoisonGeneratingFlags();
      return I;
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // Infer the disjoint flag if no common bits are set.
    if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
      WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
          RHSCache(I->getOperand(1), RHSKnown);
      if (haveNoCommonBitsSet(LHSCache, RHSCache, Q)) {
        cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
        return I;
      }
    }

    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1))
      return I;
    Value *LHS, *RHS;
    if (DemandedMask == 1 &&
        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
      // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      auto *Xor = Builder.CreateXor(LHS, RHS);
      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
      if (DemandedMask.isAllOnes())
        cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
      Or->takeName(I);
      return InsertNewInstWith(Or, I->getIterator());
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, I->getIterator());
    }

    // If the RHS is a constant, see if we can change it. Don't alter a -1
    // constant because that's a canonical 'not' op, and that is better for
    // combining, SCEV, and codegen.
    const APInt *C;
    if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
      if ((*C | ~DemandedMask).isAllOnes()) {
        // Force bits to 1 to create a 'not' op.
        I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
        return I;
      }
      // If we can't turn this into a 'not', try to shrink the constant.
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
    }

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
      ConstantInt *AndRHS, *XorRHS;
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
          match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, I->getIterator());

        Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, I->getIterator());
      }
    }
    break;
  }
  case Instruction::Select: {
    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Q, Depth + 1))
      return I;

    // If the operands are constants, see if we can simplify them.
    // This is similar to ShrinkDemandedConstant, but for a select we want to
    // try to keep the selected constants the same as the icmp value constants,
    // if we can. This helps avoid breaking apart (and helps put back together)
    // canonical patterns like min and max.
    auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
                                         const APInt &DemandedMask) {
      const APInt *SelC;
      if (!match(I->getOperand(OpNo), m_APInt(SelC)))
        return false;

      // Get the constant out of the ICmp, if there is one.
      // Only try this when exactly 1 operand is a constant (if both operands
      // are constant, the icmp should eventually simplify). Otherwise, we may
      // invert the transform that reduces set bits and infinite-loop.
      Value *X;
      const APInt *CmpC;
      if (!match(I->getOperand(0), m_ICmp(m_Value(X), m_APInt(CmpC))) ||
          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
        return ShrinkDemandedConstant(I, OpNo, DemandedMask);

      // If the constant is already the same as the ICmp, leave it as-is.
      if (*CmpC == *SelC)
        return false;
      // If the constants are not already the same, but can be with the demand
      // mask, use the constant value from the ICmp.
      if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
        I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
        return true;
      }
      return ShrinkDemandedConstant(I, OpNo, DemandedMask);
    };
    if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
        CanonicalizeSelectConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    adjustKnownBitsForSelectArm(LHSKnown, I->getOperand(0), I->getOperand(1),
                                /*Invert=*/false, Q, Depth);
    adjustKnownBitsForSelectArm(RHSKnown, I->getOperand(0), I->getOperand(2),
                                /*Invert=*/true, Q, Depth);
    Known = LHSKnown.intersectWith(RHSKnown);
    break;
  }
  case Instruction::Trunc: {
    // If we do not demand the high bits of a right-shifted and truncated value,
    // then we may be able to truncate it before the shift.
    Value *X;
    const APInt *C;
    if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
      // The shift amount must be valid (not poison) in the narrow type, and
      // it must not be greater than the high bits demanded of the result.
      if (C->ult(VTy->getScalarSizeInBits()) &&
          C->ule(DemandedMask.countl_zero())) {
        // trunc (lshr X, C) --> lshr (trunc X), C
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Trunc = Builder.CreateTrunc(X, VTy);
        return Builder.CreateLShr(Trunc, C->getZExtValue());
      }
    }
  }
    [[fallthrough]];
  case Instruction::ZExt: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Q,
                             Depth + 1)) {
      // For zext nneg, we may have dropped the instruction which made the
      // input non-negative.
      I->dropPoisonGeneratingFlags();
      return I;
    }
    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
    if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
        !InputKnown.isNegative())
      InputKnown.makeNonNegative();
    Known = InputKnown.zextOrTrunc(BitWidth);

    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth - 1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Q, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded,
    // convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
      NewCast->takeName(I);
      return InsertNewInstWith(NewCast, I->getIterator());
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    break;
  }
  case Instruction::Add: {
    if ((DemandedMask & 1) == 0) {
      // If we do not need the low bit, try to convert bool math to logic:
      // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
      Value *X, *Y;
      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
                           m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      ----------
        // Y:0 |  0  |  0 |
        // Y:1 | -1  |  0 |
        //      ----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
        return Builder.CreateSExt(AndNot, VTy);
      }

      // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
      if (match(I, m_Add(m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
          (I->getOperand(0)->hasOneUse() || I->getOperand(1)->hasOneUse())) {

        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0 | -1  | -1 |
        // Y:1 | -1  |  0 |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Or = Builder.CreateOr(X, Y);
        return Builder.CreateSExt(Or, VTy);
      }
    }

    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and known to be zero in one operand,
    // then we don't need to demand them from the other operand, since they
    // can't cause overflow into any bits that are demanded in the result.
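    // e.g. in (X + (Y << 4)) with demanded mask 0xF0, the low 4 bits of X are
    // not demanded: added to the known-zero low nibble of the RHS, they cannot
    // produce a carry into bit 4.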
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be adding zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the
    // DemandedMask.
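    // e.g. with demanded mask 0xF, (add X, 8) --> (xor X, 8): any carry out of
    // bit 3 only affects bits that are not demanded.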
    {
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) &&
          C->isOneBitSet(DemandedMask.getActiveBits() - 1)) {
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
      }
    }

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::Sub: {
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and are known to be zero in RHS,
    // then we don't need to demand them from LHS, since they can't cause a
    // borrow from any bits that are demanded in the result.
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be subtracting zeros from every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Canonicalize sub mask, X -> ~X
    const APInt *LHSC;
    if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
        DemandedFromOps.isSubsetOf(*LHSC)) {
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      return Builder.CreateNot(I->getOperand(1));
    }

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::Mul: {
    APInt DemandedFromOps;
    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
      return I;

    if (DemandedMask.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
      // odd (has LSB set), then the left-shifted low bit of X is the answer.
      unsigned CTZ = DemandedMask.countr_zero();
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
        Constant *ShiftC = ConstantInt::get(VTy, CTZ);
        Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
        return InsertNewInstWith(Shl, I->getIterator());
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
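    // (Bit 1 of X*X is X[0]*X[1] + X[1]*X[0] = 2*X[0]*X[1], which is always
    // even, and the bit-0 partial product X[0]*X[0] generates no carry.)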
    if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
      Constant *One = ConstantInt::get(VTy, 1);
      Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
      return InsertNewInstWith(And1, I->getIterator());
    }

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      // Do not simplify if shl is part of a funnel-shift pattern.
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Q, Depth);
              break;
            }
          }
        }
      }

      // If we only want bits that already match the signbit, then we don't
      // need to shift.
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);
      if (DemandedMask.countr_zero() >= ShiftAmt) {
        if (I->hasNoSignedWrap()) {
          unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
          unsigned SignBits =
              ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
          if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits)
            return I->getOperand(0);
        }

        // If we can pre-shift a right-shifted constant to the left without
        // losing any high bits and we don't demand the low bits, then eliminate
        // the left-shift:
        // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
          Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
                                                        LeftShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC,
                                           LeftShiftAmtC, DL) == C) {
            Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
            return InsertNewInstWith(Lshr, I->getIterator());
          }
        }
      }

      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt + 1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1))
        return I;

      Known = KnownBits::shl(Known,
                             KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
                             /* NUW */ IOp->hasNoUnsignedWrap(),
                             /* NSW */ IOp->hasNoSignedWrap());
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedMask.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Q, Depth + 1)) {
          // We can't guarantee that nsw/nuw hold after simplifying the operand.
          I->dropPoisonGeneratingFlags();
          return I;
        }
      }
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Do not simplify if lshr is part of a funnel-shift pattern.
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Q, Depth);
              break;
            }
          }
        }
      }

      // If we are just demanding the shifted sign bit and below, then this can
      // be treated as an ASHR in disguise.
      if (DemandedMask.countl_zero() >= ShiftAmt) {
        // If we only want bits that already match the signbit then we don't
        // need to shift.
        unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
        unsigned SignBits =
            ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
        if (SignBits >= NumHiDemandedBits)
          return I->getOperand(0);

        // If we can pre-shift a left-shifted constant to the right without
        // losing any low bits (we already know we don't demand the high bits),
        // then eliminate the right-shift:
        // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
          Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
                                                        RightShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
                                           RightShiftAmtC, DL) == C) {
            Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
            return InsertNewInstWith(Shl, I->getIterator());
          }
        }

        const APInt *Factor;
        if (match(I->getOperand(0),
                  m_OneUse(m_Mul(m_Value(X), m_APInt(Factor)))) &&
            Factor->countr_zero() >= ShiftAmt) {
          BinaryOperator *Mul = BinaryOperator::CreateMul(
              X, ConstantInt::get(X->getType(), Factor->lshr(ShiftAmt)));
          return InsertNewInstWith(Mul, I->getIterator());
        }
      }

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
        // The exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }
      Known >>= ShiftAmt;
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
      break;
    }
    if (Value *V =
            simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth))
      return V;

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::AShr: {
    unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
    if (SignBits >= NumHiDemandedBits)
      return I->getOperand(0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOne()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, I->getIterator());
    }

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the bits being shifted in are demanded, then we should set
      // the sign bit as demanded.
      bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt;
      if (ShiftedInBitsDemanded)
        DemandedMaskIn.setSignBit();
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
        // The exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      // If the input sign bit is known to be zero, or if none of the shifted in
      // bits are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        LShr->takeName(I);
        return InsertNewInstWith(LShr, I->getIterator());
      }

      Known = KnownBits::ashr(
          Known, KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
          ShiftAmt != 0, I->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // TODO: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countr_zero();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Q, Depth + 1)) {
        // We can't guarantee that "exact" is still true after changing the
        // dividend.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA),
                              cast<BinaryOperator>(I)->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::SRem: {
    const APInt *Rem;
    if (match(I->getOperand(1), m_APInt(Rem)) && Rem->isPowerOf2()) {
      if (DemandedMask.ult(*Rem)) // srem won't affect demanded bits
        return I->getOperand(0);

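      // For a power-of-2 divisor, the remainder otherwise depends only on the
      // low bits and the sign bit of the dividend, so those are all we demand.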
      APInt LowBits = *Rem - 1;
      APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
      if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Q, Depth + 1))
        return I;
      Known = KnownBits::srem(LHSKnown, KnownBits::makeConstant(*Rem));
      break;
    }

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::Call: {
    bool KnownBitsComputed = false;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::abs: {
        if (DemandedMask == 1)
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::ctpop: {
        // Checking if the number of clear bits is odd (parity)? If the type has
        // an even number of bits, that's the same as checking if the number of
        // set bits is odd, so we can eliminate the 'not' op.
        Value *X;
        if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
            match(II->getArgOperand(0), m_Not(m_Value(X)))) {
          Function *Ctpop = Intrinsic::getOrInsertDeclaration(
              II->getModule(), Intrinsic::ctpop, VTy);
          return InsertNewInstWith(CallInst::Create(Ctpop, {X}), I->getIterator());
        }
        break;
      }
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countl_zero();
        unsigned NTZ = DemandedMask.countr_zero();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ = alignDown(NLZ, 8);
        NTZ = alignDown(NTZ, 8);
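        // e.g. for i32, if only bits 8..15 of the result are demanded, they
        // come from bits 16..23 of the input: bswap(X) --> lshr X, 8.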
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth - NLZ - NTZ == 8) {
          // Replace this with either a left or right shift to get the byte into
          // the right place.
          Instruction *NewVal;
          if (NLZ > NTZ)
            NewVal = BinaryOperator::CreateLShr(
                II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ));
          else
            NewVal = BinaryOperator::CreateShl(
                II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, I->getIterator());
        }
        break;
      }
      case Intrinsic::ptrmask: {
        unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
        RHSKnown = KnownBits(MaskWidth);
        // If either the LHS or the RHS are Zero, the result is zero.
        if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1) ||
            SimplifyDemandedBits(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
                RHSKnown, Q, Depth + 1))
          return I;

        // TODO: Should be 1-extend.
        RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);

        Known = LHSKnown & RHSKnown;
        KnownBitsComputed = true;

        // If the client is only demanding bits we know to be zero, return
        // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
        // provenance, but making the mask zero will be easily optimizable in
        // the backend.
        if (DemandedMask.isSubsetOf(Known.Zero) &&
            !match(I->getOperand(1), m_Zero()))
          return replaceOperand(
              *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));

        // A mask that covers all demanded bits does nothing.
        // NOTE: We may have attributes associated with the return value of the
        // llvm.ptrmask intrinsic that will be lost when we just return the
        // operand. We should try to preserve them.
        if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
          return I->getOperand(0);

        // If the RHS is a constant, see if we can simplify it.
        if (ShrinkDemandedConstant(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
          return I;

        // Combine:
        // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
        //   -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
        // where only the low bits known to be zero in the pointer are changed
        Value *InnerPtr;
        uint64_t GEPIndex;
        uint64_t PtrMaskImmediate;
        if (match(I, m_Intrinsic<Intrinsic::ptrmask>(
                         m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
                         m_ConstantInt(PtrMaskImmediate)))) {

          LHSKnown = computeKnownBits(InnerPtr, I, Depth + 1);
          if (!LHSKnown.isZero()) {
            const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
            uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;

            uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
            uint64_t MaskedLowBitsGEPIndex =
                GEPIndex & PointerAlignBits & PtrMaskImmediate;

            uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;

            if (MaskedGEPIndex != GEPIndex) {
              auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
              Builder.SetInsertPoint(I);
              Type *GEPIndexType =
                  DL.getIndexType(GEP->getPointerOperand()->getType());
              Value *MaskedGEP = Builder.CreateGEP(
                  GEP->getSourceElementType(), InnerPtr,
                  ConstantInt::get(GEPIndexType, MaskedGEPIndex),
                  GEP->getName(), GEP->isInBounds());

              replaceOperand(*I, 0, MaskedGEP);
              return I;
            }
          }
        }

        break;
      }

      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;
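        // (For 0 < C < BitWidth, fshr X, Y, C is equivalent to fshl X, Y,
        // BitWidth - C; the full-width APInt shifts below also handle the
        // resulting ShiftAmt == BitWidth case.)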

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (I->getOperand(0) != I->getOperand(1)) {
          if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Q,
                                   Depth + 1) ||
              SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Q,
                                   Depth + 1)) {
            // Range attribute or metadata may no longer hold.
            I->dropPoisonGeneratingAnnotations();
            return I;
          }
        } else { // fshl is a rotate
          // Avoid converting rotate into funnel shift.
          // Only simplify if one operand is constant.
          LHSKnown = computeKnownBits(I->getOperand(0), I, Depth + 1);
          if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
              !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) {
            replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
            return I;
          }

          RHSKnown = computeKnownBits(I->getOperand(1), I, Depth + 1);
          if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
              !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) {
            replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
            return I;
          }
        }

        LHSKnown <<= ShiftAmt;
        RHSKnown >>= BitWidth - ShiftAmt;
        Known = LHSKnown.unionWith(RHSKnown);
        KnownBitsComputed = true;
        break;
      }
      case Intrinsic::umax: {
        // UMax(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-zero bit of C.
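        // e.g. umax(A, 7) == A when only bits 3 and up are demanded: if A < 7,
        // both A and 7 are zero above bit 2, so the demanded bits agree.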
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getActiveBits())
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::umin: {
        // UMin(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-one bit of C.
        // This comes from using De Morgan's on the above umax example.
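        // e.g. on i16, umin(A, 0xFFF8) == A when only bits 3 and up are
        // demanded: if A > 0xFFF8, A and C agree on all bits above bit 2.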
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getBitWidth() - C->countl_one())
          return II->getArgOperand(0);
        break;
      }
      default: {
        // Handle target specific intrinsics
        std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V)
          return *V;
        break;
      }
      }
    }

    if (!KnownBitsComputed)
      llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  }

  if (I->getType()->isPointerTy()) {
    Align Alignment = I->getPointerAlignment(DL);
    Known.Zero.setLowBits(Log2(Alignment));
  }

  // If the client is only demanding bits that we know, return the known
  // constant. We can't directly simplify pointers to a constant because of
  // pointer provenance.
  // TODO: We could return `(inttoptr const)` for pointers.
  if (!I->getType()->isPointerTy() &&
      DemandedMask.isSubsetOf(Known.Zero | Known.One))
    return Constant::getIntegerValue(VTy, Known.One);

  if (VerifyKnownBits) {
    KnownBits ReferenceKnown = llvm::computeKnownBits(I, Q, Depth);
    if (Known != ReferenceKnown) {
      errs() << "Mismatched known bits for " << *I << " in "
             << I->getFunction()->getName() << "\n";
      errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
      errs() << "SimplifyDemandedBits(): " << Known << "\n";
      std::abort();
    }
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known,
    const SimplifyQuery &Q, unsigned Depth) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in all User's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);
    llvm::computeKnownBitsFromContext(I, Known, Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);
    llvm::computeKnownBitsFromContext(I, Known, Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);
    llvm::computeKnownBitsFromContext(I, Known, Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Add: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
    llvm::computeKnownBitsFromContext(I, Known, Q, Depth);
    break;
  }
  case Instruction::Sub: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
    llvm::computeKnownBitsFromContext(I, Known, Q, Depth);
    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Q, Depth);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary,
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Q, Depth);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where C1 and C2 are constants, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S = {bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
///  1) All the bits in S are 0; in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2): some bits in S are 0, and we don't care
///     about the rest.
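///
/// For example, "(X lshr 1) shl 3" and "X shl 2" agree on all bits above
/// bit 2 and can differ only in bit 2, so the rewrite is legal whenever
/// bits 2:0 are not demanded.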
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombinerImpl::simplifyShrShlDemandedBits(
    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnes(BitWidth));
  APInt BitMask2(APInt::getAllOnes(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt) :
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, Shl->getIterator());
  }

  return nullptr;
}

/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are poison and
/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single caller. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resultant value is
/// returned. This returns null if no change was made.
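///
/// For example, if only the low two elements of a <4 x i32> constant are
/// demanded, the two high elements can be rewritten to poison.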
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &PoisonElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
1462 // Cannot analyze scalable type. The number of vector elements is not a
1463 // compile-time constant.
1464 if (isa<ScalableVectorType>(V->getType()))
1465 return nullptr;
1466
1467 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
1468 APInt EltMask(APInt::getAllOnes(VWidth));
1469 assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
1470
1471 if (match(V, m_Poison())) {
1472 // If the entire vector is poison, just return this info.
1473 PoisonElts = EltMask;
1474 return nullptr;
1475 }
1476
1477 if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
1478 PoisonElts = EltMask;
1479 return PoisonValue::get(V->getType());
1480 }
1481
1482 PoisonElts = 0;
1483
1484 if (auto *C = dyn_cast<Constant>(V)) {
1485 // Check if this is identity. If so, return 0 since we are not simplifying
1486 // anything.
1487 if (DemandedElts.isAllOnes())
1488 return nullptr;
1489
1490 Type *EltTy = cast<VectorType>(V->getType())->getElementType();
1493 for (unsigned i = 0; i != VWidth; ++i) {
1494 if (!DemandedElts[i]) { // If not demanded, set to poison.
1495 Elts.push_back(Poison);
1496 PoisonElts.setBit(i);
1497 continue;
1498 }
1499
1500 Constant *Elt = C->getAggregateElement(i);
1501 if (!Elt) return nullptr;
1502
1503 Elts.push_back(Elt);
1504 if (isa<PoisonValue>(Elt)) // Already poison.
1505 PoisonElts.setBit(i);
1506 }
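// e.g. With DemandedElts == 0b01, the constant <2 x i32> <i32 7, i32 9> is
// rebuilt as <2 x i32> <i32 7, i32 poison>.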
1507
1508 // If we changed the constant, return it.
1509 Constant *NewCV = ConstantVector::get(Elts);
1510 return NewCV != C ? NewCV : nullptr;
1511 }
1512
1513 // Limit search depth.
1514 if (Depth == SimplifyDemandedVectorEltsDepthLimit)
1515 return nullptr;
1516
1517 if (!AllowMultipleUsers) {
1518 // If multiple users are using the root value, proceed with
1519 // simplification conservatively assuming that all elements
1520 // are needed.
1521 if (!V->hasOneUse()) {
1522 // Quit if we find multiple users of a non-root value though.
1523 // They'll be handled when it's their turn to be visited by
1524 // the main instcombine process.
1525 if (Depth != 0)
1526 // TODO: Just compute the PoisonElts information recursively.
1527 return nullptr;
1528
1529 // Conservatively assume that all elements are needed.
1530 DemandedElts = EltMask;
1531 }
1532 }
1533
1534 Instruction *I = dyn_cast<Instruction>(V);
1535 if (!I) return nullptr; // Only analyze instructions.
1536
1537 bool MadeChange = false;
1538 auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
1539 APInt Demanded, APInt &Undef) {
1540 auto *II = dyn_cast<IntrinsicInst>(Inst);
1541 Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
1542 if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
1543 replaceOperand(*Inst, OpNum, V);
1544 MadeChange = true;
1545 }
1546 };
1547
1548 APInt PoisonElts2(VWidth, 0);
1549 APInt PoisonElts3(VWidth, 0);
1550 switch (I->getOpcode()) {
1551 default: break;
1552
1553 case Instruction::GetElementPtr: {
1554 // The LangRef requires that struct geps have all constant indices. As
1555 // such, we can't convert any operand to partial undef.
1556 auto mayIndexStructType = [](GetElementPtrInst &GEP) {
1557 for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
1558 I != E; I++)
1559 if (I.isStruct())
1560 return true;
1561 return false;
1562 };
1563 if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
1564 break;
1565
1566 // Conservatively track the demanded elements back through any vector
1567 // operands we may have. We know there must be at least one, or we
1568 // wouldn't have a vector result to get here. Note that we intentionally
1569 // merge the undef bits here since gepping with either a poison base or
1570 // index results in poison.
1571 for (unsigned i = 0; i < I->getNumOperands(); i++) {
1572 if (i == 0 ? match(I->getOperand(i), m_Undef())
1573 : match(I->getOperand(i), m_Poison())) {
1574 // If the entire vector is undefined, just return this info.
1575 PoisonElts = EltMask;
1576 return nullptr;
1577 }
1578 if (I->getOperand(i)->getType()->isVectorTy()) {
1579 APInt PoisonEltsOp(VWidth, 0);
1580 simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
1581 // gep(x, undef) is not undef, so skip considering idx ops here
1582 // Note that we could propagate poison, but we can't distinguish between
1583 // undef & poison bits ATM
1584 if (i == 0)
1585 PoisonElts |= PoisonEltsOp;
1586 }
1587 }
1588
1589 break;
1590 }
1591 case Instruction::InsertElement: {
1592 // If this is a variable index, we don't know which element it overwrites.
1593 // so demand exactly the same input as we produce.
1594 ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
1595 if (!Idx) {
1596 // Note that we can't propagate undef elt info, because we don't know
1597 // which elt is getting updated.
1598 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
1599 break;
1600 }
1601
1602 // The element inserted overwrites whatever was there, so the input demanded
1603 // set is simpler than the output set.
1604 unsigned IdxNo = Idx->getZExtValue();
1605 APInt PreInsertDemandedElts = DemandedElts;
1606 if (IdxNo < VWidth)
1607 PreInsertDemandedElts.clearBit(IdxNo);
1608
1609 // If we only demand the element that is being inserted and that element
1610 // was extracted from the same index in another vector with the same type,
1611 // replace this insert with that other vector.
1612 // Note: This is attempted before the call to simplifyAndSetOp because that
1613 // may change PoisonElts to a value that does not match with Vec.
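// e.g. %e = extractelement <4 x float> %v, i64 2
// %i = insertelement <4 x float> %w, float %e, i64 2
// With only element 2 of %i demanded, %i can simply be replaced by %v.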
1614 Value *Vec;
1615 if (PreInsertDemandedElts == 0 &&
1616 match(I->getOperand(1),
1617 m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
1618 Vec->getType() == I->getType()) {
1619 return Vec;
1620 }
1621
1622 simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
1623
1624 // If this is inserting an element that isn't demanded, remove this
1625 // insertelement.
1626 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1627 Worklist.push(I);
1628 return I->getOperand(0);
1629 }
1630
1631 // The inserted element is defined.
1632 PoisonElts.clearBit(IdxNo);
1633 break;
1634 }
1635 case Instruction::ShuffleVector: {
1636 auto *Shuffle = cast<ShuffleVectorInst>(I);
1637 assert(Shuffle->getOperand(0)->getType() ==
1638 Shuffle->getOperand(1)->getType() &&
1639 "Expected shuffle operands to have same type");
1640 unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
1641 ->getNumElements();
1642 // Handle trivial case of a splat. Only check the first element of LHS
1643 // operand.
1644 if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
1645 DemandedElts.isAllOnes()) {
1646 if (!isa<PoisonValue>(I->getOperand(1))) {
1647 I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
1648 MadeChange = true;
1649 }
1650 APInt LeftDemanded(OpWidth, 1);
1651 APInt LHSPoisonElts(OpWidth, 0);
1652 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1653 if (LHSPoisonElts[0])
1654 PoisonElts = EltMask;
1655 else
1656 PoisonElts.clearAllBits();
1657 break;
1658 }
1659
1660 APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
1661 for (unsigned i = 0; i < VWidth; i++) {
1662 if (DemandedElts[i]) {
1663 unsigned MaskVal = Shuffle->getMaskValue(i);
1664 if (MaskVal != -1u) {
1665 assert(MaskVal < OpWidth * 2 &&
1666 "shufflevector mask index out of range!");
1667 if (MaskVal < OpWidth)
1668 LeftDemanded.setBit(MaskVal);
1669 else
1670 RightDemanded.setBit(MaskVal - OpWidth);
1671 }
1672 }
1673 }
1674
1675 APInt LHSPoisonElts(OpWidth, 0);
1676 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1677
1678 APInt RHSPoisonElts(OpWidth, 0);
1679 simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
1680
1681 // If this shuffle does not change the vector length and the elements
1682 // demanded by this shuffle are an identity mask, then this shuffle is
1683 // unnecessary.
1684 //
1685 // We are assuming canonical form for the mask, so the source vector is
1686 // operand 0 and operand 1 is not used.
1687 //
1688 // Note that if an element is demanded and this shuffle mask is undefined
1689 // for that element, then the shuffle is not considered an identity
1690 // operation. The shuffle prevents poison from the operand vector from
1691 // leaking to the result by replacing poison with an undefined value.
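// e.g. shufflevector <4 x i32> %x, <4 x i32> poison,
// <4 x i32> <i32 0, i32 0, i32 2, i32 3>
// is treated as an identity of %x when element 1 is not demanded.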
1692 if (VWidth == OpWidth) {
1693 bool IsIdentityShuffle = true;
1694 for (unsigned i = 0; i < VWidth; i++) {
1695 unsigned MaskVal = Shuffle->getMaskValue(i);
1696 if (DemandedElts[i] && i != MaskVal) {
1697 IsIdentityShuffle = false;
1698 break;
1699 }
1700 }
1701 if (IsIdentityShuffle)
1702 return Shuffle->getOperand(0);
1703 }
1704
1705 bool NewPoisonElts = false;
1706 unsigned LHSIdx = -1u, LHSValIdx = -1u;
1707 unsigned RHSIdx = -1u, RHSValIdx = -1u;
1708 bool LHSUniform = true;
1709 bool RHSUniform = true;
1710 for (unsigned i = 0; i < VWidth; i++) {
1711 unsigned MaskVal = Shuffle->getMaskValue(i);
1712 if (MaskVal == -1u) {
1713 PoisonElts.setBit(i);
1714 } else if (!DemandedElts[i]) {
1715 NewPoisonElts = true;
1716 PoisonElts.setBit(i);
1717 } else if (MaskVal < OpWidth) {
1718 if (LHSPoisonElts[MaskVal]) {
1719 NewPoisonElts = true;
1720 PoisonElts.setBit(i);
1721 } else {
1722 LHSIdx = LHSIdx == -1u ? i : OpWidth;
1723 LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
1724 LHSUniform = LHSUniform && (MaskVal == i);
1725 }
1726 } else {
1727 if (RHSPoisonElts[MaskVal - OpWidth]) {
1728 NewPoisonElts = true;
1729 PoisonElts.setBit(i);
1730 } else {
1731 RHSIdx = RHSIdx == -1u ? i : OpWidth;
1732 RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
1733 RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
1734 }
1735 }
1736 }
1737
1738 // Try to transform a shuffle that takes exactly one element from a
1739 // constant vector operand into a single insertelement instruction.
1740 // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1741 // insertelement V, C[ci], ci-n
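// e.g. shufflevector <2 x i32> %v, <2 x i32> <i32 0, i32 7>,
// <2 x i32> <i32 0, i32 3>
// --> insertelement <2 x i32> %v, i32 7, i64 1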
1742 if (OpWidth ==
1743 cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
1744 Value *Op = nullptr;
1745 Constant *Value = nullptr;
1746 unsigned Idx = -1u;
1747
1748 // Find the constant-vector operand (LHS or RHS) supplying the single element.
1749 if (LHSIdx < OpWidth && RHSUniform) {
1750 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1751 Op = Shuffle->getOperand(1);
1752 Value = CV->getOperand(LHSValIdx);
1753 Idx = LHSIdx;
1754 }
1755 }
1756 if (RHSIdx < OpWidth && LHSUniform) {
1757 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1758 Op = Shuffle->getOperand(0);
1759 Value = CV->getOperand(RHSValIdx);
1760 Idx = RHSIdx;
1761 }
1762 }
1763 // Found constant vector with single element - convert to insertelement.
1764 if (Op && Value) {
1765 Instruction *New = InsertElementInst::Create(
1766 Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
1767 Shuffle->getName());
1768 InsertNewInstWith(New, Shuffle->getIterator());
1769 return New;
1770 }
1771 }
1772 if (NewPoisonElts) {
1773 // Add additional discovered undefs.
1774 SmallVector<int, 16> Elts;
1775 for (unsigned i = 0; i < VWidth; ++i) {
1776 if (PoisonElts[i])
1777 Elts.push_back(PoisonMaskElem);
1778 else
1779 Elts.push_back(Shuffle->getMaskValue(i));
1780 }
1781 Shuffle->setShuffleMask(Elts);
1782 MadeChange = true;
1783 }
1784 break;
1785 }
1786 case Instruction::Select: {
1787 // If this is a vector select, try to transform the select condition based
1788 // on the current demanded elements.
1789 SelectInst *Sel = cast<SelectInst>(I);
1790 if (Sel->getCondition()->getType()->isVectorTy()) {
1791 // TODO: We are not doing anything with PoisonElts based on this call.
1792 // It is overwritten below based on the other select operands. If an
1793 // element of the select condition is known undef, then we are free to
1794 // choose the output value from either arm of the select. If we know that
1795 // one of those values is undef, then the output can be undef.
1796 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1797 }
1798
1799 // Next, see if we can transform the arms of the select.
1800 APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
1801 if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
1802 for (unsigned i = 0; i < VWidth; i++) {
1803 Constant *CElt = CV->getAggregateElement(i);
1804
1805 // isNullValue() always returns false when called on a ConstantExpr.
1806 if (CElt->isNullValue())
1807 DemandedLHS.clearBit(i);
1808 else if (CElt->isOneValue())
1809 DemandedRHS.clearBit(i);
1810 }
1811 }
1812
1813 simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
1814 simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
1815
1816 // Output elements are undefined if the element from each arm is undefined.
1817 // TODO: This can be improved. See comment in select condition handling.
1818 PoisonElts = PoisonElts2 & PoisonElts3;
1819 break;
1820 }
1821 case Instruction::BitCast: {
1822 // Vector->vector casts only.
1823 VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1824 if (!VTy) break;
1825 unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
1826 APInt InputDemandedElts(InVWidth, 0);
1827 PoisonElts2 = APInt(InVWidth, 0);
1828 unsigned Ratio;
1829
1830 if (VWidth == InVWidth) {
1831 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1832 // elements as are demanded of us.
1833 Ratio = 1;
1834 InputDemandedElts = DemandedElts;
1835 } else if ((VWidth % InVWidth) == 0) {
1836 // If the number of elements in the output is a multiple of the number of
1837 // elements in the input then an input element is live if any of the
1838 // corresponding output elements are live.
1839 Ratio = VWidth / InVWidth;
1840 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1841 if (DemandedElts[OutIdx])
1842 InputDemandedElts.setBit(OutIdx / Ratio);
1843 } else if ((InVWidth % VWidth) == 0) {
1844 // If the number of elements in the input is a multiple of the number of
1845 // elements in the output then an input element is live if the
1846 // corresponding output element is live.
1847 Ratio = InVWidth / VWidth;
1848 for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1849 if (DemandedElts[InIdx / Ratio])
1850 InputDemandedElts.setBit(InIdx);
1851 } else {
1852 // Unsupported so far.
1853 break;
1854 }
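// e.g. For <2 x i32> -> <4 x i16>, demanding output elements 2 or 3 demands
// input element 1; for <4 x i16> -> <2 x i32>, demanding output element 1
// demands input elements 2 and 3.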
1855
1856 simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
1857
1858 if (VWidth == InVWidth) {
1859 PoisonElts = PoisonElts2;
1860 } else if ((VWidth % InVWidth) == 0) {
1861 // If the number of elements in the output is a multiple of the number of
1862 // elements in the input then an output element is undef if the
1863 // corresponding input element is undef.
1864 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1865 if (PoisonElts2[OutIdx / Ratio])
1866 PoisonElts.setBit(OutIdx);
1867 } else if ((InVWidth % VWidth) == 0) {
1868 // If the number of elements in the input is a multiple of the number of
1869 // elements in the output then an output element is undef if all of the
1870 // corresponding input elements are undef.
1871 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1872 APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
1873 if (SubUndef.popcount() == Ratio)
1874 PoisonElts.setBit(OutIdx);
1875 }
1876 } else {
1877 llvm_unreachable("Unimp");
1878 }
1879 break;
1880 }
1881 case Instruction::FPTrunc:
1882 case Instruction::FPExt:
1883 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1884 break;
1885
1886 case Instruction::Call: {
1887 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1888 if (!II) break;
1889 switch (II->getIntrinsicID()) {
1890 case Intrinsic::masked_gather: // fallthrough
1891 case Intrinsic::masked_load: {
1892 // Subtlety: If we load from a pointer, the pointer must be valid
1893 // regardless of whether the element is demanded. Doing otherwise risks
1894 // segfaults which didn't exist in the original program.
1895 APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
1896 DemandedPassThrough(DemandedElts);
1897 if (auto *CMask = dyn_cast<Constant>(II->getOperand(1))) {
1898 for (unsigned i = 0; i < VWidth; i++) {
1899 if (Constant *CElt = CMask->getAggregateElement(i)) {
1900 if (CElt->isNullValue())
1901 DemandedPtrs.clearBit(i);
1902 else if (CElt->isAllOnesValue())
1903 DemandedPassThrough.clearBit(i);
1904 }
1905 }
1906 }
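// e.g. With a constant mask <i1 true, i1 false>, lane 0 demands its pointer
// but not the passthrough, while lane 1 demands only the passthrough.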
1907
1908 if (II->getIntrinsicID() == Intrinsic::masked_gather)
1909 simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
1910 simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3);
1911
1912 // Output elements are undefined if the elements from both sources are.
1913 // TODO: can strengthen via mask as well.
1914 PoisonElts = PoisonElts2 & PoisonElts3;
1915 break;
1916 }
1917 default: {
1918 // Handle target specific intrinsics
1919 std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
1920 *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
1921 simplifyAndSetOp);
1922 if (V)
1923 return *V;
1924 break;
1925 }
1926 } // switch on IntrinsicID
1927 break;
1928 } // case Call
1929 } // switch on Opcode
1930
1931 // TODO: We bail completely on integer div/rem and shifts because they have
1932 // UB/poison potential, but that should be refined.
1933 BinaryOperator *BO;
1934 if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1935 Value *X = BO->getOperand(0);
1936 Value *Y = BO->getOperand(1);
1937
1938 // Look for an equivalent binop except that one operand has been shuffled.
1939 // If the demand for this binop only includes elements that are the same as
1940 // the other binop, then we may be able to replace this binop with a use of
1941 // the earlier one.
1942 //
1943 // Example:
1944 // %other_bo = bo (shuf X, {0}), Y
1945 // %this_extracted_bo = extelt (bo X, Y), 0
1946 // -->
1947 // %other_bo = bo (shuf X, {0}), Y
1948 // %this_extracted_bo = extelt %other_bo, 0
1949 //
1950 // TODO: Handle demand of an arbitrary single element or more than one
1951 // element instead of just element 0.
1952 // TODO: Unlike general demanded elements transforms, this should be safe
1953 // for any (div/rem/shift) opcode too.
1954 if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
1955 BO->hasOneUse()) {
1956
1957 auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
1958 // Try to use shuffle-of-operand in place of an operand:
1959 // bo X, Y --> bo (shuf X), Y
1960 // bo X, Y --> bo X, (shuf Y)
1961
1962 Value *OtherOp = MatchShufAsOp0 ? Y : X;
1963 if (!OtherOp->hasUseList())
1964 return nullptr;
1965
1966 BinaryOperator::BinaryOps Opcode = BO->getOpcode();
1967 Value *ShufOp = MatchShufAsOp0 ? X : Y;
1968
1969 for (User *U : OtherOp->users()) {
1970 ArrayRef<int> Mask;
1971 auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_Mask(Mask));
1972 if (BO->isCommutative()
1973 ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1974 : MatchShufAsOp0
1975 ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1976 : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
1977 if (match(Mask, m_ZeroMask()) && Mask[0] != PoisonMaskElem)
1978 if (DT.dominates(U, I))
1979 return U;
1980 }
1981 return nullptr;
1982 };
1983
1984 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
1985 return ShufBO;
1986 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
1987 return ShufBO;
1988 }
1989
1990 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1991 simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
1992
1993 // Output elements are undefined if both are undefined. Consider things
1994 // like undef & 0. The result is known zero, not undef.
1995 PoisonElts &= PoisonElts2;
1996 }
1997
1998 // If we've proven all of the lanes poison, return a poison value.
1999 // TODO: Intersect w/demanded lanes
2000 if (PoisonElts.isAllOnes())
2001 return PoisonValue::get(I->getType());
2002
2003 return MadeChange ? I : nullptr;
2004}
2005
2006/// For floating-point classes that resolve to a single bit pattern, return that
2007/// value.
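/// e.g. fcPosZero folds to +0.0 and fcNegInf to -inf, while fcZero (both
/// signs) has no single representative and yields a null result.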
2008 static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask,
2009 bool IsCanonicalizing = false) {
2010 if (Mask == fcNone)
2011 return PoisonValue::get(Ty);
2012
2013 if (Mask == fcPosZero)
2014 return Constant::getNullValue(Ty);
2015
2016 // Turn any possible snans into quiet nans if we can.
2017 if (Mask == fcNan && IsCanonicalizing)
2018 return ConstantFP::getQNaN(Ty);
2019
2020 // TODO: Support aggregate types that are allowed by FPMathOperator.
2021 if (Ty->isAggregateType())
2022 return nullptr;
2023
2024 switch (Mask) {
2025 case fcNegZero:
2026 return ConstantFP::getZero(Ty, true);
2027 case fcPosInf:
2028 return ConstantFP::getInfinity(Ty);
2029 case fcNegInf:
2030 return ConstantFP::getInfinity(Ty, true);
2031 default:
2032 return nullptr;
2033 }
2034}
2035
2036 Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
2037 FPClassTest DemandedMask,
2038 KnownFPClass &Known,
2039 Instruction *CxtI,
2040 unsigned Depth) {
2041 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2042 Type *VTy = V->getType();
2043
2044 assert(Known == KnownFPClass() && "expected uninitialized state");
2045
2046 if (DemandedMask == fcNone)
2047 return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
2048
2049 if (Depth == MaxAnalysisRecursionDepth)
2050 return nullptr;
2051
2052 Instruction *I = dyn_cast<Instruction>(V);
2053 if (!I) {
2054 // Handle constants and arguments
2055 Known = computeKnownFPClass(V, fcAllFlags, CxtI, Depth + 1);
2056
2057 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2058 if (ValidResults == fcNone)
2059 return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
2060
2061 // Do not try to replace values which are already constants (unless we are
2062 // folding to poison). Doing so could promote poison elements to non-poison
2063 // constants.
2064 if (isa<Constant>(V))
2065 return nullptr;
2066
2067 Value *FoldedToConst = getFPClassConstant(VTy, ValidResults);
2068 return FoldedToConst == V ? nullptr : FoldedToConst;
2069 }
2070
2071 if (!I->hasOneUse()) {
2072 Known = computeKnownFPClass(V, DemandedMask, CxtI, Depth + 1);
2073 return nullptr;
2074 }
2075
2076 FastMathFlags FMF;
2077 if (auto *FPOp = dyn_cast<FPMathOperator>(I)) {
2078 FMF = FPOp->getFastMathFlags();
2079 if (FMF.noNaNs())
2080 DemandedMask &= ~fcNan;
2081
2082 if (FMF.noInfs())
2083 DemandedMask &= ~fcInf;
2084 }
2085
2086 switch (I->getOpcode()) {
2087 case Instruction::FNeg: {
2088 if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known,
2089 Depth + 1))
2090 return I;
2091 Known.fneg();
2092 break;
2093 }
2094 case Instruction::FMul: {
2095 KnownFPClass KnownLHS, KnownRHS;
2096
2097 Value *X = I->getOperand(0);
2098 Value *Y = I->getOperand(1);
2099
2100 FPClassTest SrcDemandedMask =
2101 DemandedMask & (fcNan | fcZero | fcSubnormal | fcNormal);
2102
2103 if (DemandedMask & fcInf) {
2104 // mul x, inf = inf
2105 // mul large_x, large_y = inf
2106 SrcDemandedMask |= fcSubnormal | fcNormal | fcInf;
2107 }
2108
2109 if (DemandedMask & fcNan) {
2110 // mul +/-inf, 0 => nan
2111 SrcDemandedMask |= fcZero | fcInf;
2112
2113 // TODO: Mode check
2114 // mul +/-inf, sub => nan if daz
2115 SrcDemandedMask |= fcSubnormal;
2116 }
2117
2118 // mul normal, subnormal = normal
2119 // Normal inputs may result in underflow.
2120 if (DemandedMask & (fcNormal | fcSubnormal))
2121 SrcDemandedMask |= fcNormal | fcSubnormal;
2122
2123 if (DemandedMask & fcZero)
2124 SrcDemandedMask |= fcNormal | fcSubnormal;
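// e.g. If only fcZero is demanded of the product, infinities never need to
// be demanded of the sources: an infinite operand can only yield inf or nan.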
2125
2127 if (X == Y && isGuaranteedNotToBeUndef(X, SQ.AC, CxtI, SQ.DT, Depth + 1)) {
2128 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2129 return I;
2130 Type *EltTy = VTy->getScalarType();
2131
2132 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2133 Known = KnownFPClass::square(KnownLHS, Mode);
2134
2135 // Propagate known result to simplify edge case checks.
2136 if ((DemandedMask & fcNan) == fcNone)
2137 Known.knownNot(fcNan);
2138 if ((DemandedMask & fcPosInf) == fcNone)
2139 Known.knownNot(fcInf);
2140
2141 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2142 if (Constant *Folded =
2143 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2144 return Folded;
2145
2146 if (Known.isKnownAlways(fcPosZero | fcPosInf | fcNan)) {
2147 assert(KnownLHS.isKnownNever(fcPosNormal));
2148
2149 // We can skip the fabs if the source was already known positive.
2150 if (KnownLHS.isKnownAlways(fcPositive))
2151 return X;
2152
2153 // => fabs(x), in case this was a -inf or -0.
2154 // Note: Dropping canonicalize.
2155 IRBuilderBase::InsertPointGuard Guard(Builder);
2156 Builder.SetInsertPoint(I);
2157 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FMF);
2158 Fabs->takeName(I);
2159 return Fabs;
2160 }
2161
2162 return nullptr;
2163 }
2164
2165 if (SimplifyDemandedFPClass(I, 1, SrcDemandedMask, KnownRHS, Depth + 1) ||
2166 SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2167 return I;
2168
2169 // Propagate nnan-ness to sources to simplify source checks.
2170 if ((DemandedMask & fcNan) == fcNone) {
2171 KnownLHS.knownNot(fcNan);
2172 KnownRHS.knownNot(fcNan);
2173 }
2174
2175 if (FMF.noInfs()) {
2176 // Flag implies inputs cannot be infinity.
2177 KnownLHS.knownNot(fcInf);
2178 KnownRHS.knownNot(fcInf);
2179 }
2180
2181 bool NonNanResult = (DemandedMask & fcNan) == fcNone;
2182
2183 // With no-nans/no-infs:
2184 // X * 0.0 --> copysign(0.0, X)
2185 // X * -0.0 --> copysign(0.0, -X)
2186 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2187 KnownRHS.isKnownAlways(fcPosZero | fcNan)) {
2188 // => copysign(+0, lhs)
2189 // Note: Dropping canonicalize
2190 Value *Copysign = Builder.CreateCopySign(Y, X, FMF);
2191 Copysign->takeName(I);
2192 return Copysign;
2193 }
2194
2195 if (KnownLHS.isKnownAlways(fcPosZero | fcNan) &&
2196 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2197 // => copysign(+0, rhs)
2198 // Note: Dropping canonicalize
2199 Value *Copysign = Builder.CreateCopySign(X, Y, FMF);
2200 Copysign->takeName(I);
2201 return Copysign;
2202 }
2203
2204 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2205 KnownRHS.isKnownAlways(fcNegZero | fcNan)) {
2206 // => copysign(0, fneg(lhs))
2207 // Note: Dropping canonicalize
2208 Value *Copysign =
2209 Builder.CreateCopySign(Y, Builder.CreateFNegFMF(X, FMF), FMF);
2210 Copysign->takeName(I);
2211 return Copysign;
2212 }
2213
2214 if (KnownLHS.isKnownAlways(fcNegZero | fcNan) &&
2215 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2216 // => copysign(+0, fneg(rhs))
2217 // Note: Dropping canonicalize
2218 Value *Copysign =
2219 Builder.CreateCopySign(X, Builder.CreateFNegFMF(Y, FMF), FMF);
2220 Copysign->takeName(I);
2221 return Copysign;
2222 }
2223
2224 Type *EltTy = VTy->getScalarType();
2225 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2226
2227 if (KnownLHS.isKnownAlways(fcInf | fcNan) &&
2228 (KnownRHS.isKnownNeverNaN() &&
2229 KnownRHS.cannotBeOrderedGreaterEqZero(Mode))) {
2230 // Note: Dropping canonicalize
2231 Value *Neg = Builder.CreateFNegFMF(X, FMF);
2232 Neg->takeName(I);
2233 return Neg;
2234 }
2235
2236 if (KnownRHS.isKnownAlways(fcInf | fcNan) &&
2237 (KnownLHS.isKnownNeverNaN() &&
2238 KnownLHS.cannotBeOrderedGreaterEqZero(Mode))) {
2239 // Note: Dropping canonicalize
2240 Value *Neg = Builder.CreateFNegFMF(Y, FMF);
2241 Neg->takeName(I);
2242 return Neg;
2243 }
2244
2245 Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode);
2246
2247 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2248 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2249 }
2250 case Instruction::Call: {
2251 CallInst *CI = cast<CallInst>(I);
2252 const Intrinsic::ID IID = CI->getIntrinsicID();
2253 switch (IID) {
2254 case Intrinsic::fabs:
2255 if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask), Known,
2256 Depth + 1))
2257 return I;
2258 Known.fabs();
2259 break;
2260 case Intrinsic::arithmetic_fence:
2261 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
2262 return I;
2263 break;
2264 case Intrinsic::copysign: {
2265 // Flip on more potentially demanded classes
2266 const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(DemandedMask);
2267 if (SimplifyDemandedFPClass(I, 0, DemandedMaskAnySign, Known, Depth + 1))
2268 return I;
2269
2270 if ((DemandedMask & fcNegative) == DemandedMask) {
2271 // Roundabout way of replacing with fneg(fabs)
2272 I->setOperand(1, ConstantFP::get(VTy, -1.0));
2273 return I;
2274 }
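// e.g. If every demanded class is negative, copysign(x, y) behaves like
// copysign(x, -1.0), i.e. fneg(fabs(x)), so the sign operand can be dropped.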
2275
2276 if ((DemandedMask & fcPositive) == DemandedMask) {
2277 // Roundabout way of replacing with fabs
2278 I->setOperand(1, ConstantFP::getZero(VTy));
2279 return I;
2280 }
2281
2282 KnownFPClass KnownSign =
2283 computeKnownFPClass(I->getOperand(1), fcAllFlags, CxtI, Depth + 1);
2284 Known.copysign(KnownSign);
2285 break;
2286 }
2287 case Intrinsic::maximum:
2288 case Intrinsic::minimum: {
2289 KnownFPClass KnownLHS, KnownRHS;
2290
2291 // We can't tell much based on the demanded result without inspecting the
2292 // operands (e.g., a known-positive result could have been clamped), but
2293 // we can still prune known-nan inputs.
2294 FPClassTest SrcDemandedMask = DemandedMask | ~fcNan;
2295
2296 if (SimplifyDemandedFPClass(CI, 1, SrcDemandedMask, KnownRHS,
2297 Depth + 1) ||
2298 SimplifyDemandedFPClass(CI, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2299 return I;
2300
2301 // Propagate nnan-ness to simplify edge case checks.
2302 if ((DemandedMask & fcNan) == fcNone) {
2303 KnownLHS.knownNot(fcNan);
2304 KnownRHS.knownNot(fcNan);
2305 }
2306
2307 if (IID == Intrinsic::maximum) {
2308 // If at least one operand is known to be positive and the other
2309 // negative, the result must be the positive (unless the other operand
2310 // may be propagating a nan).
2311 if (KnownLHS.isKnownNever(fcNegative) &&
2312 KnownRHS.isKnownNever(fcPositive | fcNan))
2313 return CI->getArgOperand(0);
2314
2315 if (KnownRHS.isKnownNever(fcNegative) &&
2316 KnownLHS.isKnownNever(fcPositive | fcNan))
2317 return CI->getArgOperand(1);
2318
2319 // If one value must be pinf, the result is pinf or a propagated nan.
2320 if (KnownLHS.isKnownAlways(fcPosInf | fcNan) &&
2321 KnownRHS.isKnownNever(fcNan))
2322 return CI->getArgOperand(0);
2323
2324 if (KnownRHS.isKnownAlways(fcPosInf | fcNan) &&
2325 KnownLHS.isKnownNever(fcNan))
2326 return CI->getArgOperand(1);
2327 } else {
2328 // If one operand is known to be negative, and the other positive, the
2329 // result must be the negative (unless the other operand may be
2330 // propagating a nan).
2331 if (KnownLHS.isKnownNever(fcPositive) &&
2332 KnownRHS.isKnownNever(fcNegative | fcNan))
2333 return CI->getArgOperand(0);
2334
2335 if (KnownRHS.isKnownNever(fcPositive) &&
2336 KnownLHS.isKnownNever(fcNegative | fcNan))
2337 return CI->getArgOperand(1);
2338
2339 // If one value must be ninf, the result is ninf or a propagated nan.
2340 if (KnownLHS.isKnownAlways(fcNegInf | fcNan) &&
2341 KnownRHS.isKnownNever(fcNan))
2342 return CI->getArgOperand(0);
2343
2344 if (KnownRHS.isKnownAlways(fcNegInf | fcNan) &&
2345 KnownLHS.isKnownNever(fcNan))
2346 return CI->getArgOperand(1);
2347 }
2348
2349 Type *EltTy = VTy->getScalarType();
2350 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2351 Known = KnownFPClass::minMaxLike(KnownLHS, KnownRHS,
2352 IID == Intrinsic::maximum
2355 Mode);
2356
2357 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2358
2359 if (Constant *SingleVal =
2360 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2361 return SingleVal;
2362
2363 auto *FPOp = cast<FPMathOperator>(CI);
2364
2365 bool ChangedFlags = false;
2366
2367 // TODO: Add NSZ flag if we know the result will not be sensitive to the
2368 // sign of 0.
2369 if (!FPOp->hasNoNaNs() && (ValidResults & fcNan) == fcNone) {
2371 CI->setHasNoNaNs(true);
2372 ChangedFlags = true;
2373 }
2374
2375 if (ChangedFlags)
2376 return FPOp;
2377
2378 return nullptr;
2379 }
2380 case Intrinsic::exp:
2381 case Intrinsic::exp2:
2382 case Intrinsic::exp10: {
2383 if ((DemandedMask & fcPositive) == fcNone) {
2384 // Only returns positive values or nans.
2385 if ((DemandedMask & fcNan) == fcNone)
2386 return PoisonValue::get(VTy);
2387
2388 // Only need nan propagation.
2389 // Note: Dropping snan quieting.
2390 return CI->getArgOperand(0);
2391 }
2392
2393 FPClassTest SrcDemandedMask = DemandedMask & fcNan;
2394
2395 if (DemandedMask & fcZero) {
2396 // exp(-infinity) = 0
2397 SrcDemandedMask |= fcNegInf;
2398
2399 // exp(-largest_normal) = 0
2400 //
2401 // Negative numbers of sufficiently large magnitude underflow to 0. No
2402 // subnormal input has a 0 result.
2403 SrcDemandedMask |= fcNegNormal;
2404 }
2405
2406 if (DemandedMask & fcPosSubnormal) {
2407 // Negative numbers of sufficiently large magnitude underflow to 0. No
2408 // subnormal input has a 0 result.
2409 SrcDemandedMask |= fcNegNormal;
2410 }
2411
2412 if (DemandedMask & fcPosNormal) {
2413 // exp(0) = 1
2414 // exp(+/- smallest_normal) = 1
2415 // exp(+/- largest_denormal) = 1
2416 // exp(+/- smallest_denormal) = 1
2417 // exp(-1) = pos normal
2418 SrcDemandedMask |= fcNormal | fcSubnormal | fcZero;
2419 }
2420
2421 // exp(inf), exp(largest_normal) = inf
2422 if (DemandedMask & fcPosInf)
2423 SrcDemandedMask |= fcPosInf | fcPosNormal;
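// e.g. If only fcNan is demanded of exp(x), SrcDemandedMask stays fcNan:
// every non-nan input produces a non-nan result.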
2424
2425 KnownFPClass KnownSrc;
2426
2427 // TODO: This could really make use of KnownFPClass of specific value
2428 // range (i.e., close enough to 1).
2429 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2430 return I;
2431
2432 // Propagate nnan-ness to simplify edge case checks.
2433 if ((DemandedMask & fcNan) == fcNone)
2434 KnownSrc.knownNot(fcNan);
2435
2436 // exp(+/-0) = 1
2437 if (KnownSrc.isKnownAlways(fcZero))
2438 return ConstantFP::get(VTy, 1.0);
2439
2440 // Only perform nan propagation.
2441 // Note: Dropping canonicalize / quiet of signaling nan.
2442 if (KnownSrc.isKnownAlways(fcNan))
2443 return CI->getArgOperand(0);
2444
2445 // exp(0 | nan) => x == 0.0 ? 1.0 : x
2446 if (KnownSrc.isKnownAlways(fcZero | fcNan)) {
2447 IRBuilderBase::InsertPointGuard Guard(Builder);
2448 Builder.SetInsertPoint(CI);
2449
2450 // fadd +/-0, 1.0 => 1.0
2451 // fadd nan, 1.0 => nan
2452 return Builder.CreateFAddFMF(CI->getArgOperand(0),
2453 ConstantFP::get(VTy, 1.0), FMF);
2454 }
2455
2456 if (KnownSrc.isKnownAlways(fcInf | fcNan)) {
2457 // exp(-inf) = 0
2458 // exp(+inf) = +inf
2459 IRBuilderBase::InsertPointGuard Guard(Builder);
2460 Builder.SetInsertPoint(CI);
2461
2462 // Note: Dropping canonicalize / quiet of signaling nan.
2463 Value *X = CI->getArgOperand(0);
2464 Value *IsPosInfOrNan = Builder.CreateFCmpFMF(
2465 FCmpInst::FCMP_UEQ, X, ConstantFP::getInfinity(VTy), FMF);
2466 // We do not know whether an infinity or a NaN is more likely here,
2467 // so mark the branch weights as unknown.
2468 Value *ZeroOrInf = Builder.CreateSelectFMFWithUnknownProfile(
2469 IsPosInfOrNan, X, ConstantFP::getZero(VTy), FMF, DEBUG_TYPE);
2470 return ZeroOrInf;
2471 }
2472
2473 Known = KnownFPClass::exp(KnownSrc);
2474 break;
2475 }
2476 case Intrinsic::log:
2477 case Intrinsic::log2:
2478 case Intrinsic::log10: {
2479 FPClassTest DemandedSrcMask = DemandedMask & (fcNan | fcPosInf);
2480
2481 Type *EltTy = VTy->getScalarType();
2482 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2483
2484 // log(x < 0) = nan
2485 if (DemandedMask & fcNan)
2486 DemandedSrcMask |= (fcNegative & ~fcNegZero);
2487
2488 // log(0) = -inf
2489 if (DemandedMask & fcNegInf) {
2490 DemandedSrcMask |= fcZero;
2491
2492 // No value produces subnormal result.
2493 if (Mode.inputsMayBeZero())
2494 DemandedSrcMask |= fcSubnormal;
2495 }
2496
2497 if (DemandedMask & fcNormal)
2498 DemandedSrcMask |= fcNormal | fcSubnormal;
2499
2500 // log(1) = 0
2501 if (DemandedMask & fcZero)
2502 DemandedSrcMask |= fcPosNormal;
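// e.g. If only fcNegInf is demanded of log(x), the source demand reduces to
// fcZero (plus fcSubnormal when denormal inputs may be treated as zero).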
2503
2504 KnownFPClass KnownSrc;
2505 if (SimplifyDemandedFPClass(I, 0, DemandedSrcMask, KnownSrc, Depth + 1))
2506 return I;
2507
2508 Known = KnownFPClass::log(KnownSrc, Mode);
2509
2510 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2511 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2512 }
2513 case Intrinsic::canonicalize: {
2514 Type *EltTy = VTy->getScalarType();
2515
2516 // TODO: This could have more refined support for PositiveZero denormal
2517 // mode.
2518 if (EltTy->isIEEELikeFPTy()) {
2519 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2520
2521 FPClassTest SrcDemandedMask = DemandedMask;
2522
2523 // A demanded quiet nan result may have come from a signaling nan, so we
2524 // need to expand the demanded mask.
2525 if ((DemandedMask & fcQNan) != fcNone)
2526 SrcDemandedMask |= fcSNan;
2527
2528 if (Mode != DenormalMode::getIEEE()) {
2529 // Any zero results may have come from flushed denormals.
2530 if (DemandedMask & fcPosZero)
2531 SrcDemandedMask |= fcPosSubnormal;
2532 if (DemandedMask & fcNegZero)
2533 SrcDemandedMask |= fcNegSubnormal;
2534 }
2535
2536 if (Mode == DenormalMode::getPreserveSign()) {
2537 // If a denormal input will be flushed, and we don't need zeros, we
2538 // don't need denormals either.
2539 if ((DemandedMask & fcPosZero) == fcNone)
2540 SrcDemandedMask &= ~fcPosSubnormal;
2541
2542 if ((DemandedMask & fcNegZero) == fcNone)
2543 SrcDemandedMask &= ~fcNegSubnormal;
2544 }
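// e.g. Under preserve-sign flushing, a positive subnormal input
// canonicalizes to +0.0, so fcPosSubnormal only matters when fcPosZero
// results are demanded.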
2545
2546 KnownFPClass KnownSrc;
2547
2548 // Simplify upstream operations before trying to simplify this call.
2549 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2550 return I;
2551
2552 // Perform the canonicalization to see if this folded to a constant.
2553 Known = KnownFPClass::canonicalize(KnownSrc, Mode);
2554
2555 if (Constant *SingleVal =
2556 getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses))
2557 return SingleVal;
2558
2559 // For IEEE handling, there is only a bit change for nan inputs, so we
2560 // can drop it if we do not demand nan results or we know the input
2561 // isn't a nan.
2562 // Otherwise, we also need to avoid denormal inputs to drop the
2563 // canonicalize.
2564 if ((KnownSrc.isKnownNeverNaN() || (DemandedMask & fcNan) == fcNone) &&
2565 (Mode == DenormalMode::getIEEE() ||
2566 KnownSrc.isKnownNeverSubnormal()))
2567 return CI->getArgOperand(0);
2568
2569 return nullptr;
2570 }
2571
2572 [[fallthrough]];
2573 }
2574 default:
2575 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
2576 break;
2577 }
2578
2579 break;
2580 }
2581 case Instruction::Select: {
2582 KnownFPClass KnownLHS, KnownRHS;
2583 if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) ||
2584 SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1))
2585 return I;
2586
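// e.g. If only fcNan is demanded and the true arm is known never nan, the
// select can be replaced by its false arm (and vice versa).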
2587 if (KnownLHS.isKnownNever(DemandedMask))
2588 return I->getOperand(2);
2589 if (KnownRHS.isKnownNever(DemandedMask))
2590 return I->getOperand(1);
2591
2592 // TODO: Recognize clamping patterns
2593 Known = KnownLHS | KnownRHS;
2594 break;
2595 }
2596 case Instruction::ExtractElement: {
2597 // TODO: Handle demanded element mask
2598 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
2599 return I;
2600 break;
2601 }
2602 case Instruction::InsertElement: {
2603 KnownFPClass KnownInserted, KnownVec;
2604 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownInserted, Depth + 1) ||
2605 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownVec, Depth + 1))
2606 return I;
2607
2608 // TODO: Use demanded elements logic from computeKnownFPClass
2609 Known = KnownVec | KnownInserted;
2610 break;
2611 }
2612 case Instruction::ShuffleVector: {
2613 KnownFPClass KnownLHS, KnownRHS;
2614 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownRHS, Depth + 1) ||
2615 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownLHS, Depth + 1))
2616 return I;
2617
2618 // TODO: This is overly conservative and should consider demanded elements,
2619 // and splats.
2620 Known = KnownLHS | KnownRHS;
2621 break;
2622 }
2623 default:
2624 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
2625 break;
2626 }
2627
2628 return getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
2629}
2630
2631 bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
2632 FPClassTest DemandedMask,
2633 KnownFPClass &Known,
2634 unsigned Depth) {
2635 Use &U = I->getOperandUse(OpNo);
2636 Value *NewVal =
2637 SimplifyDemandedUseFPClass(U.get(), DemandedMask, Known, I, Depth);
2638 if (!NewVal)
2639 return false;
2640 if (Instruction *OpInst = dyn_cast<Instruction>(U))
2641 salvageDebugInfo(*OpInst);
2642
2643 replaceUse(U, NewVal);
2644 return true;
2645}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
Hexagon Common GEP
This file provides internal interfaces used to implement the InstCombine.
static cl::opt< unsigned > SimplifyDemandedVectorEltsDepthLimit("instcombine-simplify-vector-elts-depth", cl::desc("Depth limit when simplifying vector instructions and their operands"), cl::Hidden, cl::init(10))
static Constant * getFPClassConstant(Type *Ty, FPClassTest Mask, bool IsCanonicalizing=false)
For floating-point classes that resolve to a single bit pattern, return that value.
static cl::opt< bool > VerifyKnownBits("instcombine-verify-known-bits", cl::desc("Verify that computeKnownBits() and " "SimplifyDemandedBits() are consistent"), cl::Hidden, cl::init(false))
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
static Value * simplifyShiftSelectingPackedElement(Instruction *I, const APInt &DemandedMask, InstCombinerImpl &IC, unsigned Depth)
Let N = 2 * M.
This file provides the interface for the instcombine pass implementation.
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1341
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1436
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:828
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition APInt.h:433
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
BinaryOps getOpcode() const
Definition InstrTypes.h:374
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noInfs() const
Definition FMF.h:66
bool noNaNs() const
Definition FMF.h:65
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
LLVM_ABI Value * CreateSelectWithUnknownProfile(Value *C, Value *True, Value *False, StringRef PassName, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
std::optional< std::pair< Intrinsic::ID, SmallVector< Value *, 3 > > > convertOrOfShiftsToFunnelShift(Instruction &Or)
Value * simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known)
Helper routine of SimplifyDemandedUseBits.
Value * SimplifyDemandedUseBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Attempts to replace I with a simpler value based on the demanded bits.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth=0)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Value * SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Helper routine of SimplifyDemandedUseBits.
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth=0)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
SimplifyQuery SQ
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI void setHasNoNaNs(bool B)
Set or clear the no-nans flag on this instruction, which must be an operator which supports this flag...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
bool isShift() const
bool isIntDivRem() const
A wrapper class for inspecting calls to intrinsic functions.
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIEEELikeFPTy() const
Return true if this is a well-behaved IEEE-like type, which has a IEEE compatible layout,...
Definition Type.h:170
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:233
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
This class represents zero extension of integer types.
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
DisjointOr_match< LHS, RHS, true > m_c_DisjointOr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI void computeKnownBitsFromContext(const Value *V, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Merge bits known from context-dependent facts into Known.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1731
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
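Concretely, a few spot checks of these bit and alignment helpers (the values are ours):

#include <cassert>
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

void mathExtrasDemo() {
  assert(llvm::isPowerOf2_64(64) && !llvm::isPowerOf2_64(0));
  assert(llvm::alignDown(27u, 8u) == 24u);     // largest multiple of 8 <= 27
  assert(llvm::alignDown(27u, 8u, 3u) == 27u); // 27 is already 3 (mod 8)
  assert(llvm::countr_one(0b0111u) == 3);      // ones from the LSB upward
}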
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI bool isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be undef, but may be poison.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond, Value *Arm, bool Invert, const SimplifyQuery &Q, unsigned Depth=0)
Adjust Known for the given select Arm to include information from the select Cond.
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
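A minimal sketch (helper name is ours) using the DataLayout-level overload to ask whether the low N bits of a value are known zero:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/KnownBits.h"

static bool lowBitsKnownZero(const llvm::Value *V, unsigned N,
                             const llvm::DataLayout &DL) {
  llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(V, Known, DL); // AC/CxtI/DT left at their defaults
  return Known.countMinTrailingZeros() >= N;
}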
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
LLVM_ABI FPClassTest unknown_sign(FPClassTest Mask)
Return the test mask which returns true if the value could have the same set of classes,...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
LLVM_ABI KnownBits analyzeKnownBitsFromAndXorOr(const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, const SimplifyQuery &SQ, unsigned Depth=0)
Using KnownBits LHS/RHS produce the known bits for logic op (and/xor/or).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
void makeNonNegative()
Make this value non-negative.
Definition KnownBits.h:124
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:321
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute the known bits resulting from addition of LHS and RHS.
Definition KnownBits.h:347
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
Definition KnownBits.h:196
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
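A small worked example (the values are ours) of this set algebra:

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

void knownBitsLatticeDemo() {
  llvm::KnownBits A = llvm::KnownBits::makeConstant(llvm::APInt(8, 12)); // 0b1100
  llvm::KnownBits B = llvm::KnownBits::makeConstant(llvm::APInt(8, 8));  // 0b1000
  // unionWith keeps only the facts valid for either source: bit 3 is known
  // one, bits 0-1 and 4-7 are known zero, and bit 2 becomes unknown.
  llvm::KnownBits Either = A.unionWith(B);
  assert(Either.getMaxValue() == 12 && Either.getMinValue() == 8);
}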
static LLVM_ABI KnownBits srem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for srem(LHS, RHS).
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static KnownBits sub(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute the known bits resulting from subtraction of LHS and RHS.
Definition KnownBits.h:353
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
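These transfer functions are exact when both inputs are fully known; a quick check (the values are ours):

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

void knownBitsArithDemo() {
  llvm::KnownBits Five = llvm::KnownBits::makeConstant(llvm::APInt(8, 5));
  llvm::KnownBits Two = llvm::KnownBits::makeConstant(llvm::APInt(8, 2));
  llvm::KnownBits Sum = llvm::KnownBits::add(Five, Two);     // 5 + 2
  llvm::KnownBits Shifted = llvm::KnownBits::shl(Five, Two); // 5 << 2
  assert(Sum.isConstant() && Sum.getConstant() == 7);
  assert(Shifted.isConstant() && Shifted.getConstant() == 20);
}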
bool isKnownNeverInfOrNaN() const
Return true if it's known this can never be an infinity or nan.
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
void knownNot(FPClassTest RuleOut)
static LLVM_ABI KnownFPClass fmul(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fmul.
void copysign(const KnownFPClass &Sign)
static KnownFPClass square(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
bool isKnownNeverSubnormal() const
Return true if it's known this can never be a subnormal.
bool isKnownAlways(FPClassTest Mask) const
static LLVM_ABI KnownFPClass canonicalize(const KnownFPClass &Src, DenormalMode DenormMode=DenormalMode::getDynamic())
Apply the canonicalize intrinsic to this value.
static LLVM_ABI KnownFPClass log(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for log/log2/log10.
static LLVM_ABI KnownFPClass minMaxLike(const KnownFPClass &LHS, const KnownFPClass &RHS, MinMaxKind Kind, DenormalMode DenormMode=DenormalMode::getDynamic())
static LLVM_ABI KnownFPClass exp(const KnownFPClass &Src)
Report known values for exp, exp2 and exp10.
bool isKnownNeverNaN() const
Return true if it's known this can never be a nan.
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
bool cannotBeOrderedGreaterEqZero(DenormalMode Mode) const
Return true if it's known this can never be a positive value or a logical 0.
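A minimal sketch of this class-mask bookkeeping (recent trunk keeps KnownFPClass in llvm/Support/KnownFPClass.h; older releases had it in ValueTracking.h):

#include <cassert>
#include "llvm/Support/KnownFPClass.h"

void fpClassDemo() {
  llvm::KnownFPClass K;    // starts fully unknown: every class is possible
  K.knownNot(llvm::fcNan); // e.g. learned from a nofpclass(nan) argument
  assert(K.isKnownNeverNaN());
  assert(!K.isKnownNever(llvm::fcInf)); // infinities were never ruled out
}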
Matching combinators.
const Instruction * CxtI