//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

static cl::opt<bool>
    VerifyKnownBits("instcombine-verify-known-bits",
                    cl::desc("Verify that computeKnownBits() and "
                             "SimplifyDemandedBits() are consistent"),
                    cl::Hidden, cl::init(false));

36 "instcombine-simplify-vector-elts-depth",
38 "Depth limit when simplifying vector instructions and their operands"),
39 cl::Hidden, cl::init(10));
40
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
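/// For example, with Demanded = 0x0000FFFF, a constant operand of 0x00FF00FF
/// has set bits outside the demanded region and can be shrunk to 0x000000FF.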
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}

/// Let N = 2 * M.
/// Given an N-bit integer representing a pack of two M-bit integers,
/// we can select one of the packed integers by right-shifting by either
/// zero or M (which is the most straightforward to check if M is a power
/// of 2), and then isolating the lower M bits. In this case, we can
/// represent the shift as a select on whether the shr amount is nonzero.
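///
/// For example, with M = 8 and a shift amount known to be either 0 or 8:
///   %pack = or disjoint i16 (shl i16 %upper, 8), %lower   ; %lower u< 256
///   %elt  = lshr i16 %pack, %amt
/// the lshr yields %lower when %amt == 0 and %upper otherwise, so it can be
/// rewritten as: select (icmp eq i16 %amt, 0), i16 %lower, i16 %upper.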
static Value *simplifyShiftSelectingPackedElement(Instruction *I,
                                                  const APInt &DemandedMask,
                                                  InstCombinerImpl &IC,
                                                  unsigned Depth) {
  assert(I->getOpcode() == Instruction::LShr &&
         "Only lshr instruction supported");

  uint64_t ShlAmt;
  Value *Upper, *Lower;
  if (!match(I->getOperand(0),
             m_OneUse(m_c_DisjointOr(
                 m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
                 m_Value(Lower)))))
    return nullptr;

  if (!isPowerOf2_64(ShlAmt))
    return nullptr;

  const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
  if (DemandedBitWidth > ShlAmt)
    return nullptr;

  // Check that the demanded upper bits are not lost by the left shift.
  if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
    return nullptr;

  KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth);
  if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
    return nullptr;

  Value *ShrAmt = I->getOperand(1);
  KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth);

  // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
  // zero.
  if (~KnownShrBits.Zero != ShlAmt)
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(I);
  Value *ShrAmtZ = IC.Builder.CreateICmpEQ(
      ShrAmt, Constant::getNullValue(ShrAmt->getType()),
      ShrAmt->getName() + ".z");
  // There is no existing !prof metadata from which we can derive the !prof
  // metadata for this select.
  Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower,
                                                            Upper, DEBUG_TYPE);
  Select->takeName(I);
  return Select;
}

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
                                                       KnownBits &Known) {
  APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     SQ.getWithInstruction(&Inst));
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
  KnownBits Known(getBitWidth(Inst.getType(), DL));
  return SimplifyDemandedInstructionBits(Inst, Known);
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                            const APInt &DemandedMask,
                                            KnownBits &Known,
                                            const SimplifyQuery &Q,
                                            unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *V = U.get();
  if (isa<Constant>(V)) {
    llvm::computeKnownBits(V, Known, Q, Depth);
    return false;
  }

  Known.resetAll();
  if (DemandedMask.isZero()) {
    // Not demanding any bits from V.
    replaceUse(U, UndefValue::get(V->getType()));
    return true;
  }

  Instruction *VInst = dyn_cast<Instruction>(V);
  if (!VInst) {
    llvm::computeKnownBits(V, Known, Q, Depth);
    return false;
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return false;

  Value *NewVal;
  if (VInst->hasOneUse()) {
    // If the instruction has one use, we can directly simplify it.
    NewVal = SimplifyDemandedUseBits(VInst, DemandedMask, Known, Q, Depth);
  } else {
    // If there are multiple uses of this instruction, then we can simplify
    // VInst to some other value, but not modify the instruction.
    NewVal =
        SimplifyMultipleUseDemandedBits(VInst, DemandedMask, Known, Q, Depth);
  }
  if (!NewVal) return false;
  if (Instruction *OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}

/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false
/// after analyzing the expression, setting Known.One to the bits that are
/// known to be one in the expression, and setting Known.Zero to the bits that
/// are known to be zero in the expression. These are provided to potentially
/// allow the caller (which might recursively be SimplifyDemandedBits itself)
/// to simplify the expression.
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
/// are accurate even for bits not in DemandedMask. Note also that the
/// bitwidth of V, DemandedMask, Known.Zero and Known.One must all be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but
/// not for all users.
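///
/// For example, if V is (and X, 255) and DemandedMask is 255, every demanded
/// bit is already supplied by X alone, so X is returned as the simpler value.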
Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
                                                 const APInt &DemandedMask,
                                                 KnownBits &Known,
                                                 const SimplifyQuery &Q,
                                                 unsigned Depth) {
  assert(I != nullptr && "Null pointer of Value???");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = I->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // Update flags after simplifying an operand based on the fact that some high
  // order bits are not demanded.
  auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I,
                                                  unsigned NLZ) {
    if (NLZ > 0) {
      // Disable the nsw and nuw flags here: We can no longer guarantee that
      // we won't wrap after simplification. Removing the nsw/nuw flags is
      // legal here because the top bit is not demanded.
      I->setHasNoSignedWrap(false);
      I->setHasNoUnsignedWrap(false);
    }
    return I;
  };

  // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
  // about the high bits of the operands.
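  // For example, if only bit 0 of (add i8 %x, %y) is demanded, the operands
  // only need their low bits: the sum's bit 0 is just the xor of the
  // operands' bit 0, and carries only propagate upward.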
  auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
    unsigned NLZ = DemandedMask.countl_zero();
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Q, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1)) {
      disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
      return true;
    }
    return false;
  };

  switch (I->getOpcode()) {
  default:
    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, Q,
                             Depth + 1))
      return I;

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    break;
  }
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, Q,
                             Depth + 1)) {
      // Disjoint flag may no longer hold.
      I->dropPoisonGeneratingFlags();
      return I;
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // Infer disjoint flag if no common bits are set.
    if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
      WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
          RHSCache(I->getOperand(1), RHSKnown);
      if (haveNoCommonBitsSet(LHSCache, RHSCache, Q)) {
        cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
        return I;
      }
    }

    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1))
      return I;
    Value *LHS, *RHS;
    if (DemandedMask == 1 &&
        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
      // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      auto *Xor = Builder.CreateXor(LHS, RHS);
      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
      if (DemandedMask.isAllOnes())
        cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
      Or->takeName(I);
      return InsertNewInstWith(Or, I->getIterator());
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, I->getIterator());
    }

    // If the RHS is a constant, see if we can change it. Don't alter a -1
    // constant because that's a canonical 'not' op, and that is better for
    // combining, SCEV, and codegen.
    const APInt *C;
    if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
      if ((*C | ~DemandedMask).isAllOnes()) {
        // Force bits to 1 to create a 'not' op.
        I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
        return I;
      }
      // If we can't turn this into a 'not', try to shrink the constant.
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
    }

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
      ConstantInt *AndRHS, *XorRHS;
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
          match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, I->getIterator());

        Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, I->getIterator());
      }
    }
    break;
  }
  case Instruction::Select: {
    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Q, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Q, Depth + 1))
      return I;

    // If the operands are constants, see if we can simplify them.
    // This is similar to ShrinkDemandedConstant, but for a select we want to
    // try to keep the selected constants the same as icmp value constants, if
    // we can. This helps not break apart (or helps put back together)
    // canonical patterns like min and max.
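    // For example, in ((icmp eq X, 8) ? 8 : X) with only the low 3 bits
    // demanded, shrinking the select constant 8 to 0 would tear apart the
    // select-of-icmp pattern; keeping it equal to the icmp constant does not.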
    auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
                                         const APInt &DemandedMask) {
      const APInt *SelC;
      if (!match(I->getOperand(OpNo), m_APInt(SelC)))
        return false;

      // Get the constant out of the ICmp, if there is one.
      // Only try this when exactly 1 operand is a constant (if both operands
      // are constant, the icmp should eventually simplify). Otherwise, we may
      // invert the transform that reduces set bits and loop forever.
      Value *X;
      const APInt *CmpC;
      if (!match(I->getOperand(0), m_ICmp(m_Value(X), m_APInt(CmpC))) ||
          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
        return ShrinkDemandedConstant(I, OpNo, DemandedMask);

      // If the constant is already the same as the ICmp, leave it as-is.
      if (*CmpC == *SelC)
        return false;
      // If the constants are not already the same, but can be with the demand
      // mask, use the constant value from the ICmp.
      if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
        I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
        return true;
      }
      return ShrinkDemandedConstant(I, OpNo, DemandedMask);
    };
    if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
        CanonicalizeSelectConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    adjustKnownBitsForSelectArm(LHSKnown, I->getOperand(0), I->getOperand(1),
                                /*Invert=*/false, Q, Depth);
    adjustKnownBitsForSelectArm(RHSKnown, I->getOperand(0), I->getOperand(2),
                                /*Invert=*/true, Q, Depth);
    Known = LHSKnown.intersectWith(RHSKnown);
    break;
  }
  case Instruction::Trunc: {
    // If we do not demand the high bits of a right-shifted and truncated
    // value, then we may be able to truncate it before the shift.
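    // For example: trunc i64 (lshr i64 %x, 8) to i32 --> lshr i32 (trunc %x), 8
    // provided the top 8 bits of the i32 result are not demanded.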
    Value *X;
    const APInt *C;
    if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
      // The shift amount must be valid (not poison) in the narrow type, and
      // it must not be greater than the high bits demanded of the result.
      if (C->ult(VTy->getScalarSizeInBits()) &&
          C->ule(DemandedMask.countl_zero())) {
        // trunc (lshr X, C) --> lshr (trunc X), C
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Trunc = Builder.CreateTrunc(X, VTy);
        return Builder.CreateLShr(Trunc, C->getZExtValue());
      }
    }
  }
    [[fallthrough]];
  case Instruction::ZExt: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Q,
                             Depth + 1)) {
      // For zext nneg, we may have dropped the instruction which made the
      // input non-negative.
      I->dropPoisonGeneratingFlags();
      return I;
    }
    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
    if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
        !InputKnown.isNegative())
      InputKnown.makeNonNegative();
    Known = InputKnown.zextOrTrunc(BitWidth);

    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth - 1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Q, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded,
    // convert this into a zero extension.
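    // For example, if only the low 8 bits of (sext i8 %x to i32) are demanded,
    // none of the sign-extended bits matter and a zext works just as well.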
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
      NewCast->takeName(I);
      return InsertNewInstWith(NewCast, I->getIterator());
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    break;
  }
  case Instruction::Add: {
    if ((DemandedMask & 1) == 0) {
      // If we do not need the low bit, try to convert bool math to logic:
      // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
      Value *X, *Y;
      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
                           m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0 |  0  |  0  |
        // Y:1 | -1  |  0  |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
        return Builder.CreateSExt(AndNot, VTy);
      }

      // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
      if (match(I, m_Add(m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
          (I->getOperand(0)->hasOneUse() || I->getOperand(1)->hasOneUse())) {

        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0 | -1  | -1  |
        // Y:1 | -1  |  0  |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Or = Builder.CreateOr(X, Y);
        return Builder.CreateSExt(Or, VTy);
      }
    }

    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and known to be zero in one operand,
    // then we don't need to demand them from the other operand, since they
    // can't cause overflow into any bits that are demanded in the result.
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be adding zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the
    // DemandedMask.
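    // For example, with DemandedMask = 0xF, (add X, 8) and (xor X, 8) agree on
    // all demanded bits: they can only differ via a carry out of bit 3, which
    // is never observed.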
    {
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) &&
          C->isOneBitSet(DemandedMask.getActiveBits() - 1)) {
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
      }
    }

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::Sub: {
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and are known to be zero in RHS,
    // then we don't need to demand them from LHS, since they can't cause a
    // borrow from any bits that are demanded in the result.
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be subtracting zeros from every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Canonicalize sub mask, X -> ~X
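    // For example, in i8 (sub 15, X) with only the low 4 bits demanded:
    // 15 - X == ~X + 16, so the demanded bits match those of (not X).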
    const APInt *LHSC;
    if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
        DemandedFromOps.isSubsetOf(*LHSC)) {
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      return Builder.CreateNot(I->getOperand(1));
    }

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::Mul: {
    APInt DemandedFromOps;
    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
      return I;

    if (DemandedMask.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C'
      // is odd (has LSB set), then the left-shifted low bit of X is the
      // answer.
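      // For example, if only bit 1 is demanded, (mul X, 6) becomes (shl X, 1):
      // 6 = 0b110, so bit 1 of the product is exactly bit 0 of X.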
      unsigned CTZ = DemandedMask.countr_zero();
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
        Constant *ShiftC = ConstantInt::get(VTy, CTZ);
        Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
        return InsertNewInstWith(Shl, I->getIterator());
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
    if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
      Constant *One = ConstantInt::get(VTy, 1);
      Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
      return InsertNewInstWith(And1, I->getIterator());
    }

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      // Do not simplify if shl is part of a funnel-shift pattern.
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Q, Depth);
              break;
            }
          }
        }
      }

      // If we only want bits that already match the signbit then we don't
      // need to shift.
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);
      if (DemandedMask.countr_zero() >= ShiftAmt) {
        if (I->hasNoSignedWrap()) {
          unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
          unsigned SignBits =
              ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
          if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits)
            return I->getOperand(0);
        }

        // If we can pre-shift a right-shifted constant to the left without
        // losing any high bits and we don't demand the low bits, then
        // eliminate the left-shift:
        // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
          Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
                                                        LeftShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC,
                                           LeftShiftAmtC, DL) == C) {
            Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
            return InsertNewInstWith(Lshr, I->getIterator());
          }
        }
      }

      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt + 1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1))
        return I;

      Known = KnownBits::shl(Known,
                             KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
                             /* NUW */ IOp->hasNoUnsignedWrap(),
                             /* NSW */ IOp->hasNoSignedWrap());
    } else {
      // This is a variable shift, so we can't shift the demand mask by a
      // known amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedMask.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Q, Depth + 1)) {
          // We can't guarantee that nsw/nuw hold after simplifying the
          // operand.
          I->dropPoisonGeneratingFlags();
          return I;
        }
      }
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Do not simplify if lshr is part of a funnel-shift pattern.
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Q, Depth);
              break;
            }
          }
        }
      }

      // If we are just demanding the shifted sign bit and below, then this
      // can be treated as an ASHR in disguise.
      if (DemandedMask.countl_zero() >= ShiftAmt) {
        // If we only want bits that already match the signbit then we don't
        // need to shift.
        unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
        unsigned SignBits =
            ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
        if (SignBits >= NumHiDemandedBits)
          return I->getOperand(0);

        // If we can pre-shift a left-shifted constant to the right without
        // losing any low bits (we already know we don't demand the high
        // bits), then eliminate the right-shift:
        // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
          Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
                                                        RightShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
                                           RightShiftAmtC, DL) == C) {
            Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
            return InsertNewInstWith(Shl, I->getIterator());
          }
        }

        const APInt *Factor;
        if (match(I->getOperand(0),
                  m_OneUse(m_Mul(m_Value(X), m_APInt(Factor)))) &&
            Factor->countr_zero() >= ShiftAmt) {
          BinaryOperator *Mul = BinaryOperator::CreateMul(
              X, ConstantInt::get(X->getType(), Factor->lshr(ShiftAmt)));
          return InsertNewInstWith(Mul, I->getIterator());
        }
      }

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
        // exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }
      Known >>= ShiftAmt;
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
      break;
    }
    if (Value *V =
            simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth))
      return V;

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::AShr: {
    unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
    if (SignBits >= NumHiDemandedBits)
      return I->getOperand(0);

    // If this is an arithmetic shift right and only the low-bit is set, we
    // can always convert this into a logical shr, even if the shift amount
    // is variable. The low bit of the shift cannot be an input sign bit
    // unless the shift amount is >= the size of the datatype, which is
    // undefined.
    if (DemandedMask.isOne()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, I->getIterator());
    }

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the bits being shifted in are demanded, then we should set
      // the sign bit as demanded.
      bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt;
      if (ShiftedInBitsDemanded)
        DemandedMaskIn.setSignBit();
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
        // exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      // If the input sign bit is known to be zero, or if none of the shifted
      // in bits are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        LShr->takeName(I);
        return InsertNewInstWith(LShr, I->getIterator());
      }

      Known = KnownBits::ashr(
          Known, KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
          ShiftAmt != 0, I->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
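    // For example, in (udiv X, 8) the low 3 bits of X never influence the
    // quotient, so they need not be demanded.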
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // TODO: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countr_zero();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Q, Depth + 1)) {
        // We can't guarantee that "exact" is still true after changing the
        // dividend.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA),
                              cast<BinaryOperator>(I)->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Q, Depth);
    }
    break;
  }
  case Instruction::SRem: {
    const APInt *Rem;
    if (match(I->getOperand(1), m_APInt(Rem)) && Rem->isPowerOf2()) {
      if (DemandedMask.ult(*Rem)) // srem won't affect demanded bits
        return I->getOperand(0);

      APInt LowBits = *Rem - 1;
      APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
      if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Q, Depth + 1))
        return I;
      Known = KnownBits::srem(LHSKnown, KnownBits::makeConstant(*Rem));
      break;
    }

    llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  case Instruction::Call: {
    bool KnownBitsComputed = false;
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::abs: {
        if (DemandedMask == 1)
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::ctpop: {
        // Checking if the number of clear bits is odd (parity)? If the type
        // has an even number of bits, that's the same as checking if the
        // number of set bits is odd, so we can eliminate the 'not' op.
        Value *X;
        if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
            match(II->getArgOperand(0), m_Not(m_Value(X)))) {
          Function *Ctpop = Intrinsic::getOrInsertDeclaration(
              II->getModule(), Intrinsic::ctpop, VTy);
          return InsertNewInstWith(CallInst::Create(Ctpop, {X}),
                                   I->getIterator());
        }
        break;
      }
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countl_zero();
        unsigned NTZ = DemandedMask.countr_zero();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
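        // For example, for i32 with DemandedMask = 0x0000FF00 (NLZ = 16,
        // NTZ = 8), the demanded byte came from bits 16..23 of the input, so
        // a lshr by 8 moves it into place.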
        NLZ = alignDown(NLZ, 8);
        NTZ = alignDown(NTZ, 8);
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth - NLZ - NTZ == 8) {
          // Replace this with either a left or right shift to get the byte
          // into the right place.
          Instruction *NewVal;
          if (NLZ > NTZ)
            NewVal = BinaryOperator::CreateLShr(
                II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ));
          else
            NewVal = BinaryOperator::CreateShl(
                II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, I->getIterator());
        }
        break;
      }
      case Intrinsic::ptrmask: {
        unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
        RHSKnown = KnownBits(MaskWidth);
        // If either the LHS or the RHS are Zero, the result is zero.
        if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1) ||
            SimplifyDemandedBits(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
                RHSKnown, Q, Depth + 1))
          return I;

        // TODO: Should be 1-extend.
        RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);

        Known = LHSKnown & RHSKnown;
        KnownBitsComputed = true;

        // If the client is only demanding bits we know to be zero, return
        // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
        // provenance, but making the mask zero will be easily optimizable in
        // the backend.
        if (DemandedMask.isSubsetOf(Known.Zero) &&
            !match(I->getOperand(1), m_Zero()))
          return replaceOperand(
              *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));

        // Mask in demanded space does nothing.
        // NOTE: We may have attributes associated with the return value of
        // the llvm.ptrmask intrinsic that will be lost when we just return
        // the operand. We should try to preserve them.
        if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
          return I->getOperand(0);

        // If the RHS is a constant, see if we can simplify it.
        if (ShrinkDemandedConstant(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
          return I;

        // Combine:
        // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
        //   -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
        // where only the low bits known to be zero in the pointer are changed
        Value *InnerPtr;
        uint64_t GEPIndex;
        uint64_t PtrMaskImmediate;
        if (match(I, m_Intrinsic<Intrinsic::ptrmask>(
                         m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
                         m_ConstantInt(PtrMaskImmediate)))) {

          LHSKnown = computeKnownBits(InnerPtr, I, Depth + 1);
          if (!LHSKnown.isZero()) {
            const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
            uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;

            uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
            uint64_t MaskedLowBitsGEPIndex =
                GEPIndex & PointerAlignBits & PtrMaskImmediate;

            uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;

            if (MaskedGEPIndex != GEPIndex) {
              auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
              Builder.SetInsertPoint(I);
              Type *GEPIndexType =
                  DL.getIndexType(GEP->getPointerOperand()->getType());
              Value *MaskedGEP = Builder.CreateGEP(
                  GEP->getSourceElementType(), InnerPtr,
                  ConstantInt::get(GEPIndexType, MaskedGEPIndex),
                  GEP->getName(), GEP->isInBounds());

              replaceOperand(*I, 0, MaskedGEP);
              return I;
            }
          }
        }

        break;
      }

      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (I->getOperand(0) != I->getOperand(1)) {
          if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Q,
                                   Depth + 1) ||
              SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Q,
                                   Depth + 1)) {
            // Range attribute or metadata may no longer hold.
            I->dropPoisonGeneratingAnnotations();
            return I;
          }
        } else { // fshl is a rotate
          // Avoid converting rotate into funnel shift.
          // Only simplify if one operand is constant.
          LHSKnown = computeKnownBits(I->getOperand(0), I, Depth + 1);
          if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
              !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) {
            replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
            return I;
          }

          RHSKnown = computeKnownBits(I->getOperand(1), I, Depth + 1);
          if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
              !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) {
            replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
            return I;
          }
        }

        LHSKnown <<= ShiftAmt;
        RHSKnown >>= BitWidth - ShiftAmt;
        Known = LHSKnown.unionWith(RHSKnown);
        KnownBitsComputed = true;
        break;
      }
      case Intrinsic::umax: {
        // UMax(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-zero bit of C.
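        // For example, with C = 255 and DemandedMask = 0xFF00: whenever the
        // max selects C, A must be less than 255, so A and C agree on every
        // demanded high bit anyway.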
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getActiveBits())
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::umin: {
        // UMin(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-one bit of C.
        // This comes from using DeMorgans on the above umax example.
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getBitWidth() - C->countl_one())
          return II->getArgOperand(0);
        break;
      }
      default: {
        // Handle target specific intrinsics.
        std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V)
          return *V;
        break;
      }
      }
    }

    if (!KnownBitsComputed)
      llvm::computeKnownBits(I, Known, Q, Depth);
    break;
  }
  }

  if (I->getType()->isPointerTy()) {
    Align Alignment = I->getPointerAlignment(DL);
    Known.Zero.setLowBits(Log2(Alignment));
  }

  // If the client is only demanding bits that we know, return the known
  // constant. We can't directly simplify pointers as a constant because of
  // pointer provenance.
  // TODO: We could return `(inttoptr const)` for pointers.
  if (!I->getType()->isPointerTy() &&
      DemandedMask.isSubsetOf(Known.Zero | Known.One))
    return Constant::getIntegerValue(VTy, Known.One);

  if (VerifyKnownBits) {
    KnownBits ReferenceKnown = llvm::computeKnownBits(I, Q, Depth);
    if (Known != ReferenceKnown) {
      errs() << "Mismatched known bits for " << *I << " in "
             << I->getFunction()->getName() << "\n";
      errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
      errs() << "SimplifyDemandedBits(): " << Known << "\n";
      std::abort();
    }
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known,
    const SimplifyQuery &Q, unsigned Depth) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in every user's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Q, Depth);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Add: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::Sub: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
    Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Q, Depth);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be
    // unnecessary, if we do not demand any of the new sign bits. So, return
    // the original operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Q, Depth);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest of the bits.
///
/// Currently we only test condition 2).
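///
/// For example, E1 = (X lsr 2) << 4 and E2 = X << 2 differ only in bits 2
/// and 3, where E1 has zeros. If DemandedMask does not include bits 2 and 3,
/// E1 can be rewritten as E2.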
1380///
1381/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
1382/// not successful.
1384 Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
1385 const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnes(BitWidth));
  APInt BitMask2(APInt::getAllOnes(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt) :
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, Shl->getIterator());
  }

  return nullptr;
}

/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are poison and
/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single user. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
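///
/// For example, if lane 1 of (insertelement %v, %s, i64 1) is never read by
/// any user, the insert can be dropped and %v returned in its place.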
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &PoisonElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnes(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (match(V, m_Poison())) {
    // If the entire vector is poison, just return this info.
    PoisonElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
    PoisonElts = EltMask;
    return PoisonValue::get(V->getType());
  }

  PoisonElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnes())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(EltTy);
    SmallVector<Constant *, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Poison);
        PoisonElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<PoisonValue>(Elt)) // Already poison.
        PoisonElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

1514 // Limit search depth.
1515 if (Depth == SimplifyDemandedVectorEltsDepthLimit)
1516 return nullptr;
1517
1518 if (!AllowMultipleUsers) {
1519 // If multiple users are using the root value, proceed with
1520 // simplification conservatively assuming that all elements
1521 // are needed.
1522 if (!V->hasOneUse()) {
1523 // Quit if we find multiple users of a non-root value though.
1524 // They'll be handled when it's their turn to be visited by
1525 // the main instcombine process.
1526 if (Depth != 0)
1527 // TODO: Just compute the PoisonElts information recursively.
1528 return nullptr;
1529
1530 // Conservatively assume that all elements are needed.
1531 DemandedElts = EltMask;
1532 }
1533 }
1534
1535 Instruction *I = dyn_cast<Instruction>(V);
1536 if (!I) return nullptr; // Only analyze instructions.
1537
1538 bool MadeChange = false;
1539 auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
1540 APInt Demanded, APInt &Undef) {
1541 auto *II = dyn_cast<IntrinsicInst>(Inst);
1542 Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
1543 if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
1544 replaceOperand(*Inst, OpNum, V);
1545 MadeChange = true;
1546 }
1547 };
1548
1549 APInt PoisonElts2(VWidth, 0);
1550 APInt PoisonElts3(VWidth, 0);
1551 switch (I->getOpcode()) {
1552 default: break;
1553
1554 case Instruction::GetElementPtr: {
1555 // The LangRef requires that struct geps have all constant indices. As
1556 // such, we can't convert any operand to partial undef.
1557 auto mayIndexStructType = [](GetElementPtrInst &GEP) {
1558 for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
1559 I != E; I++)
1560 if (I.isStruct())
1561 return true;
1562 return false;
1563 };
1564 if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
1565 break;
1566
1567 // Conservatively track the demanded elements back through any vector
1568 // operands we may have. We know there must be at least one, or we
1569 // wouldn't have a vector result to get here. Note that we intentionally
1570 // merge the undef bits here since gepping with either a poison base or
1571 // index results in poison.
1572 for (unsigned i = 0; i < I->getNumOperands(); i++) {
1573 if (i == 0 ? match(I->getOperand(i), m_Undef())
1574 : match(I->getOperand(i), m_Poison())) {
1575 // If the entire vector is undefined, just return this info.
1576 PoisonElts = EltMask;
1577 return nullptr;
1578 }
1579 if (I->getOperand(i)->getType()->isVectorTy()) {
1580 APInt PoisonEltsOp(VWidth, 0);
1581 simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
1582 // gep(x, undef) is not undef, so skip considering idx ops here
1583 // Note that we could propagate poison, but we can't distinguish between
1584 // undef & poison bits ATM
1585 if (i == 0)
1586 PoisonElts |= PoisonEltsOp;
1587 }
1588 }
1589
1590 break;
1591 }
1592 case Instruction::InsertElement: {
1593 // If this is a variable index, we don't know which element it overwrites,
1594 // so demand exactly the same input as we produce.
1595 ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
1596 if (!Idx) {
1597 // Note that we can't propagate undef elt info, because we don't know
1598 // which elt is getting updated.
1599 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
1600 break;
1601 }
1602
1603 // The element inserted overwrites whatever was there, so the input demanded
1604 // set is simpler than the output set.
1605 unsigned IdxNo = Idx->getZExtValue();
1606 APInt PreInsertDemandedElts = DemandedElts;
1607 if (IdxNo < VWidth)
1608 PreInsertDemandedElts.clearBit(IdxNo);
1609
1610 // If we only demand the element that is being inserted and that element
1611 // was extracted from the same index in another vector with the same type,
1612 // replace this insert with that other vector.
1613 // Note: This is attempted before the call to simplifyAndSetOp because that
1614 // may change PoisonElts to a value that does not match with Vec.
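// For example (illustrative IR):
// %e = extractelement <4 x i32> %v, i64 1
// %i = insertelement <4 x i32> %w, i32 %e, i64 1
// If only element 1 of %i is demanded, %i can be replaced directly by %v.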
1615 Value *Vec;
1616 if (PreInsertDemandedElts == 0 &&
1617 match(I->getOperand(1),
1618 m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
1619 Vec->getType() == I->getType()) {
1620 return Vec;
1621 }
1622
1623 simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
1624
1625 // If this is inserting an element that isn't demanded, remove this
1626 // insertelement.
1627 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1628 Worklist.push(I);
1629 return I->getOperand(0);
1630 }
1631
1632 // The inserted element is defined.
1633 PoisonElts.clearBit(IdxNo);
1634 break;
1635 }
1636 case Instruction::ShuffleVector: {
1637 auto *Shuffle = cast<ShuffleVectorInst>(I);
1638 assert(Shuffle->getOperand(0)->getType() ==
1639 Shuffle->getOperand(1)->getType() &&
1640 "Expected shuffle operands to have same type");
1641 unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
1642 ->getNumElements();
1643 // Handle trivial case of a splat. Only check the first element of LHS
1644 // operand.
1645 if (all_of(Shuffle->getShuffleMask(), equal_to(0)) &&
1646 DemandedElts.isAllOnes()) {
1647 if (!isa<PoisonValue>(I->getOperand(1))) {
1648 I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
1649 MadeChange = true;
1650 }
1651 APInt LeftDemanded(OpWidth, 1);
1652 APInt LHSPoisonElts(OpWidth, 0);
1653 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1654 if (LHSPoisonElts[0])
1655 PoisonElts = EltMask;
1656 else
1657 PoisonElts.clearAllBits();
1658 break;
1659 }
1660
1661 APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
1662 for (unsigned i = 0; i < VWidth; i++) {
1663 if (DemandedElts[i]) {
1664 unsigned MaskVal = Shuffle->getMaskValue(i);
1665 if (MaskVal != -1u) {
1666 assert(MaskVal < OpWidth * 2 &&
1667 "shufflevector mask index out of range!");
1668 if (MaskVal < OpWidth)
1669 LeftDemanded.setBit(MaskVal);
1670 else
1671 RightDemanded.setBit(MaskVal - OpWidth);
1672 }
1673 }
1674 }
1675
1676 APInt LHSPoisonElts(OpWidth, 0);
1677 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1678
1679 APInt RHSPoisonElts(OpWidth, 0);
1680 simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
1681
1682 // If this shuffle does not change the vector length and the elements
1683 // demanded by this shuffle are an identity mask, then this shuffle is
1684 // unnecessary.
1685 //
1686 // We are assuming canonical form for the mask, so the source vector is
1687 // operand 0 and operand 1 is not used.
1688 //
1689 // Note that if an element is demanded and this shuffle mask is undefined
1690 // for that element, then the shuffle is not considered an identity
1691 // operation. The shuffle prevents poison from the operand vector from
1692 // leaking to the result by replacing poison with an undefined value.
1693 if (VWidth == OpWidth) {
1694 bool IsIdentityShuffle = true;
1695 for (unsigned i = 0; i < VWidth; i++) {
1696 unsigned MaskVal = Shuffle->getMaskValue(i);
1697 if (DemandedElts[i] && i != MaskVal) {
1698 IsIdentityShuffle = false;
1699 break;
1700 }
1701 }
1702 if (IsIdentityShuffle)
1703 return Shuffle->getOperand(0);
1704 }
1705
1706 bool NewPoisonElts = false;
1707 unsigned LHSIdx = -1u, LHSValIdx = -1u;
1708 unsigned RHSIdx = -1u, RHSValIdx = -1u;
1709 bool LHSUniform = true;
1710 bool RHSUniform = true;
1711 for (unsigned i = 0; i < VWidth; i++) {
1712 unsigned MaskVal = Shuffle->getMaskValue(i);
1713 if (MaskVal == -1u) {
1714 PoisonElts.setBit(i);
1715 } else if (!DemandedElts[i]) {
1716 NewPoisonElts = true;
1717 PoisonElts.setBit(i);
1718 } else if (MaskVal < OpWidth) {
1719 if (LHSPoisonElts[MaskVal]) {
1720 NewPoisonElts = true;
1721 PoisonElts.setBit(i);
1722 } else {
1723 LHSIdx = LHSIdx == -1u ? i : OpWidth;
1724 LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
1725 LHSUniform = LHSUniform && (MaskVal == i);
1726 }
1727 } else {
1728 if (RHSPoisonElts[MaskVal - OpWidth]) {
1729 NewPoisonElts = true;
1730 PoisonElts.setBit(i);
1731 } else {
1732 RHSIdx = RHSIdx == -1u ? i : OpWidth;
1733 RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
1734 RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
1735 }
1736 }
1737 }
1738
1739 // Try to transform shuffle with constant vector and single element from
1740 // this constant vector to single insertelement instruction.
1741 // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1742 // insertelement V, C[ci], ci-n
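// e.g. (illustrative):
// shufflevector <2 x i32> %v, <2 x i32> <i32 7, i32 poison>, <2 x i32> <i32 0, i32 2>
// --> insertelement <2 x i32> %v, i32 7, i64 1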
1743 if (OpWidth ==
1744 cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
1745 Value *Op = nullptr;
1746 Constant *Value = nullptr;
1747 unsigned Idx = -1u;
1748
1749 // Find constant vector with the single element in shuffle (LHS or RHS).
1750 if (LHSIdx < OpWidth && RHSUniform) {
1751 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1752 Op = Shuffle->getOperand(1);
1753 Value = CV->getOperand(LHSValIdx);
1754 Idx = LHSIdx;
1755 }
1756 }
1757 if (RHSIdx < OpWidth && LHSUniform) {
1758 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1759 Op = Shuffle->getOperand(0);
1760 Value = CV->getOperand(RHSValIdx);
1761 Idx = RHSIdx;
1762 }
1763 }
1764 // Found constant vector with single element - convert to insertelement.
1765 if (Op && Value) {
1766 Instruction *New = InsertElementInst::Create(
1767 Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
1768 Shuffle->getName());
1769 InsertNewInstWith(New, Shuffle->getIterator());
1770 return New;
1771 }
1772 }
1773 if (NewPoisonElts) {
1774 // Add additional discovered undefs.
1775 SmallVector<int, 16> Elts;
1776 for (unsigned i = 0; i < VWidth; ++i) {
1777 if (PoisonElts[i])
1778 Elts.push_back(PoisonMaskElem);
1779 else
1780 Elts.push_back(Shuffle->getMaskValue(i));
1781 }
1782 Shuffle->setShuffleMask(Elts);
1783 MadeChange = true;
1784 }
1785 break;
1786 }
1787 case Instruction::Select: {
1788 // If this is a vector select, try to transform the select condition based
1789 // on the current demanded elements.
1790 SelectInst *Sel = cast<SelectInst>(I);
1791 if (Sel->getCondition()->getType()->isVectorTy()) {
1792 // TODO: We are not doing anything with PoisonElts based on this call.
1793 // It is overwritten below based on the other select operands. If an
1794 // element of the select condition is known undef, then we are free to
1795 // choose the output value from either arm of the select. If we know that
1796 // one of those values is undef, then the output can be undef.
1797 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1798 }
1799
1800 // Next, see if we can transform the arms of the select.
1801 APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
1802 if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
1803 for (unsigned i = 0; i < VWidth; i++) {
1804 Constant *CElt = CV->getAggregateElement(i);
1805
1806 // isNullValue() always returns false when called on a ConstantExpr.
1807 if (CElt->isNullValue())
1808 DemandedLHS.clearBit(i);
1809 else if (CElt->isOneValue())
1810 DemandedRHS.clearBit(i);
1811 }
1812 }
1813
1814 simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
1815 simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
1816
1817 // Output elements are undefined if the element from each arm is undefined.
1818 // TODO: This can be improved. See comment in select condition handling.
1819 PoisonElts = PoisonElts2 & PoisonElts3;
1820 break;
1821 }
1822 case Instruction::BitCast: {
1823 // Vector->vector casts only.
1824 VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1825 if (!VTy) break;
1826 unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
1827 APInt InputDemandedElts(InVWidth, 0);
1828 PoisonElts2 = APInt(InVWidth, 0);
1829 unsigned Ratio;
1830
1831 if (VWidth == InVWidth) {
1832 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1833 // elements as are demanded of us.
1834 Ratio = 1;
1835 InputDemandedElts = DemandedElts;
1836 } else if ((VWidth % InVWidth) == 0) {
1837 // If the number of elements in the output is a multiple of the number of
1838 // elements in the input then an input element is live if any of the
1839 // corresponding output elements are live.
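// For example (illustrative): bitcasting <2 x i64> to <4 x i32> gives
// Ratio = 2, and output elements 2*i and 2*i+1 both map to input element i.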
1840 Ratio = VWidth / InVWidth;
1841 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1842 if (DemandedElts[OutIdx])
1843 InputDemandedElts.setBit(OutIdx / Ratio);
1844 } else if ((InVWidth % VWidth) == 0) {
1845 // If the number of elements in the input is a multiple of the number of
1846 // elements in the output then an input element is live if the
1847 // corresponding output element is live.
1848 Ratio = InVWidth / VWidth;
1849 for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1850 if (DemandedElts[InIdx / Ratio])
1851 InputDemandedElts.setBit(InIdx);
1852 } else {
1853 // Unsupported so far.
1854 break;
1855 }
1856
1857 simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
1858
1859 if (VWidth == InVWidth) {
1860 PoisonElts = PoisonElts2;
1861 } else if ((VWidth % InVWidth) == 0) {
1862 // If the number of elements in the output is a multiple of the number of
1863 // elements in the input then an output element is undef if the
1864 // corresponding input element is undef.
1865 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1866 if (PoisonElts2[OutIdx / Ratio])
1867 PoisonElts.setBit(OutIdx);
1868 } else if ((InVWidth % VWidth) == 0) {
1869 // If the number of elements in the input is a multiple of the number of
1870 // elements in the output then an output element is undef if all of the
1871 // corresponding input elements are undef.
1872 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1873 APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
1874 if (SubUndef.popcount() == Ratio)
1875 PoisonElts.setBit(OutIdx);
1876 }
1877 } else {
1878 llvm_unreachable("Unimp");
1879 }
1880 break;
1881 }
1882 case Instruction::FPTrunc:
1883 case Instruction::FPExt:
1884 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1885 break;
1886
1887 case Instruction::Call: {
1888 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1889 if (!II) break;
1890 switch (II->getIntrinsicID()) {
1891 case Intrinsic::masked_gather: // fallthrough
1892 case Intrinsic::masked_load: {
1893 // Subtlety: If we load from a pointer, the pointer must be valid
1894 // regardless of whether the element is demanded. Doing otherwise risks
1895 // segfaults which didn't exist in the original program.
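// For example (illustrative): with a constant mask <i1 1, i1 0>, lane 0
// demands its pointer but not its passthrough value, while lane 1 demands
// its passthrough value but not its pointer.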
1896 APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
1897 DemandedPassThrough(DemandedElts);
1898 if (auto *CMask = dyn_cast<Constant>(II->getOperand(1))) {
1899 for (unsigned i = 0; i < VWidth; i++) {
1900 if (Constant *CElt = CMask->getAggregateElement(i)) {
1901 if (CElt->isNullValue())
1902 DemandedPtrs.clearBit(i);
1903 else if (CElt->isAllOnesValue())
1904 DemandedPassThrough.clearBit(i);
1905 }
1906 }
1907 }
1908
1909 if (II->getIntrinsicID() == Intrinsic::masked_gather)
1910 simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
1911 simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3);
1912
1913 // Output elements are undefined if the elements from both sources are.
1914 // TODO: can strengthen via mask as well.
1915 PoisonElts = PoisonElts2 & PoisonElts3;
1916 break;
1917 }
1918 default: {
1919 // Handle target specific intrinsics
1920 std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
1921 *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
1922 simplifyAndSetOp);
1923 if (V)
1924 return *V;
1925 break;
1926 }
1927 } // switch on IntrinsicID
1928 break;
1929 } // case Call
1930 } // switch on Opcode
1931
1932 // TODO: We bail completely on integer div/rem and shifts because they have
1933 // UB/poison potential, but that should be refined.
1934 BinaryOperator *BO;
1935 if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1936 Value *X = BO->getOperand(0);
1937 Value *Y = BO->getOperand(1);
1938
1939 // Look for an equivalent binop except that one operand has been shuffled.
1940 // If the demand for this binop only includes elements that are the same as
1941 // the other binop, then we may be able to replace this binop with a use of
1942 // the earlier one.
1943 //
1944 // Example:
1945 // %other_bo = bo (shuf X, {0}), Y
1946 // %this_extracted_bo = extelt (bo X, Y), 0
1947 // -->
1948 // %other_bo = bo (shuf X, {0}), Y
1949 // %this_extracted_bo = extelt %other_bo, 0
1950 //
1951 // TODO: Handle demand of an arbitrary single element or more than one
1952 // element instead of just element 0.
1953 // TODO: Unlike general demanded elements transforms, this should be safe
1954 // for any (div/rem/shift) opcode too.
1955 if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
1956 BO->hasOneUse()) {
1957
1958 auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
1959 // Try to use shuffle-of-operand in place of an operand:
1960 // bo X, Y --> bo (shuf X), Y
1961 // bo X, Y --> bo X, (shuf Y)
1962
1963 Value *OtherOp = MatchShufAsOp0 ? Y : X;
1964 if (!OtherOp->hasUseList())
1965 return nullptr;
1966
1967 BinaryOperator::BinaryOps Opcode = BO->getOpcode();
1968 Value *ShufOp = MatchShufAsOp0 ? X : Y;
1969
1970 for (User *U : OtherOp->users()) {
1971 ArrayRef<int> Mask;
1972 auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_Mask(Mask));
1973 if (BO->isCommutative()
1974 ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1975 : MatchShufAsOp0
1976 ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1977 : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
1978 if (match(Mask, m_ZeroMask()) && Mask[0] != PoisonMaskElem)
1979 if (DT.dominates(U, I))
1980 return U;
1981 }
1982 return nullptr;
1983 };
1984
1985 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
1986 return ShufBO;
1987 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
1988 return ShufBO;
1989 }
1990
1991 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1992 simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
1993
1994 // Output elements are undefined if both are undefined. Consider things
1995 // like undef & 0. The result is known zero, not undef.
1996 PoisonElts &= PoisonElts2;
1997 }
1998
1999 // If we've proven all of the lanes poison, return a poison value.
2000 // TODO: Intersect w/demanded lanes
2001 if (PoisonElts.isAllOnes())
2002 return PoisonValue::get(I->getType());
2003
2004 return MadeChange ? I : nullptr;
2005}
2006
2007/// For floating-point classes that resolve to a single bit pattern, return that
2008/// value.
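/// For example (illustrative): a mask of exactly fcPosZero can only be the
/// value +0.0, and a mask of exactly fcNegInf can only be -infinity.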
2009 static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask,
2010 bool IsCanonicalizing = false) {
2011 if (Mask == fcNone)
2012 return PoisonValue::get(Ty);
2013
2014 if (Mask == fcPosZero)
2015 return Constant::getNullValue(Ty);
2016
2017 // Turn any possible snans into quiet if we can.
2018 if (Mask == fcNan && IsCanonicalizing)
2019 return ConstantFP::getQNaN(Ty);
2020
2021 // TODO: Support aggregate types that are allowed by FPMathOperator.
2022 if (Ty->isAggregateType())
2023 return nullptr;
2024
2025 switch (Mask) {
2026 case fcNegZero:
2027 return ConstantFP::getZero(Ty, true);
2028 case fcPosInf:
2029 return ConstantFP::getInfinity(Ty);
2030 case fcNegInf:
2031 return ConstantFP::getInfinity(Ty, true);
2032 default:
2033 return nullptr;
2034 }
2035}
2036
2037 /// Perform multiple-use aware simplifications for fabs(\p Src). Returns a
2038/// replacement value if it's simplified, otherwise nullptr. Updates \p Known
2039/// with the known fpclass if not simplified.
2040 static Value *simplifyDemandedFPClassFabs(KnownFPClass &Known, Value *Src,
2041 FPClassTest DemandedMask,
2042 KnownFPClass KnownSrc, bool NSZ) {
2043 if ((DemandedMask & fcNan) == fcNone)
2044 KnownSrc.knownNot(fcNan);
2045 if ((DemandedMask & fcInf) == fcNone)
2046 KnownSrc.knownNot(fcInf);
2047
2048 if (KnownSrc.SignBit == false ||
2049 ((DemandedMask & fcNan) == fcNone && KnownSrc.isKnownNever(fcNegative)))
2050 return Src;
2051
2052 // If the only sign bit difference is due to -0, ignore it with nsz
2053 if (NSZ &&
2054 KnownSrc.isKnownNever(fcNegative & ~fcNegZero))
2055 return Src;
2056
2057 Known = KnownFPClass::fabs(KnownSrc);
2058 return nullptr;
2059}
2060
2061static Value *
2062 simplifyDemandedFPClassMinMax(KnownFPClass &Known, Intrinsic::ID IID,
2063 const CallInst *CI, FPClassTest DemandedMask,
2064 KnownFPClass KnownLHS, KnownFPClass KnownRHS,
2065 const Function &F, bool NSZ) {
2066 const bool PropagateNaN =
2067 IID == Intrinsic::maximum || IID == Intrinsic::minimum;
2068
2069 /// Propagate nnan-ness to simplify edge case checks.
2070 if (PropagateNaN && (DemandedMask & fcNan) == fcNone) {
2071 KnownLHS.knownNot(fcNan);
2072 KnownRHS.knownNot(fcNan);
2073 }
2074
2075 bool OrderedZeroSign = !NSZ;
2076
2078 switch (IID) {
2079 case Intrinsic::maximum: {
2081
2082 // If one operand is known greater than the other, it must be that
2083 // operand unless the other is a nan.
2085 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2086 KnownRHS.isKnownNever(fcNan))
2087 return CI->getArgOperand(0);
2088
2090 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2091 KnownLHS.isKnownNever(fcNan))
2092 return CI->getArgOperand(1);
2093
2094 break;
2095 }
2096 case Intrinsic::minimum: {
2098
2099 // If one operand is known less than the other, it must be that operand
2100 // unless the other is a nan.
2102 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2103 KnownRHS.isKnownNever(fcNan))
2104 return CI->getArgOperand(0);
2105
2107 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2108 KnownLHS.isKnownNever(fcNan))
2109 return CI->getArgOperand(1);
2110
2111 break;
2112 }
2113 case Intrinsic::maximumnum: {
2115
2117 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2118 KnownLHS.isKnownNever(fcNan))
2119 return CI->getArgOperand(0);
2120
2122 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2123 KnownRHS.isKnownNever(fcNan))
2124 return CI->getArgOperand(1);
2125
2126 break;
2127 }
2128 case Intrinsic::minimumnum: {
2130
2132 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2133 KnownLHS.isKnownNever(fcNan))
2134 return CI->getArgOperand(0);
2135
2137 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2138 KnownRHS.isKnownNever(fcNan))
2139 return CI->getArgOperand(1);
2140
2141 break;
2142 }
2143 default:
2144 llvm_unreachable("not a min/max intrinsic");
2145 }
2146
2147 Type *EltTy = CI->getType()->getScalarType();
2148 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2149 Known = KnownFPClass::minMaxLike(KnownLHS, KnownRHS, OpKind, Mode);
2150
2151 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2152 return getFPClassConstant(CI->getType(), ValidResults,
2153 /*IsCanonicalizing=*/true);
2154}
2155
2156/// Try to set an inferred no-nans or no-infs in \p FMF. \p
2157/// ValidResults is a mask of known valid results for the operator
2158 /// (already computed from the result and the known operand inputs,
2159 /// \p KnownLHS and \p KnownRHS).
2160static FastMathFlags
2161 inferFastMathValueFlagsBinOp(FastMathFlags FMF, FPClassTest ValidResults,
2162 const KnownFPClass &KnownLHS,
2163 const KnownFPClass &KnownRHS) {
2164 if (!FMF.noNaNs() && (ValidResults & fcNan) == fcNone &&
2165 KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN())
2166 FMF.setNoNaNs();
2167
2168 if (!FMF.noInfs() && (ValidResults & fcInf) == fcNone &&
2169 KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity())
2170 FMF.setNoInfs();
2171
2172 return FMF;
2173}
2174
2175 static FPClassTest adjustDemandedMaskFromFlags(FPClassTest DemandedMask,
2176 FastMathFlags FMF) {
2177 if (FMF.noNaNs())
2178 DemandedMask &= ~fcNan;
2179
2180 if (FMF.noInfs())
2181 DemandedMask &= ~fcInf;
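// For example (illustrative): with both nnan and ninf set, a demanded mask
// of fcAllFlags reduces to fcFinite.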
2182 return DemandedMask;
2183}
2184
2185 Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Instruction *I,
2186 FPClassTest DemandedMask,
2187 KnownFPClass &Known,
2188 Instruction *CxtI,
2189 unsigned Depth) {
2190 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2191 assert(Known == KnownFPClass() && "expected uninitialized state");
2192 assert(I->hasOneUse() && "wrong version called");
2193
2194 Type *VTy = I->getType();
2195
2196 FastMathFlags FMF;
2197 if (auto *FPOp = dyn_cast<FPMathOperator>(I)) {
2198 FMF = FPOp->getFastMathFlags();
2199 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
2200 }
2201
2202 // Remove unwanted results from the computed result
2203 scope_exit ApplyDemandedMask(
2204 [=, &Known]() { Known.knownNot(~DemandedMask); });
2205
2206 switch (I->getOpcode()) {
2207 case Instruction::FNeg: {
2208 if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known,
2209 Depth + 1))
2210 return I;
2211 Known.fneg();
2212 break;
2213 }
2214 case Instruction::FAdd: {
2215 KnownFPClass KnownLHS, KnownRHS;
2216
2218
2219 // fadd x, x can be handled more aggressively.
2220 if (I->getOperand(0) == I->getOperand(1) &&
2221 isGuaranteedNotToBeUndef(I->getOperand(0), SQ.AC, CxtI, SQ.DT,
2222 Depth + 1)) {
2223 Type *EltTy = VTy->getScalarType();
2224 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2225
2226 FPClassTest SrcDemandedMask = DemandedMask;
2227
2228 // Doubling a subnormal could have resulted in a normal value.
2229 if (DemandedMask & fcPosNormal)
2230 SrcDemandedMask |= fcPosSubnormal;
2231 if (DemandedMask & fcNegNormal)
2232 SrcDemandedMask |= fcNegSubnormal;
2233
2234 // Doubling a subnormal may produce 0 if FTZ/DAZ.
2235 if (Mode != DenormalMode::getIEEE()) {
2236 if (DemandedMask & fcPosZero) {
2237 SrcDemandedMask |= fcPosSubnormal;
2238
2239 if (Mode.inputsMayBePositiveZero() || Mode.outputsMayBePositiveZero())
2240 SrcDemandedMask |= fcNegSubnormal;
2241 }
2242
2243 if (DemandedMask & fcNegZero)
2244 SrcDemandedMask |= fcNegSubnormal;
2245 }
2246
2247 // Doubling a normal could have resulted in an infinity.
2248 if (DemandedMask & fcPosInf)
2249 SrcDemandedMask |= fcPosNormal;
2250 if (DemandedMask & fcNegInf)
2251 SrcDemandedMask |= fcNegNormal;
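// For example (illustrative): if only fcPosInf is demanded of x + x, the
// source may still be a large positive normal that doubles to +inf, so
// fcPosNormal must remain demanded of x.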
2252
2253 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2254 return I;
2255
2256 Known = KnownFPClass::fadd_self(KnownLHS, Mode);
2257 KnownRHS = KnownLHS;
2258 } else {
2259 FPClassTest SrcDemandedMask = fcFinite;
2260
2261 // inf + (-inf) = nan
2262 if (DemandedMask & fcNan)
2263 SrcDemandedMask |= fcNan | fcInf;
2264
2265 if (DemandedMask & fcInf)
2266 SrcDemandedMask |= fcInf;
2267
2268 if (SimplifyDemandedFPClass(I, 1, SrcDemandedMask, KnownRHS, Depth + 1) ||
2269 SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2270 return I;
2271
2272 Type *EltTy = VTy->getScalarType();
2273 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2274 Known = KnownFPClass::fadd(KnownLHS, KnownRHS, Mode);
2275 }
2276
2277 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2278 if (Constant *SingleVal =
2279 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2280 return SingleVal;
2281
2282 // Propagate known result to simplify edge case checks.
2283 bool ResultNotNan = (DemandedMask & fcNan) == fcNone;
2284 if (ResultNotNan) {
2285 KnownLHS.knownNot(fcNan);
2286 KnownRHS.knownNot(fcNan);
2287 }
2288
2289 // With nnan: X + {+/-}Inf --> {+/-}Inf
2290 if (ResultNotNan && KnownRHS.isKnownAlways(fcInf | fcNan) &&
2291 KnownLHS.isKnownNever(fcNan))
2292 return I->getOperand(1);
2293
2294 // With nnan: {+/-}Inf + X --> {+/-}Inf
2295 if (ResultNotNan && KnownLHS.isKnownAlways(fcInf | fcNan) &&
2296 KnownRHS.isKnownNever(fcNan))
2297 return I->getOperand(0);
2298
2299 FastMathFlags InferredFMF =
2300 inferFastMathValueFlagsBinOp(FMF, ValidResults, KnownLHS, KnownRHS);
2301 if (InferredFMF != FMF) {
2302 I->setFastMathFlags(InferredFMF);
2303 return I;
2304 }
2305
2306 return nullptr;
2307 }
2308 case Instruction::FMul: {
2309 KnownFPClass KnownLHS, KnownRHS;
2310
2311 Value *X = I->getOperand(0);
2312 Value *Y = I->getOperand(1);
2313
2314 FPClassTest SrcDemandedMask =
2315 DemandedMask & (fcNan | fcZero | fcSubnormal | fcNormal);
2316
2317 if (DemandedMask & fcInf) {
2318 // mul x, inf = inf
2319 // mul large_x, large_y = inf
2320 SrcDemandedMask |= fcSubnormal | fcNormal | fcInf;
2321 }
2322
2323 if (DemandedMask & fcNan) {
2324 // mul +/-inf, 0 => nan
2325 SrcDemandedMask |= fcZero | fcInf;
2326
2327 // TODO: Mode check
2328 // mul +/-inf, sub => nan if daz
2329 SrcDemandedMask |= fcSubnormal;
2330 }
2331
2332 // mul normal, subnormal = normal
2333 // Normal inputs may result in underflow.
2334 if (DemandedMask & (fcNormal | fcSubnormal))
2335 SrcDemandedMask |= fcNormal | fcSubnormal;
2336
2337 if (DemandedMask & fcZero)
2338 SrcDemandedMask |= fcNormal | fcSubnormal;
2339
2341 if (X == Y && isGuaranteedNotToBeUndef(X, SQ.AC, CxtI, SQ.DT, Depth + 1)) {
2342 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2343 return I;
2344 Type *EltTy = VTy->getScalarType();
2345
2346 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2347 Known = KnownFPClass::square(KnownLHS, Mode);
2348
2349 // Propagate known result to simplify edge case checks.
2350 if ((DemandedMask & fcNan) == fcNone)
2351 Known.knownNot(fcNan);
2352 if ((DemandedMask & fcPosInf) == fcNone)
2353 Known.knownNot(fcInf);
2354
2355 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2356 if (Constant *Folded =
2357 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2358 return Folded;
2359
2360 if (Known.isKnownAlways(fcPosZero | fcPosInf | fcNan)) {
2361 assert(KnownLHS.isKnownNever(fcPosNormal));
2362
2363 // We can skip the fabs if the source was already known positive.
2364 if (KnownLHS.isKnownAlways(fcPositive))
2365 return X;
2366
2367 // => fabs(x), in case this was a -inf or -0.
2368 // Note: Dropping canonicalize.
2369 IRBuilderBase::InsertPointGuard Guard(Builder);
2370 Builder.SetInsertPoint(I);
2371 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FMF);
2372 Fabs->takeName(I);
2373 return Fabs;
2374 }
2375
2376 return nullptr;
2377 }
2378
2379 if (SimplifyDemandedFPClass(I, 1, SrcDemandedMask, KnownRHS, Depth + 1) ||
2380 SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2381 return I;
2382
2383 // Propagate nnan-ness to sources to simplify source checks.
2384 if ((DemandedMask & fcNan) == fcNone) {
2385 KnownLHS.knownNot(fcNan);
2386 KnownRHS.knownNot(fcNan);
2387 }
2388
2389 if (FMF.noInfs()) {
2390 // Flag implies inputs cannot be infinity.
2391 KnownLHS.knownNot(fcInf);
2392 KnownRHS.knownNot(fcInf);
2393 }
2394
2395 bool NonNanResult = (DemandedMask & fcNan) == fcNone;
2396
2397 // With no-nans/no-infs:
2398 // X * 0.0 --> copysign(0.0, X)
2399 // X * -0.0 --> copysign(0.0, -X)
2400 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2401 KnownRHS.isKnownAlways(fcPosZero | fcNan)) {
2402 // => copysign(+0, lhs)
2403 // Note: Dropping canonicalize
2404 Value *Copysign = Builder.CreateCopySign(Y, X, FMF);
2405 Copysign->takeName(I);
2406 return Copysign;
2407 }
2408
2409 if (KnownLHS.isKnownAlways(fcPosZero | fcNan) &&
2410 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2411 // => copysign(+0, rhs)
2412 // Note: Dropping canonicalize
2413 Value *Copysign = Builder.CreateCopySign(X, Y, FMF);
2414 Copysign->takeName(I);
2415 return Copysign;
2416 }
2417
2418 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2419 KnownRHS.isKnownAlways(fcNegZero | fcNan)) {
2420 // => copysign(0, fneg(lhs))
2421 // Note: Dropping canonicalize
2422 Value *Copysign =
2423 Builder.CreateCopySign(Y, Builder.CreateFNegFMF(X, FMF), FMF);
2424 Copysign->takeName(I);
2425 return Copysign;
2426 }
2427
2428 if (KnownLHS.isKnownAlways(fcNegZero | fcNan) &&
2429 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2430 // => copysign(+0, fneg(rhs))
2431 // Note: Dropping canonicalize
2432 Value *Copysign =
2433 Builder.CreateCopySign(X, Builder.CreateFNegFMF(Y, FMF), FMF);
2434 Copysign->takeName(I);
2435 return Copysign;
2436 }
2437
2438 Type *EltTy = VTy->getScalarType();
2439 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2440
2441 if (KnownLHS.isKnownAlways(fcInf | fcNan) &&
2442 (KnownRHS.isKnownNeverNaN() &&
2443 KnownRHS.cannotBeOrderedGreaterEqZero(Mode))) {
2444 // Note: Dropping canonicalize
2445 Value *Neg = Builder.CreateFNegFMF(X, FMF);
2446 Neg->takeName(I);
2447 return Neg;
2448 }
2449
2450 if (KnownRHS.isKnownAlways(fcInf | fcNan) &&
2451 (KnownLHS.isKnownNeverNaN() &&
2452 KnownLHS.cannotBeOrderedGreaterEqZero(Mode))) {
2453 // Note: Dropping canonicalize
2454 Value *Neg = Builder.CreateFNegFMF(Y, FMF);
2455 Neg->takeName(I);
2456 return Neg;
2457 }
2458
2459 Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode);
2460
2461 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2462 if (Constant *SingleVal =
2463 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2464 return SingleVal;
2465
2466 FastMathFlags InferredFMF =
2467 inferFastMathValueFlagsBinOp(FMF, ValidResults, KnownLHS, KnownRHS);
2468 if (InferredFMF != FMF) {
2469 I->setFastMathFlags(InferredFMF);
2470 return I;
2471 }
2472
2473 return nullptr;
2474 }
2475 case Instruction::FPExt: {
2476 FPClassTest SrcDemandedMask = DemandedMask;
2477
2478 // No subnormal result does not imply not-subnormal in the source type.
2479 if ((DemandedMask & fcNegNormal) != fcNone)
2480 SrcDemandedMask |= fcNegSubnormal;
2481 if ((DemandedMask & fcPosNormal) != fcNone)
2482 SrcDemandedMask |= fcPosSubnormal;
2483
2484 KnownFPClass KnownSrc;
2485 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2486 return I;
2487
2488 const fltSemantics &DstTy = VTy->getScalarType()->getFltSemantics();
2489 const fltSemantics &SrcTy =
2490 I->getOperand(0)->getType()->getScalarType()->getFltSemantics();
2491
2492 Known = KnownFPClass::fpext(KnownSrc, DstTy, SrcTy);
2493 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2494
2495 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2496 }
2497 case Instruction::Call: {
2498 CallInst *CI = cast<CallInst>(I);
2499 const Intrinsic::ID IID = CI->getIntrinsicID();
2500 switch (IID) {
2501 case Intrinsic::fabs: {
2502 KnownFPClass KnownSrc;
2503 if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask),
2504 KnownSrc, Depth + 1))
2505 return I;
2506
2507 if (Value *Simplified = simplifyDemandedFPClassFabs(
2508 Known, CI->getArgOperand(0), DemandedMask, KnownSrc,
2509 FMF.noSignedZeros()))
2510 return Simplified;
2511 break;
2512 }
2513 case Intrinsic::arithmetic_fence:
2514 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
2515 return I;
2516 break;
2517 case Intrinsic::copysign: {
2518 // Flip on more potentially demanded classes
2519 const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(DemandedMask);
2520 if (SimplifyDemandedFPClass(I, 0, DemandedMaskAnySign, Known, Depth + 1))
2521 return I;
2522
2523 if ((DemandedMask & fcNegative) == DemandedMask) {
2524 // Roundabout way of replacing with fneg(fabs)
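// (Illustrative: copysign(x, -1.0) always produces a negative-signed value,
// which is exactly fneg(fabs(x)).)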
2525 CI->setOperand(1, ConstantFP::get(VTy, -1.0));
2526 return I;
2527 }
2528
2529 if ((DemandedMask & fcPositive) == DemandedMask) {
2530 // Roundabout way of replacing with fabs
2531 CI->setOperand(1, ConstantFP::getZero(VTy));
2532 return I;
2533 }
2534
2535 KnownFPClass KnownSign = computeKnownFPClass(CI->getArgOperand(1),
2536 fcAllFlags, CxtI, Depth + 1);
2537
2538 if (Known.SignBit && KnownSign.SignBit &&
2539 *Known.SignBit == *KnownSign.SignBit)
2540 return CI->getOperand(0);
2541
2542 // TODO: Call argument attribute not considered
2543 // Input implied not-nan from flag.
2544 if (FMF.noNaNs())
2545 KnownSign.knownNot(fcNan);
2546
2547 if (KnownSign.SignBit == false) {
2549 CI->setOperand(1, ConstantFP::getZero(VTy));
2550 return I;
2551 }
2552
2553 if (KnownSign.SignBit == true) {
2555 CI->setOperand(1, ConstantFP::get(VTy, -1.0));
2556 return I;
2557 }
2558
2559 Known.copysign(KnownSign);
2560 break;
2561 }
2562 case Intrinsic::maximum:
2563 case Intrinsic::minimum:
2564 case Intrinsic::maximumnum:
2565 case Intrinsic::minimumnum: {
2566 const bool PropagateNaN =
2567 IID == Intrinsic::maximum || IID == Intrinsic::minimum;
2568
2569 // We can't tell much based on the demanded result without inspecting the
2570 // operands (e.g., a known-positive result could have been clamped), but
2571 // we can still prune known-nan inputs.
2572 FPClassTest SrcDemandedMask =
2573 PropagateNaN ? DemandedMask | ~fcNan : fcAllFlags;
2574
2575 KnownFPClass KnownLHS, KnownRHS;
2576 if (SimplifyDemandedFPClass(CI, 1, SrcDemandedMask, KnownRHS,
2577 Depth + 1) ||
2578 SimplifyDemandedFPClass(CI, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2579 return I;
2580
2581 Value *Simplified =
2582 simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask, KnownLHS,
2583 KnownRHS, F, FMF.noSignedZeros());
2584 if (Simplified)
2585 return Simplified;
2586
2587 auto *FPOp = cast<FPMathOperator>(CI);
2588
2589 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2590 FastMathFlags InferredFMF = FMF;
2591
2592 if (!FMF.noSignedZeros()) {
2593 // Add NSZ flag if we know the result will not be sensitive to the sign
2594 // of 0.
2595 FPClassTest ZeroMask = fcZero;
2596
2597 Type *EltTy = VTy->getScalarType();
2598 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2599 if (Mode != DenormalMode::getIEEE())
2600 ZeroMask |= fcSubnormal;
2601
2602 bool ResultNotLogical0 = (ValidResults & ZeroMask) == fcNone;
2603 if (ResultNotLogical0 || ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
2604 KnownRHS.isKnownNeverLogicalPosZero(Mode)) &&
2605 (KnownLHS.isKnownNeverLogicalPosZero(Mode) ||
2606 KnownRHS.isKnownNeverLogicalNegZero(Mode))))
2607 InferredFMF.setNoSignedZeros(true);
2608 }
2609
2610 if (!FMF.noNaNs() &&
2611 ((PropagateNaN && (ValidResults & fcNan) == fcNone) ||
2612 (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN()))) {
2614 InferredFMF.setNoNaNs(true);
2615 }
2616
2617 if (InferredFMF != FMF) {
2618 CI->setFastMathFlags(InferredFMF);
2619 return FPOp;
2620 }
2621
2622 return nullptr;
2623 }
2624 case Intrinsic::exp:
2625 case Intrinsic::exp2:
2626 case Intrinsic::exp10: {
2627 if ((DemandedMask & fcPositive) == fcNone) {
2628 // Only returns positive values or nans.
2629 if ((DemandedMask & fcNan) == fcNone)
2630 return PoisonValue::get(VTy);
2631
2632 // Only need nan propagation.
2633 if ((DemandedMask & ~fcNan) == fcNone)
2634 return ConstantFP::getQNaN(VTy);
2635
2636 return CI->getArgOperand(0);
2637 }
2638
2639 FPClassTest SrcDemandedMask = DemandedMask & fcNan;
2640
2641 if (DemandedMask & fcZero) {
2642 // exp(-infinity) = 0
2643 SrcDemandedMask |= fcNegInf;
2644
2645 // exp(-largest_normal) = 0
2646 //
2647 // Negative numbers of sufficiently large magnitude underflow to 0. No
2648 // subnormal input has a 0 result.
2649 SrcDemandedMask |= fcNegNormal;
2650 }
2651
2652 if (DemandedMask & fcPosSubnormal) {
2653 // Negative numbers of sufficiently large magnitude underflow to 0. No
2654 // subnormal input has a 0 result.
2655 SrcDemandedMask |= fcNegNormal;
2656 }
2657
2658 if (DemandedMask & fcPosNormal) {
2659 // exp(0) = 1
2660 // exp(+/- smallest_normal) = 1
2661 // exp(+/- largest_denormal) = 1
2662 // exp(+/- smallest_denormal) = 1
2663 // exp(-1) = pos normal
2664 SrcDemandedMask |= fcNormal | fcSubnormal | fcZero;
2665 }
2666
2667 // exp(inf), exp(largest_normal) = inf
2668 if (DemandedMask & fcPosInf)
2669 SrcDemandedMask |= fcPosInf | fcPosNormal;
2670
2671 KnownFPClass KnownSrc;
2672
2673 // TODO: This could really make use of KnownFPClass of specific value
2674 // range, (i.e., close enough to 1)
2675 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2676 return I;
2677
2678 /// Propagate nnan-ness to simplify edge case checks.
2679 if ((DemandedMask & fcNan) == fcNone)
2680 KnownSrc.knownNot(fcNan);
2681
2682 // exp(+/-0) = 1
2683 if (KnownSrc.isKnownAlways(fcZero))
2684 return ConstantFP::get(VTy, 1.0);
2685
2686 // Only perform nan propagation.
2687 // Note: Dropping canonicalize / quiet of signaling nan.
2688 if (KnownSrc.isKnownAlways(fcNan))
2689 return CI->getArgOperand(0);
2690
2691 // exp(0 | nan) => x == 0.0 ? 1.0 : x
2692 if (KnownSrc.isKnownAlways(fcZero | fcNan)) {
2693 IRBuilderBase::InsertPointGuard Guard(Builder);
2694 Builder.SetInsertPoint(CI);
2695
2696 // fadd +/-0, 1.0 => 1.0
2697 // fadd nan, 1.0 => nan
2698 return Builder.CreateFAddFMF(CI->getArgOperand(0),
2699 ConstantFP::get(VTy, 1.0), FMF);
2700 }
2701
2702 if (KnownSrc.isKnownAlways(fcInf | fcNan)) {
2703 // exp(-inf) = 0
2704 // exp(+inf) = +inf
2705 IRBuilderBase::InsertPointGuard Guard(Builder);
2706 Builder.SetInsertPoint(CI);
2707
2708 // Note: Dropping canonicalize / quiet of signaling nan.
2709 Value *X = CI->getArgOperand(0);
2710 Value *IsPosInfOrNan = Builder.CreateFCmpFMF(
2711 FCmpInst::FCMP_UGT, X, ConstantFP::getZero(VTy), FMF);
2712 // We do not know whether an infinity or a NaN is more likely here,
2713 // so mark the branch weights as unknown.
2714 Value *ZeroOrInf = Builder.CreateSelectFMFWithUnknownProfile(
2715 IsPosInfOrNan, X, ConstantFP::getZero(VTy), FMF, DEBUG_TYPE);
2716 return ZeroOrInf;
2717 }
2718
2719 Known = KnownFPClass::exp(KnownSrc);
2720
2721 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2722 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2723 }
2724 case Intrinsic::log:
2725 case Intrinsic::log2:
2726 case Intrinsic::log10: {
2727 FPClassTest DemandedSrcMask = DemandedMask & (fcNan | fcPosInf);
2728
2729 Type *EltTy = VTy->getScalarType();
2730 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2731
2732 // log(x < 0) = nan
2733 if (DemandedMask & fcNan)
2734 DemandedSrcMask |= (fcNegative & ~fcNegZero);
2735
2736 // log(0) = -inf
2737 if (DemandedMask & fcNegInf) {
2738 DemandedSrcMask |= fcZero;
2739
2740 // No value produces subnormal result.
2741 if (Mode.inputsMayBeZero())
2742 DemandedSrcMask |= fcSubnormal;
2743 }
2744
2745 if (DemandedMask & fcNormal)
2746 DemandedSrcMask |= fcNormal | fcSubnormal;
2747
2748 // log(1) = 0
2749 if (DemandedMask & fcZero)
2750 DemandedSrcMask |= fcPosNormal;
2751
2752 KnownFPClass KnownSrc;
2753 if (SimplifyDemandedFPClass(I, 0, DemandedSrcMask, KnownSrc, Depth + 1))
2754 return I;
2755
2756 Known = KnownFPClass::log(KnownSrc, Mode);
2757
2758 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2759 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2760 }
2761 case Intrinsic::sqrt: {
2762 FPClassTest DemandedSrcMask =
2763 DemandedMask & (fcNegZero | fcPositive | fcNan);
2764
2765 if (DemandedMask & fcNan)
2766 DemandedSrcMask |= (fcNegative & ~fcNegZero);
2767
2768 // sqrt(max_subnormal) is a normal value
2769 if (DemandedMask & fcPosNormal)
2770 DemandedSrcMask |= fcPosSubnormal;
2771
2772 KnownFPClass KnownSrc;
2773 if (SimplifyDemandedFPClass(I, 0, DemandedSrcMask, KnownSrc, Depth + 1))
2774 return I;
2775
2776 Type *EltTy = VTy->getScalarType();
2777 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2778
2779 // sqrt(-x) = nan, but be careful of negative subnormals flushed to 0.
2780 if (KnownSrc.isKnownNever(fcPositive) &&
2781 KnownSrc.isKnownNeverLogicalZero(Mode))
2782 return ConstantFP::getQNaN(VTy);
2783
2784 Known = KnownFPClass::sqrt(KnownSrc, Mode);
2785 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2786
2787 if (ValidResults == fcZero) {
2788 if (FMF.noSignedZeros())
2789 return ConstantFP::getZero(VTy);
2790
2791 Value *Copysign = Builder.CreateCopySign(ConstantFP::getZero(VTy),
2792 CI->getArgOperand(0), FMF);
2793 Copysign->takeName(CI);
2794 return Copysign;
2795 }
2796
2797 return getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true);
2798 }
2799 case Intrinsic::trunc:
2800 case Intrinsic::floor:
2801 case Intrinsic::ceil:
2802 case Intrinsic::rint:
2803 case Intrinsic::nearbyint:
2804 case Intrinsic::round:
2805 case Intrinsic::roundeven: {
2806 FPClassTest DemandedSrcMask = DemandedMask;
2807
2808 // Zero results imply valid subnormal sources.
2809 if (DemandedMask & fcNegZero)
2810 DemandedSrcMask |= fcNegSubnormal | fcNegNormal;
2811
2812 if (DemandedMask & fcPosZero)
2813 DemandedSrcMask |= fcPosSubnormal | fcPosNormal;
2814
2815 KnownFPClass KnownSrc;
2816 if (SimplifyDemandedFPClass(CI, 0, DemandedSrcMask, KnownSrc, Depth + 1))
2817 return I;
2818
2819 // Note: Possibly dropping snan quiet.
2820 if (KnownSrc.isKnownAlways(fcInf | fcNan | fcZero))
2821 return CI->getArgOperand(0);
2822
2823 // Propagate nnan-ness to source to simplify source checks.
2824 if ((DemandedMask & fcNan) == fcNone)
2825 KnownSrc.knownNot(fcNan);
2826
2827 bool IsRoundNearestOrTrunc =
2828 IID == Intrinsic::round || IID == Intrinsic::roundeven ||
2829 IID == Intrinsic::nearbyint || IID == Intrinsic::rint ||
2830 IID == Intrinsic::trunc;
2831
2832 // Ignore denormals-as-zero, as canonicalization is not mandated.
2833 if ((IID == Intrinsic::floor || IsRoundNearestOrTrunc) &&
2834 KnownSrc.isKnownAlways(fcPosZero | fcPosSubnormal))
2835 return ConstantFP::getZero(VTy);
2836
2837 if ((IID == Intrinsic::ceil || IsRoundNearestOrTrunc) &&
2838 KnownSrc.isKnownAlways(fcNegZero | fcNegSubnormal))
2839 return ConstantFP::getZero(VTy, true);
2840
2841 if (IID == Intrinsic::floor && KnownSrc.isKnownAlways(fcNegSubnormal))
2842 return ConstantFP::get(VTy, -1.0);
2843
2844 if (IID == Intrinsic::ceil && KnownSrc.isKnownAlways(fcPosSubnormal))
2845 return ConstantFP::get(VTy, 1.0);
2846
2848 KnownSrc, IID == Intrinsic::trunc,
2850
2851 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2852 if (Constant *SingleVal =
2853 getFPClassConstant(VTy, ValidResults, /*IsCanonicalizing=*/true))
2854 return SingleVal;
2855
2856 if ((IID == Intrinsic::trunc || IsRoundNearestOrTrunc) &&
2857 KnownSrc.isKnownAlways(fcZero | fcSubnormal)) {
2858 Value *Copysign = Builder.CreateCopySign(ConstantFP::getZero(VTy),
2859 CI->getArgOperand(0));
2860 Copysign->takeName(CI);
2861 return Copysign;
2862 }
2863
2864 return nullptr;
2865 }
2866 case Intrinsic::canonicalize: {
2867 Type *EltTy = VTy->getScalarType();
2868
2869 // TODO: This could have more refined support for PositiveZero denormal
2870 // mode.
2871 if (EltTy->isIEEELikeFPTy()) {
2872 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2873
2874 FPClassTest SrcDemandedMask = DemandedMask;
2875
2876 // A demanded quiet nan result may have come from a signaling nan, so we
2877 // need to expand the demanded mask.
2878 if ((DemandedMask & fcQNan) != fcNone)
2879 SrcDemandedMask |= fcSNan;
2880
2881 if (Mode != DenormalMode::getIEEE()) {
2882 // Any zero results may have come from flushed denormals.
2883 if (DemandedMask & fcPosZero)
2884 SrcDemandedMask |= fcPosSubnormal;
2885 if (DemandedMask & fcNegZero)
2886 SrcDemandedMask |= fcNegSubnormal;
2887 }
2888
2889 if (Mode == DenormalMode::getPreserveSign()) {
2890 // If a denormal input will be flushed, and we don't need zeros, we
2891 // don't need denormals either.
2892 if ((DemandedMask & fcPosZero) == fcNone)
2893 SrcDemandedMask &= ~fcPosSubnormal;
2894
2895 if ((DemandedMask & fcNegZero) == fcNone)
2896 SrcDemandedMask &= ~fcNegSubnormal;
2897 }
2898
2899 KnownFPClass KnownSrc;
2900
2901 // Simplify upstream operations before trying to simplify this call.
2902 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2903 return I;
2904
2905 // Perform the canonicalization to see if this folded to a constant.
2906 Known = KnownFPClass::canonicalize(KnownSrc, Mode);
2907
2908 if (Constant *SingleVal =
2909 getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses))
2910 return SingleVal;
2911
2912 // For IEEE handling, there is only a bit change for nan inputs, so we
2913 // can drop it if we do not demand nan results or we know the input
2914 // isn't a nan.
2915 // Otherwise, we also need to avoid denormal inputs to drop the
2916 // canonicalize.
2917 if ((KnownSrc.isKnownNeverNaN() || (DemandedMask & fcNan) == fcNone) &&
2918 (Mode == DenormalMode::getIEEE() ||
2919 KnownSrc.isKnownNeverSubnormal()))
2920 return CI->getArgOperand(0);
2921
2922 return nullptr;
2923 }
2924
2925 [[fallthrough]];
2926 }
2927 default:
2928 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
2929 break;
2930 }
2931
2932 break;
2933 }
2934 case Instruction::Select: {
2935 KnownFPClass KnownLHS, KnownRHS;
2936 if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) ||
2937 SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1))
2938 return I;
2939
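// If one arm can never produce any demanded class, every demanded result
// must come from the other arm (illustrative example: only finite results
// are demanded and the true arm is known to always be nan).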
2940 if (KnownLHS.isKnownNever(DemandedMask))
2941 return I->getOperand(2);
2942 if (KnownRHS.isKnownNever(DemandedMask))
2943 return I->getOperand(1);
2944
2946 adjustKnownFPClassForSelectArm(KnownLHS, I->getOperand(0), I->getOperand(1),
2947 /*Invert=*/false, SQ, Depth);
2948 adjustKnownFPClassForSelectArm(KnownRHS, I->getOperand(0), I->getOperand(2),
2949 /*Invert=*/true, SQ, Depth);
2950 Known = KnownLHS.intersectWith(KnownRHS);
2951 break;
2952 }
2953 case Instruction::ExtractElement: {
2954 // TODO: Handle demanded element mask
2955 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
2956 return I;
2957 break;
2958 }
2959 case Instruction::InsertElement: {
2960 KnownFPClass KnownInserted, KnownVec;
2961 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownInserted, Depth + 1) ||
2962 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownVec, Depth + 1))
2963 return I;
2964
2965 // TODO: Use demanded elements logic from computeKnownFPClass
2966 Known = KnownVec | KnownInserted;
2967 break;
2968 }
2969 case Instruction::ShuffleVector: {
2970 KnownFPClass KnownLHS, KnownRHS;
2971 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownRHS, Depth + 1) ||
2972 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownLHS, Depth + 1))
2973 return I;
2974
2975 // TODO: This is overly conservative and should consider demanded elements,
2976 // and splats.
2977 Known = KnownLHS | KnownRHS;
2978 break;
2979 }
2980 default:
2981 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
2982 break;
2983 }
2984
2985 return getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
2986}
2987
2988/// Helper routine of SimplifyDemandedUseFPClass. It computes Known
2989/// floating-point classes. It also tries to handle simplifications that can be
2990/// done based on DemandedMask, but without modifying the Instruction.
2991 Value *InstCombinerImpl::SimplifyMultipleUseDemandedFPClass(
2992 Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known,
2993 Instruction *CxtI, unsigned Depth) {
2994 FastMathFlags FMF;
2995 if (auto *FPOp = dyn_cast<FPMathOperator>(I)) {
2996 FMF = FPOp->getFastMathFlags();
2997 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
2998 }
2999
3000 // Remove unwanted results from the computed result
3001 scope_exit ApplyDemandedMask(
3002 [=, &Known]() { Known.knownNot(~DemandedMask); });
3003
3004 switch (I->getOpcode()) {
3005 case Instruction::Select: {
3006 // TODO: Can we infer which side it came from based on adjusted result
3007 // class?
3008 KnownFPClass KnownRHS =
3009 computeKnownFPClass(I->getOperand(2), DemandedMask, CxtI, Depth + 1);
3010 if (KnownRHS.isKnownNever(DemandedMask))
3011 return I->getOperand(1);
3012
3013 KnownFPClass KnownLHS =
3014 computeKnownFPClass(I->getOperand(1), DemandedMask, CxtI, Depth + 1);
3015 if (KnownLHS.isKnownNever(DemandedMask))
3016 return I->getOperand(2);
3017
3019 adjustKnownFPClassForSelectArm(KnownLHS, I->getOperand(0), I->getOperand(1),
3020 /*Invert=*/false, SQ, Depth);
3021 adjustKnownFPClassForSelectArm(KnownRHS, I->getOperand(0), I->getOperand(2),
3022 /*Invert=*/true, SQ, Depth);
3023 Known = KnownLHS.intersectWith(KnownRHS);
3024 break;
3025 }
3026 case Instruction::FNeg: {
3027 // Special case fneg(fabs(x))
3028 Value *Src;
3029
3030 Value *FNegSrc = I->getOperand(0);
3031 if (!match(FNegSrc, m_FAbs(m_Value(Src)))) {
3032 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3033 break;
3034 }
3035
3036 FastMathFlags FabsFMF = cast<FPMathOperator>(FNegSrc)->getFastMathFlags();
3037 FPClassTest ThisDemandedMask =
3038 adjustDemandedMaskFromFlags(DemandedMask, FabsFMF);
3039
3040 KnownFPClass KnownSrc =
3041 computeKnownFPClass(Src, fcAllFlags, CxtI, Depth + 1);
3042
3043 if ((ThisDemandedMask & fcNan) == fcNone)
3044 KnownSrc.knownNot(fcNan);
3045 if ((ThisDemandedMask & fcInf) == fcNone)
3046 KnownSrc.knownNot(fcInf);
3047
3048 // If the source value is known negative, we can directly fold to it.
3049 if (KnownSrc.SignBit == true)
3050 return Src;
3051
3052 // If the only sign bit difference is for 0, ignore it with nsz.
3053 if ((FMF.noSignedZeros() || FabsFMF.noSignedZeros()) &&
3054 KnownSrc.isKnownNever(fcPositive & ~fcPosZero))
3055 return Src;
3056
3057 Known = KnownFPClass::fneg(KnownFPClass::fabs(KnownSrc));
3058 break;
3059 }
3060 case Instruction::Call: {
3061 const CallInst *CI = cast<CallInst>(I);
3062 const Intrinsic::ID IID = CI->getIntrinsicID();
3063 switch (IID) {
3064 case Intrinsic::fabs: {
3065 Value *Src = CI->getArgOperand(0);
3066 KnownFPClass KnownSrc =
3067 computeKnownFPClass(Src, fcAllFlags, CxtI, Depth + 1);
3068
3069 if (Value *Simplified = simplifyDemandedFPClassFabs(
3070 Known, CI->getArgOperand(0), DemandedMask, KnownSrc,
3071 FMF.noSignedZeros()))
3072 return Simplified;
3073 break;
3074 }
3075 case Intrinsic::maximum:
3076 case Intrinsic::minimum:
3077 case Intrinsic::maximumnum:
3078 case Intrinsic::minimumnum: {
3080 CI->getArgOperand(1), DemandedMask, CxtI, Depth + 1);
3081 if (KnownRHS.isUnknown())
3082 return nullptr;
3083
3085 CI->getArgOperand(0), DemandedMask, CxtI, Depth + 1);
3086
3088 Known, IID, CI, DemandedMask, KnownLHS, KnownRHS, F,
3089 cast<FPMathOperator>(CI)->hasNoSignedZeros());
3090 }
3091 default:
3092 break;
3093 }
3094
3095 [[fallthrough]];
3096 }
3097 default:
3098 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3099 break;
3100 }
3101
3102 return getFPClassConstant(I->getType(), DemandedMask & Known.KnownFPClasses);
3103}
3104
3105 bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
3106 FPClassTest DemandedMask,
3107 KnownFPClass &Known,
3108 unsigned Depth) {
3109 Use &U = I->getOperandUse(OpNo);
3110 Value *V = U.get();
3111 Type *VTy = V->getType();
3112
3113 if (DemandedMask == fcNone) {
3114 if (isa<UndefValue>(V))
3115 return false;
3116 replaceUse(U, PoisonValue::get(VTy));
3117 return true;
3118 }
3119
3120 // Handle constant
3121 Instruction *VInst = dyn_cast<Instruction>(V);
3122 if (!VInst) {
3123 // Handle constants and arguments
3124 Known = computeKnownFPClass(V, fcAllFlags, I, Depth);
3125 Known.knownNot(~DemandedMask);
3126
3127 if (Known.KnownFPClasses == fcNone) {
3128 if (isa<UndefValue>(V))
3129 return false;
3130 replaceUse(U, PoisonValue::get(VTy));
3131 return true;
3132 }
3133
3134 // Do not try to replace values which are already constants (unless we are
3135 // folding to poison). Doing so could promote poison elements to non-poison
3136 // constants.
3137 if (isa<Constant>(V))
3138 return false;
3139
3140 Value *FoldedToConst = getFPClassConstant(VTy, Known.KnownFPClasses);
3141 if (!FoldedToConst || FoldedToConst == V)
3142 return false;
3143
3144 replaceUse(U, FoldedToConst);
3145 return true;
3146 }
3147
3148 if (Depth == MaxAnalysisRecursionDepth)
3149 return false;
3150
3151 Value *NewVal;
3152
3153 if (VInst->hasOneUse()) {
3154 // If the instruction has one use, we can directly simplify it.
3155 NewVal = SimplifyDemandedUseFPClass(VInst, DemandedMask, Known, I, Depth);
3156 } else {
3157 // If there are multiple uses of this instruction, then we can simplify
3158 // VInst to some other value, but not modify the instruction.
3159 NewVal = SimplifyMultipleUseDemandedFPClass(VInst, DemandedMask, Known, I,
3160 Depth);
3161 }
3162
3163 if (!NewVal)
3164 return false;
3165 if (Instruction *OpInst = dyn_cast<Instruction>(U))
3166 salvageDebugInfo(*OpInst);
3167
3168 replaceUse(U, NewVal);
3169 return true;
3170}
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
BinaryOps getOpcode() const
Definition InstrTypes.h:374
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noSignedZeros() const
Definition FMF.h:67
bool noInfs() const
Definition FMF.h:66
void setNoSignedZeros(bool B=true)
Definition FMF.h:84
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
void setNoInfs(bool B=true)
Definition FMF.h:81
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2300
LLVM_ABI Value * CreateSelectWithUnknownProfile(Value *C, Value *True, Value *False, StringRef PassName, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Value * SimplifyDemandedUseFPClass(Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth=0)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Value * SimplifyMultipleUseDemandedFPClass(Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth)
Helper routine of SimplifyDemandedUseFPClass.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
std::optional< std::pair< Intrinsic::ID, SmallVector< Value *, 3 > > > convertOrOfShiftsToFunnelShift(Instruction &Or)
Value * simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known)
Helper routine of SimplifyDemandedUseBits.
Value * SimplifyDemandedUseBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Attempts to replace I with a simpler value based on the demanded bits.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth=0)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Value * SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Helper routine of SimplifyDemandedUseBits.
SimplifyQuery SQ
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
bool isShift() const
bool isIntDivRem() const
A wrapper class for inspecting calls to intrinsic functions.
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isMultiUnitFPType() const
Returns true if this is a floating-point type that is an unevaluated sum of multiple floating-point u...
Definition Type.h:193
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIEEELikeFPTy() const
Return true if this is a well-behaved IEEE-like type, which has a IEEE compatible layout,...
Definition Type.h:170
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
This class represents zero extension of integer types.
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
DisjointOr_match< LHS, RHS, true > m_c_DisjointOr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI void computeKnownBitsFromContext(const Value *V, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Merge bits known from context-dependent facts into Known.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1731
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2163
LLVM_ABI bool isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be undef, but may be poison.
LLVM_ABI bool cannotOrderStrictlyLess(FPClassTest LHS, FPClassTest RHS, bool OrderedZeroSign=false)
Returns true if all values in LHS must be greater than or equal to those in RHS.
LLVM_ABI bool cannotOrderStrictlyGreater(FPClassTest LHS, FPClassTest RHS, bool OrderedZeroSign=false)
Returns true if all values in LHS must be less than or equal to those in RHS.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond, Value *Arm, bool Invert, const SimplifyQuery &Q, unsigned Depth=0)
Adjust Known for the given select Arm to include information from the select Cond.
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI void adjustKnownFPClassForSelectArm(KnownFPClass &Known, Value *Cond, Value *Arm, bool Invert, const SimplifyQuery &Q, unsigned Depth=0)
Adjust Known for the given select Arm to include information from the select Cond.
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
LLVM_ABI FPClassTest unknown_sign(FPClassTest Mask)
Return the test mask which returns true if the value could have the same set of classes,...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
LLVM_ABI KnownBits analyzeKnownBitsFromAndXorOr(const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, const SimplifyQuery &SQ, unsigned Depth=0)
Using KnownBits LHS/RHS produce the known bits for logic op (and/xor/or).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:314
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
void makeNonNegative()
Make this value non-negative.
Definition KnownBits.h:127
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:334
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:324
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:183
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:360
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
Definition KnownBits.h:199
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
static LLVM_ABI KnownBits srem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for srem(LHS, RHS).
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static KnownBits sub(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from subtraction of LHS and RHS.
Definition KnownBits.h:366
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
bool isKnownNeverInfOrNaN() const
Return true if it's known this can never be an infinity or nan.
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
bool isKnownNeverInfinity() const
Return true if it's known this can never be an infinity.
static constexpr FPClassTest OrderedGreaterThanZeroMask
static constexpr FPClassTest OrderedLessThanZeroMask
void knownNot(FPClassTest RuleOut)
static LLVM_ABI KnownFPClass fmul(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fmul.
static LLVM_ABI KnownFPClass fadd_self(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fadd x, x.
void copysign(const KnownFPClass &Sign)
static KnownFPClass square(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
bool isKnownNeverSubnormal() const
Return true if it's known this can never be a subnormal.
bool isKnownAlways(FPClassTest Mask) const
static LLVM_ABI KnownFPClass canonicalize(const KnownFPClass &Src, DenormalMode DenormMode=DenormalMode::getDynamic())
Apply the canonicalize intrinsic to this value.
LLVM_ABI bool isKnownNeverLogicalZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a zero.
static LLVM_ABI KnownFPClass log(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for log/log2/log10.
static LLVM_ABI KnownFPClass roundToIntegral(const KnownFPClass &Src, bool IsTrunc, bool IsMultiUnitFPType)
Propagate known class for rounding intrinsics (trunc, floor, ceil, rint, nearbyint,...
static LLVM_ABI KnownFPClass minMaxLike(const KnownFPClass &LHS, const KnownFPClass &RHS, MinMaxKind Kind, DenormalMode DenormMode=DenormalMode::getDynamic())
bool isUnknown() const
KnownFPClass intersectWith(const KnownFPClass &RHS) const
static LLVM_ABI KnownFPClass exp(const KnownFPClass &Src)
Report known values for exp, exp2 and exp10.
std::optional< bool > SignBit
std::nullopt if the sign bit is unknown, true if the sign bit is definitely set or false if the sign ...
bool isKnownNeverNaN() const
Return true if it's known this can never be a nan.
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
static LLVM_ABI KnownFPClass fpext(const KnownFPClass &KnownSrc, const fltSemantics &DstTy, const fltSemantics &SrcTy)
Propagate known class for fpext.
static LLVM_ABI KnownFPClass sqrt(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for sqrt.
LLVM_ABI bool isKnownNeverLogicalPosZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a positive zero.
bool cannotBeOrderedGreaterEqZero(DenormalMode Mode) const
Return true if it's know this can never be a negative value or a logical 0.
static LLVM_ABI KnownFPClass fadd(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fadd.
LLVM_ABI bool isKnownNeverLogicalNegZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a negative zero.
Matching combinators.
const Instruction * CxtI