LLVM 23.0.0git
InstCombineSimplifyDemanded.cpp
Go to the documentation of this file.
1//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains logic for simplifying instructions based on information
10// about how they are used.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
22
23using namespace llvm;
24using namespace llvm::PatternMatch;
25
26#define DEBUG_TYPE "instcombine"
27
28static cl::opt<bool>
29 VerifyKnownBits("instcombine-verify-known-bits",
30 cl::desc("Verify that computeKnownBits() and "
31 "SimplifyDemandedBits() are consistent"),
32 cl::Hidden, cl::init(false));
33
35 "instcombine-simplify-vector-elts-depth",
37 "Depth limit when simplifying vector instructions and their operands"),
38 cl::Hidden, cl::init(10));
39
40/// Check to see if the specified operand of the specified instruction is a
41/// constant integer. If so, check to see if there are any bits set in the
42/// constant that are not demanded. If so, shrink the constant and return true.
43static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
44 const APInt &Demanded) {
45 assert(I && "No instruction?");
46 assert(OpNo < I->getNumOperands() && "Operand index too large");
47
48 // The operand must be a constant integer or splat integer.
49 Value *Op = I->getOperand(OpNo);
50 const APInt *C;
51 if (!match(Op, m_APInt(C)))
52 return false;
53
54 // If there are no bits set that aren't demanded, nothing to do.
55 if (C->isSubsetOf(Demanded))
56 return false;
57
58 // This instruction is producing bits that are not demanded. Shrink the RHS.
59 I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
60
61 return true;
62}
63
64/// Let N = 2 * M.
65/// Given an N-bit integer representing a pack of two M-bit integers,
66/// we can select one of the packed integers by right-shifting by either
67/// zero or M (which is the most straightforward to check if M is a power
68/// of 2), and then isolating the lower M bits. In this case, we can
69/// represent the shift as a select on whether the shr amount is nonzero.
71 const APInt &DemandedMask,
73 unsigned Depth) {
74 assert(I->getOpcode() == Instruction::LShr &&
75 "Only lshr instruction supported");
76
77 uint64_t ShlAmt;
78 Value *Upper, *Lower;
79 if (!match(I->getOperand(0),
82 m_Value(Lower)))))
83 return nullptr;
84
85 if (!isPowerOf2_64(ShlAmt))
86 return nullptr;
87
88 const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
89 if (DemandedBitWidth > ShlAmt)
90 return nullptr;
91
92 // Check that upper demanded bits are not lost from lshift.
93 if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
94 return nullptr;
95
96 KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth);
97 if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
98 return nullptr;
99
100 Value *ShrAmt = I->getOperand(1);
101 KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth);
102
103 // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
104 // zero.
105 if (~KnownShrBits.Zero != ShlAmt)
106 return nullptr;
107
110 Value *ShrAmtZ =
112 ShrAmt->getName() + ".z");
113 // There is no existing !prof metadata we can derive the !prof metadata for
114 // this select.
117 Select->takeName(I);
118 return Select;
119}
120
121/// Returns the bitwidth of the given scalar or pointer type. For vector types,
122/// returns the element type's bitwidth.
123static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
124 if (unsigned BitWidth = Ty->getScalarSizeInBits())
125 return BitWidth;
126
127 return DL.getPointerTypeSizeInBits(Ty);
128}
129
130/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
131/// the instruction has any properties that allow us to simplify its operands.
133 KnownBits &Known) {
134 APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
135 Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
136 SQ.getWithInstruction(&Inst));
137 if (!V) return false;
138 if (V == &Inst) return true;
139 replaceInstUsesWith(Inst, V);
140 return true;
141}
142
143/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
144/// the instruction has any properties that allow us to simplify its operands.
149
151 KnownFPClass Known;
152
153 Value *V =
154 SimplifyDemandedUseFPClass(&Inst, fcAllFlags, Known, /*CtxI=*/&Inst);
155 if (!V)
156 return false;
157 if (V == &Inst)
158 return true;
159 replaceInstUsesWith(Inst, V);
160 return true;
161}
162
163/// This form of SimplifyDemandedBits simplifies the specified instruction
164/// operand if possible, updating it in place. It returns true if it made any
165/// change and false otherwise.
167 const APInt &DemandedMask,
168 KnownBits &Known,
169 const SimplifyQuery &Q,
170 unsigned Depth) {
171 Use &U = I->getOperandUse(OpNo);
172 Value *V = U.get();
173 if (isa<Constant>(V)) {
174 llvm::computeKnownBits(V, Known, Q, Depth);
175 return false;
176 }
177
178 Known.resetAll();
179 if (DemandedMask.isZero()) {
180 // Not demanding any bits from V.
181 replaceUse(U, UndefValue::get(V->getType()));
182 return true;
183 }
184
186 if (!VInst) {
187 llvm::computeKnownBits(V, Known, Q, Depth);
188 return false;
189 }
190
192 return false;
193
194 Value *NewVal;
195 if (VInst->hasOneUse()) {
196 // If the instruction has one use, we can directly simplify it.
197 NewVal = SimplifyDemandedUseBits(VInst, DemandedMask, Known, Q, Depth);
198 } else {
199 // If there are multiple uses of this instruction, then we can simplify
200 // VInst to some other value, but not modify the instruction.
201 NewVal =
202 SimplifyMultipleUseDemandedBits(VInst, DemandedMask, Known, Q, Depth);
203 }
204 if (!NewVal) return false;
205 if (Instruction* OpInst = dyn_cast<Instruction>(U))
206 salvageDebugInfo(*OpInst);
207
208 replaceUse(U, NewVal);
209 return true;
210}
211
212/// This function attempts to replace V with a simpler value based on the
213/// demanded bits. When this function is called, it is known that only the bits
214/// set in DemandedMask of the result of V are ever used downstream.
215/// Consequently, depending on the mask and V, it may be possible to replace V
216/// with a constant or one of its operands. In such cases, this function does
217/// the replacement and returns true. In all other cases, it returns false after
218/// analyzing the expression and setting KnownOne and known to be one in the
219/// expression. Known.Zero contains all the bits that are known to be zero in
220/// the expression. These are provided to potentially allow the caller (which
221/// might recursively be SimplifyDemandedBits itself) to simplify the
222/// expression.
223/// Known.One and Known.Zero always follow the invariant that:
224/// Known.One & Known.Zero == 0.
225/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
226/// are accurate even for bits not in DemandedMask. Note
227/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
228/// be the same.
229///
230/// This returns null if it did not change anything and it permits no
231/// simplification. This returns V itself if it did some simplification of V's
232/// operands based on the information about what bits are demanded. This returns
233/// some other non-null value if it found out that V is equal to another value
234/// in the context where the specified bits are demanded, but not for all users.
236 const APInt &DemandedMask,
237 KnownBits &Known,
238 const SimplifyQuery &Q,
239 unsigned Depth) {
240 assert(I != nullptr && "Null pointer of Value???");
241 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
242 uint32_t BitWidth = DemandedMask.getBitWidth();
243 Type *VTy = I->getType();
244 assert(
245 (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
246 Known.getBitWidth() == BitWidth &&
247 "Value *V, DemandedMask and Known must have same BitWidth");
248
249 KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
250
251 // Update flags after simplifying an operand based on the fact that some high
252 // order bits are not demanded.
253 auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I,
254 unsigned NLZ) {
255 if (NLZ > 0) {
256 // Disable the nsw and nuw flags here: We can no longer guarantee that
257 // we won't wrap after simplification. Removing the nsw/nuw flags is
258 // legal here because the top bit is not demanded.
259 I->setHasNoSignedWrap(false);
260 I->setHasNoUnsignedWrap(false);
261 }
262 return I;
263 };
264
265 // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
266 // about the high bits of the operands.
267 auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
268 unsigned NLZ = DemandedMask.countl_zero();
269 // Right fill the mask of bits for the operands to demand the most
270 // significant bit and all those below it.
271 DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
272 if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
273 SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Q, Depth + 1) ||
274 ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
275 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1)) {
276 disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
277 return true;
278 }
279 return false;
280 };
281
282 switch (I->getOpcode()) {
283 default:
284 llvm::computeKnownBits(I, Known, Q, Depth);
285 break;
286 case Instruction::And: {
287 // If either the LHS or the RHS are Zero, the result is zero.
288 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
289 SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, Q,
290 Depth + 1))
291 return I;
292
293 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
294 Q, Depth);
295
296 // If the client is only demanding bits that we know, return the known
297 // constant.
298 if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
299 return Constant::getIntegerValue(VTy, Known.One);
300
301 // If all of the demanded bits are known 1 on one side, return the other.
302 // These bits cannot contribute to the result of the 'and'.
303 if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
304 return I->getOperand(0);
305 if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
306 return I->getOperand(1);
307
308 // If the RHS is a constant, see if we can simplify it.
309 if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
310 return I;
311
312 break;
313 }
314 case Instruction::Or: {
315 // If either the LHS or the RHS are One, the result is One.
316 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
317 SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, Q,
318 Depth + 1)) {
319 // Disjoint flag may not longer hold.
320 I->dropPoisonGeneratingFlags();
321 return I;
322 }
323
324 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
325 Q, Depth);
326
327 // If the client is only demanding bits that we know, return the known
328 // constant.
329 if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
330 return Constant::getIntegerValue(VTy, Known.One);
331
332 // If all of the demanded bits are known zero on one side, return the other.
333 // These bits cannot contribute to the result of the 'or'.
334 if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
335 return I->getOperand(0);
336 if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
337 return I->getOperand(1);
338
339 // If the RHS is a constant, see if we can simplify it.
340 if (ShrinkDemandedConstant(I, 1, DemandedMask))
341 return I;
342
343 // Infer disjoint flag if no common bits are set.
344 if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
345 WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
346 RHSCache(I->getOperand(1), RHSKnown);
347 if (haveNoCommonBitsSet(LHSCache, RHSCache, Q)) {
348 cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
349 return I;
350 }
351 }
352
353 break;
354 }
355 case Instruction::Xor: {
356 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Q, Depth + 1) ||
357 SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1))
358 return I;
359 Value *LHS, *RHS;
360 if (DemandedMask == 1 &&
361 match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
362 match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
363 // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
365 Builder.SetInsertPoint(I);
366 auto *Xor = Builder.CreateXor(LHS, RHS);
367 return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
368 }
369
370 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
371 Q, Depth);
372
373 // If the client is only demanding bits that we know, return the known
374 // constant.
375 if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
376 return Constant::getIntegerValue(VTy, Known.One);
377
378 // If all of the demanded bits are known zero on one side, return the other.
379 // These bits cannot contribute to the result of the 'xor'.
380 if (DemandedMask.isSubsetOf(RHSKnown.Zero))
381 return I->getOperand(0);
382 if (DemandedMask.isSubsetOf(LHSKnown.Zero))
383 return I->getOperand(1);
384
385 // If all of the demanded bits are known to be zero on one side or the
386 // other, turn this into an *inclusive* or.
387 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
388 if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
389 Instruction *Or =
390 BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
391 if (DemandedMask.isAllOnes())
392 cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
393 Or->takeName(I);
394 return InsertNewInstWith(Or, I->getIterator());
395 }
396
397 // If all of the demanded bits on one side are known, and all of the set
398 // bits on that side are also known to be set on the other side, turn this
399 // into an AND, as we know the bits will be cleared.
400 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
401 if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
402 RHSKnown.One.isSubsetOf(LHSKnown.One)) {
404 ~RHSKnown.One & DemandedMask);
405 Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
406 return InsertNewInstWith(And, I->getIterator());
407 }
408
409 // If the RHS is a constant, see if we can change it. Don't alter a -1
410 // constant because that's a canonical 'not' op, and that is better for
411 // combining, SCEV, and codegen.
412 const APInt *C;
413 if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
414 if ((*C | ~DemandedMask).isAllOnes()) {
415 // Force bits to 1 to create a 'not' op.
416 I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
417 return I;
418 }
419 // If we can't turn this into a 'not', try to shrink the constant.
420 if (ShrinkDemandedConstant(I, 1, DemandedMask))
421 return I;
422 }
423
424 // If our LHS is an 'and' and if it has one use, and if any of the bits we
425 // are flipping are known to be set, then the xor is just resetting those
426 // bits to zero. We can just knock out bits from the 'and' and the 'xor',
427 // simplifying both of them.
428 if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
429 ConstantInt *AndRHS, *XorRHS;
430 if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
431 match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
432 match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
433 (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
434 APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
435
436 Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
437 Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
438 InsertNewInstWith(NewAnd, I->getIterator());
439
440 Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
441 Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
442 return InsertNewInstWith(NewXor, I->getIterator());
443 }
444 }
445 break;
446 }
447 case Instruction::Select: {
448 if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Q, Depth + 1) ||
449 SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Q, Depth + 1))
450 return I;
451
452 // If the operands are constants, see if we can simplify them.
453 // This is similar to ShrinkDemandedConstant, but for a select we want to
454 // try to keep the selected constants the same as icmp value constants, if
455 // we can. This helps not break apart (or helps put back together)
456 // canonical patterns like min and max.
457 auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
458 const APInt &DemandedMask) {
459 const APInt *SelC;
460 if (!match(I->getOperand(OpNo), m_APInt(SelC)))
461 return false;
462
463 // Get the constant out of the ICmp, if there is one.
464 // Only try this when exactly 1 operand is a constant (if both operands
465 // are constant, the icmp should eventually simplify). Otherwise, we may
466 // invert the transform that reduces set bits and infinite-loop.
467 Value *X;
468 const APInt *CmpC;
469 if (!match(I->getOperand(0), m_ICmp(m_Value(X), m_APInt(CmpC))) ||
470 isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
471 return ShrinkDemandedConstant(I, OpNo, DemandedMask);
472
473 // If the constant is already the same as the ICmp, leave it as-is.
474 if (*CmpC == *SelC)
475 return false;
476 // If the constants are not already the same, but can be with the demand
477 // mask, use the constant value from the ICmp.
478 if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
479 I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
480 return true;
481 }
482 return ShrinkDemandedConstant(I, OpNo, DemandedMask);
483 };
484 if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
485 CanonicalizeSelectConstant(I, 2, DemandedMask))
486 return I;
487
488 // Only known if known in both the LHS and RHS.
489 adjustKnownBitsForSelectArm(LHSKnown, I->getOperand(0), I->getOperand(1),
490 /*Invert=*/false, Q, Depth);
491 adjustKnownBitsForSelectArm(RHSKnown, I->getOperand(0), I->getOperand(2),
492 /*Invert=*/true, Q, Depth);
493 Known = LHSKnown.intersectWith(RHSKnown);
494 break;
495 }
496 case Instruction::Trunc: {
497 // If we do not demand the high bits of a right-shifted and truncated value,
498 // then we may be able to truncate it before the shift.
499 Value *X;
500 const APInt *C;
501 if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
502 // The shift amount must be valid (not poison) in the narrow type, and
503 // it must not be greater than the high bits demanded of the result.
504 if (C->ult(VTy->getScalarSizeInBits()) &&
505 C->ule(DemandedMask.countl_zero())) {
506 // trunc (lshr X, C) --> lshr (trunc X), C
508 Builder.SetInsertPoint(I);
509 Value *Trunc = Builder.CreateTrunc(X, VTy);
510 return Builder.CreateLShr(Trunc, C->getZExtValue());
511 }
512 }
513 }
514 [[fallthrough]];
515 case Instruction::ZExt: {
516 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
517
518 APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
519 KnownBits InputKnown(SrcBitWidth);
520 if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Q,
521 Depth + 1)) {
522 // For zext nneg, we may have dropped the instruction which made the
523 // input non-negative.
524 I->dropPoisonGeneratingFlags();
525 return I;
526 }
527 assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
528 if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
529 !InputKnown.isNegative())
530 InputKnown.makeNonNegative();
531 Known = InputKnown.zextOrTrunc(BitWidth);
532
533 break;
534 }
535 case Instruction::SExt: {
536 // Compute the bits in the result that are not present in the input.
537 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
538
539 APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);
540
541 // If any of the sign extended bits are demanded, we know that the sign
542 // bit is demanded.
543 if (DemandedMask.getActiveBits() > SrcBitWidth)
544 InputDemandedBits.setBit(SrcBitWidth-1);
545
546 KnownBits InputKnown(SrcBitWidth);
547 if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Q, Depth + 1))
548 return I;
549
550 // If the input sign bit is known zero, or if the NewBits are not demanded
551 // convert this into a zero extension.
552 if (InputKnown.isNonNegative() ||
553 DemandedMask.getActiveBits() <= SrcBitWidth) {
554 // Convert to ZExt cast.
555 CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
556 NewCast->takeName(I);
557 return InsertNewInstWith(NewCast, I->getIterator());
558 }
559
560 // If the sign bit of the input is known set or clear, then we know the
561 // top bits of the result.
562 Known = InputKnown.sext(BitWidth);
563 break;
564 }
565 case Instruction::Add: {
566 if ((DemandedMask & 1) == 0) {
567 // If we do not need the low bit, try to convert bool math to logic:
568 // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
569 Value *X, *Y;
571 m_OneUse(m_SExt(m_Value(Y))))) &&
572 X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
573 // Truth table for inputs and output signbits:
574 // X:0 | X:1
575 // ----------
576 // Y:0 | 0 | 0 |
577 // Y:1 | -1 | 0 |
578 // ----------
580 Builder.SetInsertPoint(I);
581 Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
582 return Builder.CreateSExt(AndNot, VTy);
583 }
584
585 // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
586 if (match(I, m_Add(m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
587 X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
588 (I->getOperand(0)->hasOneUse() || I->getOperand(1)->hasOneUse())) {
589
590 // Truth table for inputs and output signbits:
591 // X:0 | X:1
592 // -----------
593 // Y:0 | -1 | -1 |
594 // Y:1 | -1 | 0 |
595 // -----------
597 Builder.SetInsertPoint(I);
598 Value *Or = Builder.CreateOr(X, Y);
599 return Builder.CreateSExt(Or, VTy);
600 }
601 }
602
603 // Right fill the mask of bits for the operands to demand the most
604 // significant bit and all those below it.
605 unsigned NLZ = DemandedMask.countl_zero();
606 APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
607 if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
608 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
609 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
610
611 // If low order bits are not demanded and known to be zero in one operand,
612 // then we don't need to demand them from the other operand, since they
613 // can't cause overflow into any bits that are demanded in the result.
614 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
615 APInt DemandedFromLHS = DemandedFromOps;
616 DemandedFromLHS.clearLowBits(NTZ);
617 if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
618 SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
619 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
620
621 unsigned NtzLHS = (~DemandedMask & LHSKnown.Zero).countr_one();
622 APInt DemandedFromRHS = DemandedFromOps;
623 DemandedFromRHS.clearLowBits(NtzLHS);
624 if (ShrinkDemandedConstant(I, 1, DemandedFromRHS))
625 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
626
627 // If we are known to be adding zeros to every bit below
628 // the highest demanded bit, we just return the other side.
629 if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
630 return I->getOperand(0);
631 if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
632 return I->getOperand(1);
633
634 // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask
635 {
636 const APInt *C;
637 if (match(I->getOperand(1), m_APInt(C)) &&
638 C->isOneBitSet(DemandedMask.getActiveBits() - 1)) {
640 Builder.SetInsertPoint(I);
641 return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
642 }
643 }
644
645 // Otherwise just compute the known bits of the result.
646 bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
647 bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
648 Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
649 break;
650 }
651 case Instruction::Sub: {
652 // Right fill the mask of bits for the operands to demand the most
653 // significant bit and all those below it.
654 unsigned NLZ = DemandedMask.countl_zero();
655 APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
656 if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
657 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Q, Depth + 1))
658 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
659
660 // If low order bits are not demanded and are known to be zero in RHS,
661 // then we don't need to demand them from LHS, since they can't cause a
662 // borrow from any bits that are demanded in the result.
663 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
664 APInt DemandedFromLHS = DemandedFromOps;
665 DemandedFromLHS.clearLowBits(NTZ);
666 if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
667 SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Q, Depth + 1))
668 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
669
670 // If we are known to be subtracting zeros from every bit below
671 // the highest demanded bit, we just return the other side.
672 if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
673 return I->getOperand(0);
674 // We can't do this with the LHS for subtraction, unless we are only
675 // demanding the LSB.
676 if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
677 return I->getOperand(1);
678
679 // Canonicalize sub mask, X -> ~X
680 const APInt *LHSC;
681 if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
682 DemandedFromOps.isSubsetOf(*LHSC)) {
684 Builder.SetInsertPoint(I);
685 return Builder.CreateNot(I->getOperand(1));
686 }
687
688 // Otherwise just compute the known bits of the result.
689 bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
690 bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
691 Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
692 break;
693 }
694 case Instruction::Mul: {
695 APInt DemandedFromOps;
696 if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
697 return I;
698
699 if (DemandedMask.isPowerOf2()) {
700 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
701 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
702 // odd (has LSB set), then the left-shifted low bit of X is the answer.
703 unsigned CTZ = DemandedMask.countr_zero();
704 const APInt *C;
705 if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
706 Constant *ShiftC = ConstantInt::get(VTy, CTZ);
707 Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
708 return InsertNewInstWith(Shl, I->getIterator());
709 }
710 }
711 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
712 // X * X is odd iff X is odd.
713 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
714 if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
715 Constant *One = ConstantInt::get(VTy, 1);
716 Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
717 return InsertNewInstWith(And1, I->getIterator());
718 }
719
720 llvm::computeKnownBits(I, Known, Q, Depth);
721 break;
722 }
723 case Instruction::Shl: {
724 const APInt *SA;
725 if (match(I->getOperand(1), m_APInt(SA))) {
726 const APInt *ShrAmt;
727 if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
728 if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
729 if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
730 DemandedMask, Known))
731 return R;
732
733 // Do not simplify if shl is part of funnel-shift pattern
734 if (I->hasOneUse()) {
735 auto *Inst = dyn_cast<Instruction>(I->user_back());
736 if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
737 if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
738 auto [IID, FShiftArgs] = *Opt;
739 if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
740 FShiftArgs[0] == FShiftArgs[1]) {
741 llvm::computeKnownBits(I, Known, Q, Depth);
742 break;
743 }
744 }
745 }
746 }
747
748 // We only want bits that already match the signbit then we don't
749 // need to shift.
750 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);
751 if (DemandedMask.countr_zero() >= ShiftAmt) {
752 if (I->hasNoSignedWrap()) {
753 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
754 unsigned SignBits =
755 ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
756 if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits)
757 return I->getOperand(0);
758 }
759
760 // If we can pre-shift a right-shifted constant to the left without
761 // losing any high bits and we don't demand the low bits, then eliminate
762 // the left-shift:
763 // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
764 Value *X;
765 Constant *C;
766 if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
767 Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
768 Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
769 LeftShiftAmtC, DL);
770 if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC,
771 LeftShiftAmtC, DL) == C) {
772 Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
773 return InsertNewInstWith(Lshr, I->getIterator());
774 }
775 }
776 }
777
778 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
779
780 // If the shift is NUW/NSW, then it does demand the high bits.
782 if (IOp->hasNoSignedWrap())
783 DemandedMaskIn.setHighBits(ShiftAmt+1);
784 else if (IOp->hasNoUnsignedWrap())
785 DemandedMaskIn.setHighBits(ShiftAmt);
786
787 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1))
788 return I;
789
790 Known = KnownBits::shl(Known,
792 /* NUW */ IOp->hasNoUnsignedWrap(),
793 /* NSW */ IOp->hasNoSignedWrap());
794 } else {
795 // This is a variable shift, so we can't shift the demand mask by a known
796 // amount. But if we are not demanding high bits, then we are not
797 // demanding those bits from the pre-shifted operand either.
798 if (unsigned CTLZ = DemandedMask.countl_zero()) {
799 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
800 if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Q, Depth + 1)) {
801 // We can't guarantee that nsw/nuw hold after simplifying the operand.
802 I->dropPoisonGeneratingFlags();
803 return I;
804 }
805 }
806 llvm::computeKnownBits(I, Known, Q, Depth);
807 }
808 break;
809 }
810 case Instruction::LShr: {
811 const APInt *SA;
812 if (match(I->getOperand(1), m_APInt(SA))) {
813 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
814
815 // Do not simplify if lshr is part of funnel-shift pattern
816 if (I->hasOneUse()) {
817 auto *Inst = dyn_cast<Instruction>(I->user_back());
818 if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
819 if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
820 auto [IID, FShiftArgs] = *Opt;
821 if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
822 FShiftArgs[0] == FShiftArgs[1]) {
823 llvm::computeKnownBits(I, Known, Q, Depth);
824 break;
825 }
826 }
827 }
828 }
829
830 // If we are just demanding the shifted sign bit and below, then this can
831 // be treated as an ASHR in disguise.
832 if (DemandedMask.countl_zero() >= ShiftAmt) {
833 // If we only want bits that already match the signbit then we don't
834 // need to shift.
835 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
836 unsigned SignBits =
837 ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
838 if (SignBits >= NumHiDemandedBits)
839 return I->getOperand(0);
840
841 // If we can pre-shift a left-shifted constant to the right without
842 // losing any low bits (we already know we don't demand the high bits),
843 // then eliminate the right-shift:
844 // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
845 Value *X;
846 Constant *C;
847 if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
848 Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
849 Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
850 RightShiftAmtC, DL);
851 if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
852 RightShiftAmtC, DL) == C) {
853 Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
854 return InsertNewInstWith(Shl, I->getIterator());
855 }
856 }
857
858 const APInt *Factor;
859 if (match(I->getOperand(0),
860 m_OneUse(m_Mul(m_Value(X), m_APInt(Factor)))) &&
861 Factor->countr_zero() >= ShiftAmt) {
862 BinaryOperator *Mul = BinaryOperator::CreateMul(
863 X, ConstantInt::get(X->getType(), Factor->lshr(ShiftAmt)));
864 return InsertNewInstWith(Mul, I->getIterator());
865 }
866 }
867
868 // Unsigned shift right.
869 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
870 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
871 // exact flag may not longer hold.
872 I->dropPoisonGeneratingFlags();
873 return I;
874 }
875 Known >>= ShiftAmt;
876 if (ShiftAmt)
877 Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
878 break;
879 }
880 if (Value *V =
881 simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth))
882 return V;
883
884 llvm::computeKnownBits(I, Known, Q, Depth);
885 break;
886 }
887 case Instruction::AShr: {
888 unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Q.CxtI, Depth + 1);
889
890 // If we only want bits that already match the signbit then we don't need
891 // to shift.
892 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
893 if (SignBits >= NumHiDemandedBits)
894 return I->getOperand(0);
895
896 // If this is an arithmetic shift right and only the low-bit is set, we can
897 // always convert this into a logical shr, even if the shift amount is
898 // variable. The low bit of the shift cannot be an input sign bit unless
899 // the shift amount is >= the size of the datatype, which is undefined.
900 if (DemandedMask.isOne()) {
901 // Perform the logical shift right.
902 Instruction *NewVal = BinaryOperator::CreateLShr(
903 I->getOperand(0), I->getOperand(1), I->getName());
904 return InsertNewInstWith(NewVal, I->getIterator());
905 }
906
907 const APInt *SA;
908 if (match(I->getOperand(1), m_APInt(SA))) {
909 uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
910
911 // Signed shift right.
912 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
913 // If any of the bits being shifted in are demanded, then we should set
914 // the sign bit as demanded.
915 bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt;
916 if (ShiftedInBitsDemanded)
917 DemandedMaskIn.setSignBit();
918 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Q, Depth + 1)) {
919 // exact flag may not longer hold.
920 I->dropPoisonGeneratingFlags();
921 return I;
922 }
923
924 // If the input sign bit is known to be zero, or if none of the shifted in
925 // bits are demanded, turn this into an unsigned shift right.
926 if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) {
927 BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
928 I->getOperand(1));
929 LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
930 LShr->takeName(I);
931 return InsertNewInstWith(LShr, I->getIterator());
932 }
933
934 Known = KnownBits::ashr(
935 Known, KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
936 ShiftAmt != 0, I->isExact());
937 } else {
938 llvm::computeKnownBits(I, Known, Q, Depth);
939 }
940 break;
941 }
942 case Instruction::UDiv: {
943 // UDiv doesn't demand low bits that are zero in the divisor.
944 const APInt *SA;
945 if (match(I->getOperand(1), m_APInt(SA))) {
946 // TODO: Take the demanded mask of the result into account.
947 unsigned RHSTrailingZeros = SA->countr_zero();
948 APInt DemandedMaskIn =
949 APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
950 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Q, Depth + 1)) {
951 // We can't guarantee that "exact" is still true after changing the
952 // the dividend.
953 I->dropPoisonGeneratingFlags();
954 return I;
955 }
956
957 Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA),
958 cast<BinaryOperator>(I)->isExact());
959 } else {
960 llvm::computeKnownBits(I, Known, Q, Depth);
961 }
962 break;
963 }
964 case Instruction::SRem: {
965 const APInt *Rem;
966 if (match(I->getOperand(1), m_APInt(Rem)) && Rem->isPowerOf2()) {
967 if (DemandedMask.ult(*Rem)) // srem won't affect demanded bits
968 return I->getOperand(0);
969
970 APInt LowBits = *Rem - 1;
971 APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
972 if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Q, Depth + 1))
973 return I;
974 Known = KnownBits::srem(LHSKnown, KnownBits::makeConstant(*Rem));
975 break;
976 }
977
978 llvm::computeKnownBits(I, Known, Q, Depth);
979 break;
980 }
981 case Instruction::Call: {
982 bool KnownBitsComputed = false;
984 switch (II->getIntrinsicID()) {
985 case Intrinsic::abs: {
986 if (DemandedMask == 1)
987 return II->getArgOperand(0);
988 break;
989 }
990 case Intrinsic::ctpop: {
991 // Checking if the number of clear bits is odd (parity)? If the type has
992 // an even number of bits, that's the same as checking if the number of
993 // set bits is odd, so we can eliminate the 'not' op.
994 Value *X;
995 if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
996 match(II->getArgOperand(0), m_Not(m_Value(X)))) {
998 II->getModule(), Intrinsic::ctpop, VTy);
999 return InsertNewInstWith(CallInst::Create(Ctpop, {X}), I->getIterator());
1000 }
1001 break;
1002 }
1003 case Intrinsic::bswap: {
1004 // If the only bits demanded come from one byte of the bswap result,
1005 // just shift the input byte into position to eliminate the bswap.
1006 unsigned NLZ = DemandedMask.countl_zero();
1007 unsigned NTZ = DemandedMask.countr_zero();
1008
1009 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
1010 // we need all the bits down to bit 8. Likewise, round NLZ. If we
1011 // have 14 leading zeros, round to 8.
1012 NLZ = alignDown(NLZ, 8);
1013 NTZ = alignDown(NTZ, 8);
1014 // If we need exactly one byte, we can do this transformation.
1015 if (BitWidth - NLZ - NTZ == 8) {
1016 // Replace this with either a left or right shift to get the byte into
1017 // the right place.
1018 Instruction *NewVal;
1019 if (NLZ > NTZ)
1020 NewVal = BinaryOperator::CreateLShr(
1021 II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ));
1022 else
1023 NewVal = BinaryOperator::CreateShl(
1024 II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
1025 NewVal->takeName(I);
1026 return InsertNewInstWith(NewVal, I->getIterator());
1027 }
1028 break;
1029 }
1030 case Intrinsic::ptrmask: {
1031 unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
1032 RHSKnown = KnownBits(MaskWidth);
1033 // If either the LHS or the RHS are Zero, the result is zero.
1034 if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Q, Depth + 1) ||
1036 I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
1037 RHSKnown, Q, Depth + 1))
1038 return I;
1039
1040 // TODO: Should be 1-extend
1041 RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);
1042
1043 Known = LHSKnown & RHSKnown;
1044 KnownBitsComputed = true;
1045
1046 // If the client is only demanding bits we know to be zero, return
1047 // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
1048 // provenance, but making the mask zero will be easily optimizable in
1049 // the backend.
1050 if (DemandedMask.isSubsetOf(Known.Zero) &&
1051 !match(I->getOperand(1), m_Zero()))
1052 return replaceOperand(
1053 *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
1054
1055 // Mask in demanded space does nothing.
1056 // NOTE: We may have attributes associated with the return value of the
1057 // llvm.ptrmask intrinsic that will be lost when we just return the
1058 // operand. We should try to preserve them.
1059 if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
1060 return I->getOperand(0);
1061
1062 // If the RHS is a constant, see if we can simplify it.
1064 I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
1065 return I;
1066
1067 // Combine:
1068 // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
1069 // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
1070 // where only the low bits known to be zero in the pointer are changed
1071 Value *InnerPtr;
1072 uint64_t GEPIndex;
1073 uint64_t PtrMaskImmediate;
1075 m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
1076 m_ConstantInt(PtrMaskImmediate)))) {
1077
1078 LHSKnown = computeKnownBits(InnerPtr, I, Depth + 1);
1079 if (!LHSKnown.isZero()) {
1080 const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
1081 uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;
1082
1083 uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
1084 uint64_t MaskedLowBitsGEPIndex =
1085 GEPIndex & PointerAlignBits & PtrMaskImmediate;
1086
1087 uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
1088
1089 if (MaskedGEPIndex != GEPIndex) {
1090 auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
1091 Builder.SetInsertPoint(I);
1092 Type *GEPIndexType =
1093 DL.getIndexType(GEP->getPointerOperand()->getType());
1094 Value *MaskedGEP = Builder.CreateGEP(
1095 GEP->getSourceElementType(), InnerPtr,
1096 ConstantInt::get(GEPIndexType, MaskedGEPIndex),
1097 GEP->getName(), GEP->isInBounds());
1098
1099 replaceOperand(*I, 0, MaskedGEP);
1100 return I;
1101 }
1102 }
1103 }
1104
1105 break;
1106 }
1107
1108 case Intrinsic::fshr:
1109 case Intrinsic::fshl: {
1110 const APInt *SA;
1111 if (!match(I->getOperand(2), m_APInt(SA)))
1112 break;
1113
1114 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
1115 // defined, so no need to special-case zero shifts here.
1116 uint64_t ShiftAmt = SA->urem(BitWidth);
1117 if (II->getIntrinsicID() == Intrinsic::fshr)
1118 ShiftAmt = BitWidth - ShiftAmt;
1119
1120 APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
1121 APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
1122 if (I->getOperand(0) != I->getOperand(1)) {
1123 if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Q,
1124 Depth + 1) ||
1125 SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Q,
1126 Depth + 1)) {
1127 // Range attribute or metadata may no longer hold.
1128 I->dropPoisonGeneratingAnnotations();
1129 return I;
1130 }
1131 } else { // fshl is a rotate
1132 // Avoid converting rotate into funnel shift.
1133 // Only simplify if one operand is constant.
1134 LHSKnown = computeKnownBits(I->getOperand(0), I, Depth + 1);
1135 if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
1136 !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) {
1137 replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
1138 return I;
1139 }
1140
1141 RHSKnown = computeKnownBits(I->getOperand(1), I, Depth + 1);
1142 if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
1143 !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) {
1144 replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
1145 return I;
1146 }
1147 }
1148
1149 LHSKnown <<= ShiftAmt;
1150 RHSKnown >>= BitWidth - ShiftAmt;
1151 Known = LHSKnown.unionWith(RHSKnown);
1152 KnownBitsComputed = true;
1153 break;
1154 }
1155 case Intrinsic::umax: {
1156 // UMax(A, C) == A if ...
1157 // The lowest non-zero bit of DemandMask is higher than the highest
1158 // non-zero bit of C.
1159 const APInt *C;
1160 unsigned CTZ = DemandedMask.countr_zero();
1161 if (match(II->getArgOperand(1), m_APInt(C)) &&
1162 CTZ >= C->getActiveBits())
1163 return II->getArgOperand(0);
1164 break;
1165 }
1166 case Intrinsic::umin: {
1167 // UMin(A, C) == A if ...
1168 // The lowest non-zero bit of DemandMask is higher than the highest
1169 // non-one bit of C.
1170 // This comes from using DeMorgans on the above umax example.
1171 const APInt *C;
1172 unsigned CTZ = DemandedMask.countr_zero();
1173 if (match(II->getArgOperand(1), m_APInt(C)) &&
1174 CTZ >= C->getBitWidth() - C->countl_one())
1175 return II->getArgOperand(0);
1176 break;
1177 }
1178 default: {
1179 // Handle target specific intrinsics
1180 std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
1181 *II, DemandedMask, Known, KnownBitsComputed);
1182 if (V)
1183 return *V;
1184 break;
1185 }
1186 }
1187 }
1188
1189 if (!KnownBitsComputed)
1190 llvm::computeKnownBits(I, Known, Q, Depth);
1191 break;
1192 }
1193 }
1194
1195 if (I->getType()->isPointerTy()) {
1196 Align Alignment = I->getPointerAlignment(DL);
1197 Known.Zero.setLowBits(Log2(Alignment));
1198 }
1199
1200 // If the client is only demanding bits that we know, return the known
1201 // constant. We can't directly simplify pointers as a constant because of
1202 // pointer provenance.
1203 // TODO: We could return `(inttoptr const)` for pointers.
1204 if (!I->getType()->isPointerTy() &&
1205 DemandedMask.isSubsetOf(Known.Zero | Known.One))
1206 return Constant::getIntegerValue(VTy, Known.One);
1207
1208 if (VerifyKnownBits) {
1209 KnownBits ReferenceKnown = llvm::computeKnownBits(I, Q, Depth);
1210 if (Known != ReferenceKnown) {
1211 errs() << "Mismatched known bits for " << *I << " in "
1212 << I->getFunction()->getName() << "\n";
1213 errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
1214 errs() << "SimplifyDemandedBits(): " << Known << "\n";
1215 std::abort();
1216 }
1217 }
1218
1219 return nullptr;
1220}
1221
1222/// Helper routine of SimplifyDemandedUseBits. It computes Known
1223/// bits. It also tries to handle simplifications that can be done based on
1224/// DemandedMask, but without modifying the Instruction.
///
/// Unlike SimplifyDemandedUseBits, this path is used when the instruction has
/// multiple users, so it must be read-only with respect to I: it may return an
/// already-existing value that is equivalent for this one use, or null after
/// filling in Known. It never rewrites I's operands.
1226    Instruction *I, const APInt &DemandedMask, KnownBits &Known,
1227    const SimplifyQuery &Q, unsigned Depth) {
1228  unsigned BitWidth = DemandedMask.getBitWidth();
1229  Type *ITy = I->getType();
1230
1231  KnownBits LHSKnown(BitWidth);
1232  KnownBits RHSKnown(BitWidth);
1233
1234  // Despite the fact that we can't simplify this instruction in all User's
1235  // context, we can at least compute the known bits, and we can
1236  // do simplifications that apply to *just* the one user if we know that
1237  // this instruction has a simpler value in that context.
1238  switch (I->getOpcode()) {
1239  case Instruction::And: {
1240    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
1241    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
1242    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
1243                                         Q, Depth);
1245
1246    // If the client is only demanding bits that we know, return the known
1247    // constant.
1248    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
1249      return Constant::getIntegerValue(ITy, Known.One);
1250
1251    // If all of the demanded bits are known 1 on one side, return the other.
1252    // These bits cannot contribute to the result of the 'and' in this context.
    // For each demanded bit, either the LHS bit is known 0 (result equals that
    // 0 bit) or the RHS bit is known 1 (result passes the LHS bit through), so
    // the 'and' is a no-op for this use — and symmetrically for operand 1.
1253    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
1254      return I->getOperand(0);
1255    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
1256      return I->getOperand(1);
1257
1258    break;
1259  }
1260  case Instruction::Or: {
1261    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
1262    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
1263    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
1264                                         Q, Depth);
1266
1267    // If the client is only demanding bits that we know, return the known
1268    // constant.
1269    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
1270      return Constant::getIntegerValue(ITy, Known.One);
1271
1272    // We can simplify (X|Y) -> X or Y in the user's context if we know that
1273    // only bits from X or Y are demanded.
1274    // If all of the demanded bits are known zero on one side, return the other.
1275    // These bits cannot contribute to the result of the 'or' in this context.
1276    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
1277      return I->getOperand(0);
1278    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
1279      return I->getOperand(1);
1280
1281    break;
1282  }
1283  case Instruction::Xor: {
1284    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
1285    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
1286    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
1287                                         Q, Depth);
1289
1290    // If the client is only demanding bits that we know, return the known
1291    // constant.
1292    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
1293      return Constant::getIntegerValue(ITy, Known.One);
1294
1295    // We can simplify (X^Y) -> X or Y in the user's context if we know that
1296    // only bits from X or Y are demanded.
1297    // If all of the demanded bits are known zero on one side, return the other.
    // x ^ 0 == x, so if every demanded bit of one operand is known zero, the
    // xor is an identity for this use.
1298    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
1299      return I->getOperand(0);
1300    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
1301      return I->getOperand(1);
1302
1303    break;
1304  }
1305  case Instruction::Add: {
    // Carries only propagate upward, so bits at or above the highest demanded
    // bit (the top NLZ bits) cannot influence any demanded result bit.
1306    unsigned NLZ = DemandedMask.countl_zero();
1307    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
1308
1309    // If an operand adds zeros to every bit below the highest demanded bit,
1310    // that operand doesn't change the result. Return the other side.
1311    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
1312    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
1313      return I->getOperand(0);
1314
1315    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
1316    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
1317      return I->getOperand(1);
1318
1319    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1320    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
1321    Known = KnownBits::add(LHSKnown, RHSKnown, NSW, NUW);
1323    break;
1324  }
1325  case Instruction::Sub: {
1326    unsigned NLZ = DemandedMask.countl_zero();
1327    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
1328
1329    // If an operand subtracts zeros from every bit below the highest demanded
1330    // bit, that operand doesn't change the result. Return the other side.
    // Note: only the RHS (subtrahend) can be elided this way; a known-zero LHS
    // would yield negation, not the other operand.
1331    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Q, Depth + 1);
1332    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
1333      return I->getOperand(0);
1334
1335    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1336    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
1337    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Q, Depth + 1);
1338    Known = KnownBits::sub(LHSKnown, RHSKnown, NSW, NUW);
1340    break;
1341  }
1342  case Instruction::AShr: {
1343    // Compute the Known bits to simplify things downstream.
1344    llvm::computeKnownBits(I, Known, Q, Depth);
1345
1346    // If this user is only demanding bits that we know, return the known
1347    // constant.
1348    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
1349      return Constant::getIntegerValue(ITy, Known.One);
1350
1351    // If the right shift operand 0 is a result of a left shift by the same
1352    // amount, this is probably a zero/sign extension, which may be unnecessary,
1353    // if we do not demand any of the new sign bits. So, return the original
1354    // operand instead.
1355    const APInt *ShiftRC;
1356    const APInt *ShiftLC;
1357    Value *X;
1358    unsigned BitWidth = DemandedMask.getBitWidth();
    // NOTE(review): ShiftLC/ShiftRC are compared as pointers; this relies on
    // equal shift-amount constants being uniqued to the same APInt storage —
    // confirm this is intentional rather than a value comparison (*LC == *RC).
1359    if (match(I,
1360              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
1361        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
1362        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
1363            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
1364      return X;
1365    }
1366
1367    break;
1368  }
1369  default:
1370    // Compute the Known bits to simplify things downstream.
1371    llvm::computeKnownBits(I, Known, Q, Depth);
1372
1373    // If this user is only demanding bits that we know, return the known
1374    // constant.
1375    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
1376      return Constant::getIntegerValue(ITy, Known.One);
1377
1378    break;
1379  }
1380
  // No single-use simplification applied; caller falls back to Known alone.
1381  return nullptr;
1382}
1383
1384/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
1385/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
1386/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
1387/// of "C2-C1".
1388///
1389/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
1390/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
/// 1) All the bits in S are 0, in this case E1 == E2.
1393/// 2) We don't care those bits in S, per the input DemandedMask.
1394/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
/// rest of the bits.
1396///
1397/// Currently we only test condition 2).
1398///
1399/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
1400/// not successful.
1402    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
1403    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  // Zero shift amounts on either side mean there is nothing to combine.
1404  if (!ShlOp1 || !ShrOp1)
1405    return nullptr; // No-op.
1406
1407  Value *VarX = Shr->getOperand(0);
1408  Type *Ty = VarX->getType();
1409  unsigned BitWidth = Ty->getScalarSizeInBits();
  // Oversized shift amounts produce poison per the LangRef; bail out rather
  // than reason about them.
1410  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
1411    return nullptr; // Undef.
1412
1413  unsigned ShlAmt = ShlOp1.getZExtValue();
1414  unsigned ShrAmt = ShrOp1.getZExtValue();
1415
  // The low ShlAmt bits of (X >> C1) << C2 are zero. NOTE(review):
  // setLowBits(ShlAmt) would be the tight bound; ShlAmt - 1 is deliberately(?)
  // conservative by one bit — confirm before tightening.
1416  Known.One.clearAllBits();
1417  Known.Zero.setLowBits(ShlAmt - 1);
1418  Known.Zero &= DemandedMask;
1419
  // BitMask1 marks the result bits that E1 = (X >>shr C1) << C2 takes from X;
  // BitMask2 marks the result bits that the proposed single shift E2 takes.
  // If the two agree on every demanded bit, E1 and E2 are interchangeable.
1420  APInt BitMask1(APInt::getAllOnes(BitWidth));
1421  APInt BitMask2(APInt::getAllOnes(BitWidth));
1422
1423  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
1424  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
1425                      (BitMask1.ashr(ShrAmt) << ShlAmt);
1426
1427  if (ShrAmt <= ShlAmt) {
1428    BitMask2 <<= (ShlAmt - ShrAmt);
1429  } else {
1430    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
1431                        BitMask2.ashr(ShrAmt - ShlAmt);
1432  }
1433
  // Check if condition-2 (see the comment to this function) is satisfied.
1435  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    // Equal amounts cancel entirely: (X >> C) << C == X on the demanded bits.
1436    if (ShrAmt == ShlAmt)
1437      return VarX;
1438
    // If the inner shift has other users we cannot retire it, so creating a
    // replacement would just add instructions.
1439    if (!Shr->hasOneUse())
1440      return nullptr;
1441
1442    BinaryOperator *New;
1443    if (ShrAmt < ShlAmt) {
      // Net left shift by the difference; carry over the shl's wrap flags.
1444      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
1445      New = BinaryOperator::CreateShl(VarX, Amt);
1447      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
1448      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
1449    } else {
      // Net right shift, preserving the original shift kind (lshr vs ashr)
      // and its exact flag.
1450      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
1451      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
1452                     BinaryOperator::CreateAShr(VarX, Amt);
1453      if (cast<BinaryOperator>(Shr)->isExact())
1454        New->setIsExact(true);
1455    }
1456
1457    return InsertNewInstWith(New, Shl->getIterator());
1458  }
1459
1460  return nullptr;
1461}
1462
1463/// The specified value produces a vector with any number of elements.
1464/// This method analyzes which elements of the operand are poison and
1465/// returns that information in PoisonElts.
1466///
1467/// DemandedElts contains the set of elements that are actually used by the
1468/// caller, and by default (AllowMultipleUsers equals false) the value is
1469/// simplified only if it has a single caller. If AllowMultipleUsers is set
1470/// to true, DemandedElts refers to the union of sets of elements that are
1471/// used by all callers.
1472///
1473/// If the information about demanded elements can be used to simplify the
1474/// operation, the operation is simplified, then the resultant value is
1475/// returned. This returns null if no change was made.
1477 APInt DemandedElts,
1478 APInt &PoisonElts,
1479 unsigned Depth,
1480 bool AllowMultipleUsers) {
1481 // Cannot analyze scalable type. The number of vector elements is not a
1482 // compile-time constant.
1483 if (isa<ScalableVectorType>(V->getType()))
1484 return nullptr;
1485
1486 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
1487 APInt EltMask(APInt::getAllOnes(VWidth));
1488 assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
1489
1490 if (match(V, m_Poison())) {
1491 // If the entire vector is poison, just return this info.
1492 PoisonElts = EltMask;
1493 return nullptr;
1494 }
1495
1496 if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
1497 PoisonElts = EltMask;
1498 return PoisonValue::get(V->getType());
1499 }
1500
1501 PoisonElts = 0;
1502
1503 if (auto *C = dyn_cast<Constant>(V)) {
1504 // Check if this is identity. If so, return 0 since we are not simplifying
1505 // anything.
1506 if (DemandedElts.isAllOnes())
1507 return nullptr;
1508
1509 Type *EltTy = cast<VectorType>(V->getType())->getElementType();
1512 for (unsigned i = 0; i != VWidth; ++i) {
1513 if (!DemandedElts[i]) { // If not demanded, set to poison.
1514 Elts.push_back(Poison);
1515 PoisonElts.setBit(i);
1516 continue;
1517 }
1518
1519 Constant *Elt = C->getAggregateElement(i);
1520 if (!Elt) return nullptr;
1521
1522 Elts.push_back(Elt);
1523 if (isa<PoisonValue>(Elt)) // Already poison.
1524 PoisonElts.setBit(i);
1525 }
1526
1527 // If we changed the constant, return it.
1528 Constant *NewCV = ConstantVector::get(Elts);
1529 return NewCV != C ? NewCV : nullptr;
1530 }
1531
1532 // Limit search depth.
1534 return nullptr;
1535
1536 if (!AllowMultipleUsers) {
1537 // If multiple users are using the root value, proceed with
1538 // simplification conservatively assuming that all elements
1539 // are needed.
1540 if (!V->hasOneUse()) {
1541 // Quit if we find multiple users of a non-root value though.
1542 // They'll be handled when it's their turn to be visited by
1543 // the main instcombine process.
1544 if (Depth != 0)
1545 // TODO: Just compute the PoisonElts information recursively.
1546 return nullptr;
1547
1548 // Conservatively assume that all elements are needed.
1549 DemandedElts = EltMask;
1550 }
1551 }
1552
1554 if (!I) return nullptr; // Only analyze instructions.
1555
1556 bool MadeChange = false;
1557 auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
1558 APInt Demanded, APInt &Undef) {
1559 auto *II = dyn_cast<IntrinsicInst>(Inst);
1560 Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
1561 if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
1562 replaceOperand(*Inst, OpNum, V);
1563 MadeChange = true;
1564 }
1565 };
1566
1567 APInt PoisonElts2(VWidth, 0);
1568 APInt PoisonElts3(VWidth, 0);
1569 switch (I->getOpcode()) {
1570 default: break;
1571
1572 case Instruction::GetElementPtr: {
1573 // The LangRef requires that struct geps have all constant indices. As
1574 // such, we can't convert any operand to partial undef.
1575 auto mayIndexStructType = [](GetElementPtrInst &GEP) {
1576 for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
1577 I != E; I++)
1578 if (I.isStruct())
1579 return true;
1580 return false;
1581 };
1582 if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
1583 break;
1584
1585 // Conservatively track the demanded elements back through any vector
1586 // operands we may have. We know there must be at least one, or we
1587 // wouldn't have a vector result to get here. Note that we intentionally
1588 // merge the undef bits here since gepping with either an poison base or
1589 // index results in poison.
1590 for (unsigned i = 0; i < I->getNumOperands(); i++) {
1591 if (i == 0 ? match(I->getOperand(i), m_Undef())
1592 : match(I->getOperand(i), m_Poison())) {
1593 // If the entire vector is undefined, just return this info.
1594 PoisonElts = EltMask;
1595 return nullptr;
1596 }
1597 if (I->getOperand(i)->getType()->isVectorTy()) {
1598 APInt PoisonEltsOp(VWidth, 0);
1599 simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
1600 // gep(x, undef) is not undef, so skip considering idx ops here
1601 // Note that we could propagate poison, but we can't distinguish between
1602 // undef & poison bits ATM
1603 if (i == 0)
1604 PoisonElts |= PoisonEltsOp;
1605 }
1606 }
1607
1608 break;
1609 }
1610 case Instruction::InsertElement: {
1611 // If this is a variable index, we don't know which element it overwrites.
1612 // demand exactly the same input as we produce.
1613 ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
1614 if (!Idx) {
1615 // Note that we can't propagate undef elt info, because we don't know
1616 // which elt is getting updated.
1617 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
1618 break;
1619 }
1620
1621 // The element inserted overwrites whatever was there, so the input demanded
1622 // set is simpler than the output set.
1623 unsigned IdxNo = Idx->getZExtValue();
1624 APInt PreInsertDemandedElts = DemandedElts;
1625 if (IdxNo < VWidth)
1626 PreInsertDemandedElts.clearBit(IdxNo);
1627
1628 // If we only demand the element that is being inserted and that element
1629 // was extracted from the same index in another vector with the same type,
1630 // replace this insert with that other vector.
1631 // Note: This is attempted before the call to simplifyAndSetOp because that
1632 // may change PoisonElts to a value that does not match with Vec.
1633 Value *Vec;
1634 if (PreInsertDemandedElts == 0 &&
1635 match(I->getOperand(1),
1636 m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
1637 Vec->getType() == I->getType()) {
1638 return Vec;
1639 }
1640
1641 simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
1642
1643 // If this is inserting an element that isn't demanded, remove this
1644 // insertelement.
1645 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1646 Worklist.push(I);
1647 return I->getOperand(0);
1648 }
1649
1650 // The inserted element is defined.
1651 PoisonElts.clearBit(IdxNo);
1652 break;
1653 }
1654 case Instruction::ShuffleVector: {
1655 auto *Shuffle = cast<ShuffleVectorInst>(I);
1656 assert(Shuffle->getOperand(0)->getType() ==
1657 Shuffle->getOperand(1)->getType() &&
1658 "Expected shuffle operands to have same type");
1659 unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
1660 ->getNumElements();
1661 // Handle trivial case of a splat. Only check the first element of LHS
1662 // operand.
1663 if (all_of(Shuffle->getShuffleMask(), equal_to(0)) &&
1664 DemandedElts.isAllOnes()) {
1665 if (!isa<PoisonValue>(I->getOperand(1))) {
1666 I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
1667 MadeChange = true;
1668 }
1669 APInt LeftDemanded(OpWidth, 1);
1670 APInt LHSPoisonElts(OpWidth, 0);
1671 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1672 if (LHSPoisonElts[0])
1673 PoisonElts = EltMask;
1674 else
1675 PoisonElts.clearAllBits();
1676 break;
1677 }
1678
1679 APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
1680 for (unsigned i = 0; i < VWidth; i++) {
1681 if (DemandedElts[i]) {
1682 unsigned MaskVal = Shuffle->getMaskValue(i);
1683 if (MaskVal != -1u) {
1684 assert(MaskVal < OpWidth * 2 &&
1685 "shufflevector mask index out of range!");
1686 if (MaskVal < OpWidth)
1687 LeftDemanded.setBit(MaskVal);
1688 else
1689 RightDemanded.setBit(MaskVal - OpWidth);
1690 }
1691 }
1692 }
1693
1694 APInt LHSPoisonElts(OpWidth, 0);
1695 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1696
1697 APInt RHSPoisonElts(OpWidth, 0);
1698 simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
1699
1700 // If this shuffle does not change the vector length and the elements
1701 // demanded by this shuffle are an identity mask, then this shuffle is
1702 // unnecessary.
1703 //
1704 // We are assuming canonical form for the mask, so the source vector is
1705 // operand 0 and operand 1 is not used.
1706 //
1707 // Note that if an element is demanded and this shuffle mask is undefined
1708 // for that element, then the shuffle is not considered an identity
1709 // operation. The shuffle prevents poison from the operand vector from
1710 // leaking to the result by replacing poison with an undefined value.
1711 if (VWidth == OpWidth) {
1712 bool IsIdentityShuffle = true;
1713 for (unsigned i = 0; i < VWidth; i++) {
1714 unsigned MaskVal = Shuffle->getMaskValue(i);
1715 if (DemandedElts[i] && i != MaskVal) {
1716 IsIdentityShuffle = false;
1717 break;
1718 }
1719 }
1720 if (IsIdentityShuffle)
1721 return Shuffle->getOperand(0);
1722 }
1723
1724 bool NewPoisonElts = false;
1725 unsigned LHSIdx = -1u, LHSValIdx = -1u;
1726 unsigned RHSIdx = -1u, RHSValIdx = -1u;
1727 bool LHSUniform = true;
1728 bool RHSUniform = true;
1729 for (unsigned i = 0; i < VWidth; i++) {
1730 unsigned MaskVal = Shuffle->getMaskValue(i);
1731 if (MaskVal == -1u) {
1732 PoisonElts.setBit(i);
1733 } else if (!DemandedElts[i]) {
1734 NewPoisonElts = true;
1735 PoisonElts.setBit(i);
1736 } else if (MaskVal < OpWidth) {
1737 if (LHSPoisonElts[MaskVal]) {
1738 NewPoisonElts = true;
1739 PoisonElts.setBit(i);
1740 } else {
1741 LHSIdx = LHSIdx == -1u ? i : OpWidth;
1742 LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
1743 LHSUniform = LHSUniform && (MaskVal == i);
1744 }
1745 } else {
1746 if (RHSPoisonElts[MaskVal - OpWidth]) {
1747 NewPoisonElts = true;
1748 PoisonElts.setBit(i);
1749 } else {
1750 RHSIdx = RHSIdx == -1u ? i : OpWidth;
1751 RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
1752 RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
1753 }
1754 }
1755 }
1756
1757 // Try to transform shuffle with constant vector and single element from
1758 // this constant vector to single insertelement instruction.
1759 // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1760 // insertelement V, C[ci], ci-n
1761 if (OpWidth ==
1762 cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
1763 Value *Op = nullptr;
1764 Constant *Value = nullptr;
1765 unsigned Idx = -1u;
1766
1767 // Find constant vector with the single element in shuffle (LHS or RHS).
1768 if (LHSIdx < OpWidth && RHSUniform) {
1769 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1770 Op = Shuffle->getOperand(1);
1771 Value = CV->getOperand(LHSValIdx);
1772 Idx = LHSIdx;
1773 }
1774 }
1775 if (RHSIdx < OpWidth && LHSUniform) {
1776 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1777 Op = Shuffle->getOperand(0);
1778 Value = CV->getOperand(RHSValIdx);
1779 Idx = RHSIdx;
1780 }
1781 }
1782 // Found constant vector with single element - convert to insertelement.
1783 if (Op && Value) {
1785 Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
1786 Shuffle->getName());
1787 InsertNewInstWith(New, Shuffle->getIterator());
1788 return New;
1789 }
1790 }
1791 if (NewPoisonElts) {
1792 // Add additional discovered undefs.
1794 for (unsigned i = 0; i < VWidth; ++i) {
1795 if (PoisonElts[i])
1797 else
1798 Elts.push_back(Shuffle->getMaskValue(i));
1799 }
1800 Shuffle->setShuffleMask(Elts);
1801 MadeChange = true;
1802 }
1803 break;
1804 }
1805 case Instruction::Select: {
1806 // If this is a vector select, try to transform the select condition based
1807 // on the current demanded elements.
1809 if (Sel->getCondition()->getType()->isVectorTy()) {
1810 // TODO: We are not doing anything with PoisonElts based on this call.
1811 // It is overwritten below based on the other select operands. If an
1812 // element of the select condition is known undef, then we are free to
1813 // choose the output value from either arm of the select. If we know that
1814 // one of those values is undef, then the output can be undef.
1815 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1816 }
1817
1818 // Next, see if we can transform the arms of the select.
1819 APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
1820 if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
1821 for (unsigned i = 0; i < VWidth; i++) {
1822 Constant *CElt = CV->getAggregateElement(i);
1823
1824 // isNullValue() always returns false when called on a ConstantExpr.
1825 if (CElt->isNullValue())
1826 DemandedLHS.clearBit(i);
1827 else if (CElt->isOneValue())
1828 DemandedRHS.clearBit(i);
1829 }
1830 }
1831
1832 simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
1833 simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
1834
1835 // Output elements are undefined if the element from each arm is undefined.
1836 // TODO: This can be improved. See comment in select condition handling.
1837 PoisonElts = PoisonElts2 & PoisonElts3;
1838 break;
1839 }
1840 case Instruction::BitCast: {
1841 // Vector->vector casts only.
1842 VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1843 if (!VTy) break;
1844 unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
1845 APInt InputDemandedElts(InVWidth, 0);
1846 PoisonElts2 = APInt(InVWidth, 0);
1847 unsigned Ratio;
1848
1849 if (VWidth == InVWidth) {
1850 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1851 // elements as are demanded of us.
1852 Ratio = 1;
1853 InputDemandedElts = DemandedElts;
1854 } else if ((VWidth % InVWidth) == 0) {
1855 // If the number of elements in the output is a multiple of the number of
1856 // elements in the input then an input element is live if any of the
1857 // corresponding output elements are live.
1858 Ratio = VWidth / InVWidth;
1859 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1860 if (DemandedElts[OutIdx])
1861 InputDemandedElts.setBit(OutIdx / Ratio);
1862 } else if ((InVWidth % VWidth) == 0) {
1863 // If the number of elements in the input is a multiple of the number of
1864 // elements in the output then an input element is live if the
1865 // corresponding output element is live.
1866 Ratio = InVWidth / VWidth;
1867 for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1868 if (DemandedElts[InIdx / Ratio])
1869 InputDemandedElts.setBit(InIdx);
1870 } else {
1871 // Unsupported so far.
1872 break;
1873 }
1874
1875 simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
1876
1877 if (VWidth == InVWidth) {
1878 PoisonElts = PoisonElts2;
1879 } else if ((VWidth % InVWidth) == 0) {
1880 // If the number of elements in the output is a multiple of the number of
1881 // elements in the input then an output element is undef if the
1882 // corresponding input element is undef.
1883 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1884 if (PoisonElts2[OutIdx / Ratio])
1885 PoisonElts.setBit(OutIdx);
1886 } else if ((InVWidth % VWidth) == 0) {
1887 // If the number of elements in the input is a multiple of the number of
1888 // elements in the output then an output element is undef if all of the
1889 // corresponding input elements are undef.
1890 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1891 APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
1892 if (SubUndef.popcount() == Ratio)
1893 PoisonElts.setBit(OutIdx);
1894 }
1895 } else {
1896 llvm_unreachable("Unimp");
1897 }
1898 break;
1899 }
1900 case Instruction::FPTrunc:
1901 case Instruction::FPExt:
1902 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1903 break;
1904
1905 case Instruction::Call: {
1907 if (!II) break;
1908 switch (II->getIntrinsicID()) {
1909 case Intrinsic::masked_gather: // fallthrough
1910 case Intrinsic::masked_load: {
1911 // Subtlety: If we load from a pointer, the pointer must be valid
1912 // regardless of whether the element is demanded. Doing otherwise risks
1913 // segfaults which didn't exist in the original program.
1914 APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
1915 DemandedPassThrough(DemandedElts);
1916 if (auto *CMask = dyn_cast<Constant>(II->getOperand(1))) {
1917 for (unsigned i = 0; i < VWidth; i++) {
1918 if (Constant *CElt = CMask->getAggregateElement(i)) {
1919 if (CElt->isNullValue())
1920 DemandedPtrs.clearBit(i);
1921 else if (CElt->isAllOnesValue())
1922 DemandedPassThrough.clearBit(i);
1923 }
1924 }
1925 }
1926
1927 if (II->getIntrinsicID() == Intrinsic::masked_gather)
1928 simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
1929 simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3);
1930
1931 // Output elements are undefined if the element from both sources are.
1932 // TODO: can strengthen via mask as well.
1933 PoisonElts = PoisonElts2 & PoisonElts3;
1934 break;
1935 }
1936 default: {
1937 // Handle target specific intrinsics
1938 std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
1939 *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
1940 simplifyAndSetOp);
1941 if (V)
1942 return *V;
1943 break;
1944 }
1945 } // switch on IntrinsicID
1946 break;
1947 } // case Call
1948 } // switch on Opcode
1949
1950 // TODO: We bail completely on integer div/rem and shifts because they have
1951 // UB/poison potential, but that should be refined.
1952 BinaryOperator *BO;
1953 if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1954 Value *X = BO->getOperand(0);
1955 Value *Y = BO->getOperand(1);
1956
1957 // Look for an equivalent binop except that one operand has been shuffled.
1958 // If the demand for this binop only includes elements that are the same as
1959 // the other binop, then we may be able to replace this binop with a use of
1960 // the earlier one.
1961 //
1962 // Example:
1963 // %other_bo = bo (shuf X, {0}), Y
1964 // %this_extracted_bo = extelt (bo X, Y), 0
1965 // -->
1966 // %other_bo = bo (shuf X, {0}), Y
1967 // %this_extracted_bo = extelt %other_bo, 0
1968 //
1969 // TODO: Handle demand of an arbitrary single element or more than one
1970 // element instead of just element 0.
1971 // TODO: Unlike general demanded elements transforms, this should be safe
1972 // for any (div/rem/shift) opcode too.
1973 if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
1974 BO->hasOneUse() ) {
1975
1976 auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
1977 // Try to use shuffle-of-operand in place of an operand:
1978 // bo X, Y --> bo (shuf X), Y
1979 // bo X, Y --> bo X, (shuf Y)
1980
1981 Value *OtherOp = MatchShufAsOp0 ? Y : X;
1982 if (!OtherOp->hasUseList())
1983 return nullptr;
1984
1985 BinaryOperator::BinaryOps Opcode = BO->getOpcode();
1986 Value *ShufOp = MatchShufAsOp0 ? X : Y;
1987
1988 for (User *U : OtherOp->users()) {
1989 ArrayRef<int> Mask;
1990 auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_Mask(Mask));
1991 if (BO->isCommutative()
1992 ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1993 : MatchShufAsOp0
1994 ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1995 : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
1996 if (match(Mask, m_ZeroMask()) && Mask[0] != PoisonMaskElem)
1997 if (DT.dominates(U, I))
1998 return U;
1999 }
2000 return nullptr;
2001 };
2002
2003 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
2004 return ShufBO;
2005 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
2006 return ShufBO;
2007 }
2008
2009 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
2010 simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
2011
2012 // Output elements are undefined if both are undefined. Consider things
2013 // like undef & 0. The result is known zero, not undef.
2014 PoisonElts &= PoisonElts2;
2015 }
2016
2017 // If we've proven all of the lanes poison, return a poison value.
2018 // TODO: Intersect w/demanded lanes
2019 if (PoisonElts.isAllOnes())
2020 return PoisonValue::get(I->getType());
2021
2022 return MadeChange ? I : nullptr;
2023}
2024
2025/// For floating-point classes that resolve to a single bit pattern, return that
2026/// value.
2028 bool IsCanonicalizing = false) {
2029 if (Mask == fcNone)
2030 return PoisonValue::get(Ty);
2031
2032 if (Mask == fcPosZero)
2033 return Constant::getNullValue(Ty);
2034
2035 // TODO: Support aggregate types that are allowed by FPMathOperator.
2036 if (Ty->isAggregateType())
2037 return nullptr;
2038
2039 // Turn any possible snans into quiet if we can.
2040 if (Mask == fcNan && IsCanonicalizing)
2041 return ConstantFP::getQNaN(Ty);
2042
2043 switch (Mask) {
2044 case fcNegZero:
2045 return ConstantFP::getZero(Ty, true);
2046 case fcPosInf:
2047 return ConstantFP::getInfinity(Ty);
2048 case fcNegInf:
2049 return ConstantFP::getInfinity(Ty, true);
2050 case fcQNan:
2051 // Payload bits cannot be dropped for pure signbit operations.
2052 return IsCanonicalizing ? ConstantFP::getQNaN(Ty) : nullptr;
2053 default:
2054 return nullptr;
2055 }
2056}
2057
2058/// Perform multiple-use aware simplifications for fabs(\p Src). Returns a
2059/// replacement value if it's simplified, otherwise nullptr. Updates \p Known
2060/// with the known fpclass if not simplified.
// NOTE(review): extraction dropped original line 2061, which declared this
// function's signature — presumably something like
//   static Value *simplifyDemandedFPClassFAbs(KnownFPClass &Known, Value *Src,
// (see the call site in the FNeg handling below). Confirm against upstream.
2062 FPClassTest DemandedMask,
2063 KnownFPClass KnownSrc, bool NSZ) {
// Classes the caller does not demand may be assumed absent in the source.
2064 if ((DemandedMask & fcNan) == fcNone)
2065 KnownSrc.knownNot(fcNan);
2066 if ((DemandedMask & fcInf) == fcNone)
2067 KnownSrc.knownNot(fcInf);
2068
// fabs is a no-op if the sign bit is known clear, or if the value can never be
// negative (and nans, whose sign fabs would still clear, are not demanded).
2069 if (KnownSrc.SignBit == false ||
2070 ((DemandedMask & fcNan) == fcNone && KnownSrc.isKnownNever(fcNegative)))
2071 return Src;
2072
2073 // If the only sign bit difference is due to -0, ignore it with nsz
// NOTE(review): original line 2075 (the condition of this 'if') was lost in
// extraction — presumably a KnownSrc.isKnownNever(...) check excluding all
// negative classes except -0. Confirm against upstream before compiling.
2074 if (NSZ &&
2076 return Src;
2077
// Not simplified: report the class set of fabs(Src), clamped to the demanded
// classes, for the caller's Known.
2078 Known = KnownFPClass::fabs(KnownSrc);
2079 Known.knownNot(~DemandedMask);
2080 return nullptr;
2081}
2082
2083/// Try to set an inferred no-nans or no-infs in \p FMF. \p ValidResults is a
2084/// mask of known valid results for the operator (already computed from the
2085/// result, and the known operand inputs in \p Known)
2087 FPClassTest ValidResults,
2088 ArrayRef<KnownFPClass> Known) {
2089 if (!FMF.noNaNs() && (ValidResults & fcNan) == fcNone) {
2090 if (all_of(Known, [](const KnownFPClass KnownSrc) {
2091 return KnownSrc.isKnownNeverNaN();
2092 }))
2093 FMF.setNoNaNs();
2094 }
2095
2096 if (!FMF.noInfs() && (ValidResults & fcInf) == fcNone) {
2097 if (all_of(Known, [](const KnownFPClass KnownSrc) {
2098 return KnownSrc.isKnownNeverInfinity();
2099 }))
2100 FMF.setNoInfs();
2101 }
2102
2103 return FMF;
2104}
2105
2107 FastMathFlags FMF) {
2108 if (FMF.noNaNs())
2109 DemandedMask &= ~fcNan;
2110
2111 if (FMF.noInfs())
2112 DemandedMask &= ~fcInf;
2113 return DemandedMask;
2114}
2115
2116/// Apply epilog fixups to a floating-point intrinsic. See if the result can
2117/// fold to a constant, or apply fast math flags.
// NOTE(review): extraction dropped original line 2118 with this function's
// signature head — presumably something like
//   static Value *simplifyDemandedFPClassResult(Instruction *FPOp,
// (FPOp must be an Instruction-like type: setFastMathFlags is called on it
// below and it is returned as a Value*). Confirm against upstream.
2119 FastMathFlags FMF,
2120 FPClassTest DemandedMask,
2121 KnownFPClass &Known,
2122 ArrayRef<KnownFPClass> KnownSrcs) {
// Only classes both demanded and possible can actually be observed.
2123 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
// If the surviving class set pins down a single bit pattern, fold to it.
2124 Constant *SingleVal = getFPClassConstant(FPOp->getType(), ValidResults,
2125 /*IsCanonicalizing=*/true);
2126 if (SingleVal)
2127 return SingleVal;
2128
// Otherwise, try to strengthen the fast-math flags from what we now know.
2129 FastMathFlags InferredFMF =
2130 inferFastMathValueFlags(FMF, ValidResults, KnownSrcs);
2131 if (InferredFMF != FMF) {
// NOTE(review): original line 2132 was lost in extraction — contents unknown.
2133 FPOp->setFastMathFlags(InferredFMF);
2134 return FPOp;
2135 }
2136
2137 return nullptr;
2138}
2139
2140/// Perform multiple-use aware simplifications for fneg(fabs(\p Src)). Returns a
2141/// replacement value if it's simplified, otherwise nullptr. Updates \p Known
2142/// with the known fpclass if not simplified.
// NOTE(review): extraction dropped original line 2143 with this function's
// signature head — presumably
//   static Value *simplifyDemandedFPClassFnegFabs(KnownFPClass &Known,
//                                                 Value *Src,
// (the FNeg case of SimplifyDemandedFPClass calls it under that name).
// Confirm against upstream.
2144 FPClassTest DemandedMask,
2145 KnownFPClass KnownSrc, bool NSZ) {
// Classes the caller does not demand may be assumed absent in the source.
2146 if ((DemandedMask & fcNan) == fcNone)
2147 KnownSrc.knownNot(fcNan);
2148 if ((DemandedMask & fcInf) == fcNone)
2149 KnownSrc.knownNot(fcInf);
2150
2151 // If the source value is known negative, we can directly fold to it.
2152 if (KnownSrc.SignBit == true)
2153 return Src;
2154
2155 // If the only sign bit difference is for 0, ignore it with nsz.
// NOTE(review): original line 2157 (the condition of this 'if') was lost in
// extraction — presumably a KnownSrc.isKnownNever(...) check excluding all
// positive classes except +0. Confirm against upstream before compiling.
2156 if (NSZ &&
2158 return Src;
2159
// Not simplified: report the class set of fneg(fabs(Src)), clamped to the
// demanded classes, for the caller's Known.
2160 Known = KnownFPClass::fneg(KnownFPClass::fabs(KnownSrc));
2161 Known.knownNot(~DemandedMask);
2162 return nullptr;
2163}
2164
// NOTE(review): extraction dropped original line(s) 2164-2165 with this
// function's doc comment and signature head. From the body it takes a
// magnitude source value MagSrc plus the parameters below and returns MagSrc
// when the demanded classes show the sign adjustment is a no-op — it appears
// to be a copysign-style helper. Confirm name/signature against upstream.
2166 FPClassTest DemandedMask,
2167 KnownFPClass KnownSrc,
2168 bool NSZ) {
2169 if (NSZ) {
// With nsz, +0 and -0 are interchangeable, so fold the zero classes into the
// same-sign buckets when checking whether the sign already matches.
2170 constexpr FPClassTest NegOrZero = fcNegative | fcPosZero;
2171 constexpr FPClassTest PosOrZero = fcPositive | fcNegZero;
2172
// Only negative-or-zero results are demanded and the source is always in that
// set: the operation cannot change anything observable.
2173 if ((DemandedMask & ~NegOrZero) == fcNone &&
2174 KnownSrc.isKnownAlways(NegOrZero))
2175 return MagSrc;
2176
2177 if ((DemandedMask & ~PosOrZero) == fcNone &&
2178 KnownSrc.isKnownAlways(PosOrZero))
2179 return MagSrc;
2180 } else {
// Without nsz, require the sign bit itself to be known to match the demanded
// half of the class lattice.
2181 if ((DemandedMask & ~fcNegative) == fcNone && KnownSrc.SignBit == true)
2182 return MagSrc;
2183
2184 if ((DemandedMask & ~fcPositive) == fcNone && KnownSrc.SignBit == false)
2185 return MagSrc;
2186 }
2187
// No simplification possible.
2188 return nullptr;
2189}
2190
// Multiple-use aware simplification for the floating-point min/max intrinsics
// (maximum/minimum, maxnum/minnum, maximumnum/minimumnum): if class knowledge
// proves which operand is selected, return it; otherwise compute Known for the
// result and try to fold it to a single constant.
2191static Value *
// NOTE(review): extraction dropped original line 2192 with the function name
// and first parameters (likely "simplifyDemandedFPClassMinMax(KnownFPClass
// &Known, Intrinsic::ID IID," or similar — IID and Known are used below but
// not declared in the surviving text). Confirm against upstream.
2193 const CallInst *CI, FPClassTest DemandedMask,
2194 KnownFPClass KnownLHS, KnownFPClass KnownRHS,
2195 const Function &F, bool NSZ) {
// Without nsz, -0 < +0 must be honored when ordering operands.
2196 bool OrderedZeroSign = !NSZ;
// NOTE(review): original line 2197 was lost — presumably the declaration of
// OpKind (KnownFPClass::MinMaxKind), which each case assigns below.
2198
2199 switch (IID) {
2200 case Intrinsic::maximum: {
// NOTE(review): original line 2201 (presumably "OpKind = ...maximum;") lost.
2202
2203 // If one operand is known greater than the other, it must be that
2204 // operand unless the other is a nan.
// NOTE(review): original line 2205 — the start of this 'if', presumably a
// KnownFPClass::isKnownGT(KnownLHS.KnownFPClasses, ...) style call — lost.
2206 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2207 KnownRHS.isKnownNever(fcNan))
2208 return CI->getArgOperand(0);
2209
// NOTE(review): original line 2210 — start of the mirrored 'if' — lost.
2211 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2212 KnownLHS.isKnownNever(fcNan))
2213 return CI->getArgOperand(1);
2214
2215 break;
2216 }
2217 case Intrinsic::minimum: {
// NOTE(review): original line 2218 (presumably "OpKind = ...minimum;") lost.
2219
2220 // If one operand is known less than the other, it must be that operand
2221 // unless the other is a nan.
// NOTE(review): original line 2222 — start of this 'if' — lost.
2223 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2224 KnownRHS.isKnownNever(fcNan))
2225 return CI->getArgOperand(0);
2226
// NOTE(review): original line 2227 — start of the mirrored 'if' — lost.
2228 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2229 KnownLHS.isKnownNever(fcNan))
2230 return CI->getArgOperand(1);
2231
2232 break;
2233 }
2234 case Intrinsic::maxnum:
2235 case Intrinsic::maximumnum: {
2236 OpKind = IID == Intrinsic::maxnum ? KnownFPClass::MinMaxKind::maxnum
// NOTE(review): original line 2237 — the ':' arm, presumably
// "KnownFPClass::MinMaxKind::maximumnum;" — lost, as was line 2239 (start of
// the next 'if').
2238
2240 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2241 KnownLHS.isKnownNever(fcNan))
2242 return CI->getArgOperand(0);
2243
// NOTE(review): original line 2244 — start of the mirrored 'if' — lost.
2245 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2246 KnownRHS.isKnownNever(fcNan))
2247 return CI->getArgOperand(1);
2248
2249 break;
2250 }
2251 case Intrinsic::minnum:
2252 case Intrinsic::minimumnum: {
2253 OpKind = IID == Intrinsic::minnum ? KnownFPClass::MinMaxKind::minnum
// NOTE(review): original line 2254 — the ':' arm, presumably
// "KnownFPClass::MinMaxKind::minimumnum;" — lost, as was line 2256 (start of
// the next 'if').
2255
2257 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2258 KnownLHS.isKnownNever(fcNan))
2259 return CI->getArgOperand(0);
2260
// NOTE(review): original line 2261 — start of the mirrored 'if' — lost.
2262 KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2263 KnownRHS.isKnownNever(fcNan))
2264 return CI->getArgOperand(1);
2265
2266 break;
2267 }
2268 default:
2269 llvm_unreachable("not a min/max intrinsic");
2270 }
2271
// Denormal mode of the enclosing function affects zero-equality reasoning.
2272 Type *EltTy = CI->getType()->getScalarType();
2273 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2274 Known = KnownFPClass::minMaxLike(KnownLHS, KnownRHS, OpKind, Mode);
2275 Known.knownNot(~DemandedMask);
2276
// If the surviving class set pins down a single value, fold to that constant.
2277 return getFPClassConstant(CI->getType(), Known.KnownFPClasses,
2278 /*IsCanonicalizing=*/true);
2279}
2280
// Demanded-fpclass simplification for an fptrunc-like instruction I: widen the
// demanded class mask to cover every source class that rounding to the
// narrower type could map into a demanded result class, recurse on the
// operand, then compute Known and apply the common result fixups.
2281static Value *
// NOTE(review): extraction dropped original line 2282 with the function name
// and first parameters (presumably "simplifyDemandedFPClassFPTrunc(
// InstCombinerImpl &IC, Instruction &I," — IC and I are used below but not
// declared in the surviving text). Confirm against upstream.
2283 FastMathFlags FMF, FPClassTest DemandedMask,
2284 KnownFPClass &Known, unsigned Depth) {
2285
// A nan result requires a nan source.
2286 FPClassTest SrcDemandedMask = DemandedMask;
2287 if (DemandedMask & fcNan)
2288 SrcDemandedMask |= fcNan;
2289
2290 // Zero results may have been rounded from subnormal or normal sources.
2291 if (DemandedMask & fcNegZero)
2292 SrcDemandedMask |= fcNegSubnormal | fcNegNormal;
2293 if (DemandedMask & fcPosZero)
2294 SrcDemandedMask |= fcPosSubnormal | fcPosNormal;
2295
2296 // Subnormal results may have been normal in the source type
2297 if (DemandedMask & fcNegSubnormal)
2298 SrcDemandedMask |= fcNegNormal;
2299 if (DemandedMask & fcPosSubnormal)
2300 SrcDemandedMask |= fcPosNormal;
2301
// Infinite results may come from normal sources that overflow the narrower
// type's range during truncation.
2302 if (DemandedMask & fcPosInf)
2303 SrcDemandedMask |= fcPosNormal;
2304 if (DemandedMask & fcNegInf)
2305 SrcDemandedMask |= fcNegNormal;
2306
// Recurse on the source; a non-null return means the operand was changed and
// the caller should revisit I.
2307 KnownFPClass KnownSrc;
2308 if (IC.SimplifyDemandedFPClass(&I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2309 return &I;
2310
2311 Known = KnownFPClass::fptrunc(KnownSrc);
2312 Known.knownNot(~DemandedMask);
2313
// Common epilog: try constant fold of the result or FMF strengthening.
2314 return simplifyDemandedFPClassResult(&I, FMF, DemandedMask, Known,
2315 {KnownSrc});
2316}
2317
2319 FPClassTest DemandedMask,
2320 KnownFPClass &Known,
2321 Instruction *CxtI,
2322 unsigned Depth) {
2323 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2324 assert(Known == KnownFPClass() && "expected uninitialized state");
2325
2326 Type *VTy = I->getType();
2327
2328 FastMathFlags FMF;
2329 if (auto *FPOp = dyn_cast<FPMathOperator>(I)) {
2330 FMF = FPOp->getFastMathFlags();
2331 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
2332 }
2333
2334 switch (I->getOpcode()) {
2335 case Instruction::FNeg: {
2336 // Special case fneg(fabs(x))
2337
2338 Value *FNegSrc = I->getOperand(0);
2339 Value *FNegFAbsSrc;
2340 if (match(FNegSrc, m_OneUse(m_FAbs(m_Value(FNegFAbsSrc))))) {
2341 KnownFPClass KnownSrc;
2343 llvm::unknown_sign(DemandedMask), KnownSrc,
2344 Depth + 1))
2345 return I;
2346
2347 FastMathFlags FabsFMF = cast<FPMathOperator>(FNegSrc)->getFastMathFlags();
2348 FPClassTest ThisDemandedMask =
2349 adjustDemandedMaskFromFlags(DemandedMask, FabsFMF);
2350
2351 bool IsNSZ = FMF.noSignedZeros() || FabsFMF.noSignedZeros();
2352 if (Value *Simplified = simplifyDemandedFPClassFnegFabs(
2353 Known, FNegFAbsSrc, ThisDemandedMask, KnownSrc, IsNSZ))
2354 return Simplified;
2355
2356 if ((ThisDemandedMask & fcNan) == fcNone)
2357 KnownSrc.knownNot(fcNan);
2358 if ((ThisDemandedMask & fcInf) == fcNone)
2359 KnownSrc.knownNot(fcInf);
2360
2361 // fneg(fabs(x)) => fneg(x)
2362 if (KnownSrc.SignBit == false)
2363 return replaceOperand(*I, 0, FNegFAbsSrc);
2364
2365 // fneg(fabs(x)) => fneg(x), ignoring -0 if nsz.
2366 if (IsNSZ &&
2368 return replaceOperand(*I, 0, FNegFAbsSrc);
2369
2370 break;
2371 }
2372
2373 if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known,
2374 Depth + 1))
2375 return I;
2376 Known.fneg();
2377 Known.knownNot(~DemandedMask);
2378 break;
2379 }
2380 case Instruction::FAdd:
2381 case Instruction::FSub: {
2382 KnownFPClass KnownLHS, KnownRHS;
2383
2384 // fadd x, x can be handled more aggressively.
2385 if (I->getOperand(0) == I->getOperand(1) &&
2386 I->getOpcode() == Instruction::FAdd &&
2387 isGuaranteedNotToBeUndef(I->getOperand(0), SQ.AC, CxtI, SQ.DT,
2388 Depth + 1)) {
2389 Type *EltTy = VTy->getScalarType();
2390 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2391
2392 FPClassTest SrcDemandedMask = DemandedMask;
2393 if (DemandedMask & fcNan)
2394 SrcDemandedMask |= fcNan;
2395
2396 // Doubling a subnormal could have resulted in a normal value.
2397 if (DemandedMask & fcPosNormal)
2398 SrcDemandedMask |= fcPosSubnormal;
2399 if (DemandedMask & fcNegNormal)
2400 SrcDemandedMask |= fcNegSubnormal;
2401
2402 // Doubling a subnormal may produce 0 if FTZ/DAZ.
2403 if (Mode != DenormalMode::getIEEE()) {
2404 if (DemandedMask & fcPosZero) {
2405 SrcDemandedMask |= fcPosSubnormal;
2406
2407 if (Mode.inputsMayBePositiveZero() || Mode.outputsMayBePositiveZero())
2408 SrcDemandedMask |= fcNegSubnormal;
2409 }
2410
2411 if (DemandedMask & fcNegZero)
2412 SrcDemandedMask |= fcNegSubnormal;
2413 }
2414
2415 // Doubling a normal could have resulted in an infinity.
2416 if (DemandedMask & fcPosInf)
2417 SrcDemandedMask |= fcPosNormal;
2418 if (DemandedMask & fcNegInf)
2419 SrcDemandedMask |= fcNegNormal;
2420
2421 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2422 return I;
2423
2424 Known = KnownFPClass::fadd_self(KnownLHS, Mode);
2425 KnownRHS = KnownLHS;
2426 } else {
2427 FPClassTest SrcDemandedMask = fcFinite;
2428
2429 // inf + (-inf) = nan
2430 if (DemandedMask & fcNan)
2431 SrcDemandedMask |= fcNan | fcInf;
2432
2433 if (DemandedMask & fcInf)
2434 SrcDemandedMask |= fcInf;
2435
2436 if (SimplifyDemandedFPClass(I, 1, SrcDemandedMask, KnownRHS, Depth + 1) ||
2437 SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2438 return I;
2439
2440 Type *EltTy = VTy->getScalarType();
2441 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2442
2443 Known = I->getOpcode() == Instruction::FAdd
2444 ? KnownFPClass::fadd(KnownLHS, KnownRHS, Mode)
2445 : KnownFPClass::fsub(KnownLHS, KnownRHS, Mode);
2446 }
2447
2448 Known.knownNot(~DemandedMask);
2449
2450 if (Constant *SingleVal = getFPClassConstant(VTy, Known.KnownFPClasses,
2451 /*IsCanonicalizing=*/true))
2452 return SingleVal;
2453
2454 // Propagate known result to simplify edge case checks.
2455 bool ResultNotNan = (DemandedMask & fcNan) == fcNone;
2456
2457 // With nnan: X + {+/-}Inf --> {+/-}Inf
2458 if (ResultNotNan && I->getOpcode() == Instruction::FAdd &&
2459 KnownRHS.isKnownAlways(fcInf | fcNan) && KnownLHS.isKnownNever(fcNan))
2460 return I->getOperand(1);
2461
2462 // With nnan: {+/-}Inf + X --> {+/-}Inf
2463 // With nnan: {+/-}Inf - X --> {+/-}Inf
2464 if (ResultNotNan && KnownLHS.isKnownAlways(fcInf | fcNan) &&
2465 KnownRHS.isKnownNever(fcNan))
2466 return I->getOperand(0);
2467
2469 FMF, Known.KnownFPClasses, {KnownLHS, KnownRHS});
2470 if (InferredFMF != FMF) {
2471 I->setFastMathFlags(InferredFMF);
2472 return I;
2473 }
2474
2475 return nullptr;
2476 }
2477 case Instruction::FMul: {
2478 KnownFPClass KnownLHS, KnownRHS;
2479
2480 Value *X = I->getOperand(0);
2481 Value *Y = I->getOperand(1);
2482
2483 FPClassTest SrcDemandedMask =
2484 DemandedMask & (fcNan | fcZero | fcSubnormal | fcNormal);
2485
2486 if (DemandedMask & fcInf) {
2487 // mul x, inf = inf
2488 // mul large_x, large_y = inf
2489 SrcDemandedMask |= fcSubnormal | fcNormal | fcInf;
2490 }
2491
2492 if (DemandedMask & fcNan) {
2493 // mul +/-inf, 0 => nan
2494 SrcDemandedMask |= fcZero | fcInf | fcNan;
2495
2496 // TODO: Mode check
2497 // mul +/-inf, sub => nan if daz
2498 SrcDemandedMask |= fcSubnormal;
2499 }
2500
2501 // mul normal, subnormal = normal
2502 // Normal inputs may result in underflow.
2503 if (DemandedMask & (fcNormal | fcSubnormal))
2504 SrcDemandedMask |= fcNormal | fcSubnormal;
2505
2506 if (DemandedMask & fcZero)
2507 SrcDemandedMask |= fcNormal | fcSubnormal;
2508
2509 if (X == Y && isGuaranteedNotToBeUndef(X, SQ.AC, CxtI, SQ.DT, Depth + 1)) {
2510 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2511 return I;
2512 Type *EltTy = VTy->getScalarType();
2513
2514 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2515 Known = KnownFPClass::square(KnownLHS, Mode);
2516 Known.knownNot(~DemandedMask);
2517
2518 if (Constant *Folded = getFPClassConstant(VTy, Known.KnownFPClasses,
2519 /*IsCanonicalizing=*/true))
2520 return Folded;
2521
2522 if (Known.isKnownAlways(fcPosZero | fcPosInf | fcNan) &&
2523 KnownLHS.isKnownNever(fcSubnormal | fcNormal)) {
2524 // We can skip the fabs if the source was already known positive.
2525 if (KnownLHS.isKnownAlways(fcPositive))
2526 return X;
2527
2528 // => fabs(x), in case this was a -inf or -0.
2529 // Note: Dropping canonicalize.
2531 Builder.SetInsertPoint(I);
2532 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FMF);
2533 Fabs->takeName(I);
2534 return Fabs;
2535 }
2536
2537 return nullptr;
2538 }
2539
2540 if (SimplifyDemandedFPClass(I, 1, SrcDemandedMask, KnownRHS, Depth + 1) ||
2541 SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2542 return I;
2543
2544 if (FMF.noInfs()) {
2545 // Flag implies inputs cannot be infinity.
2546 KnownLHS.knownNot(fcInf);
2547 KnownRHS.knownNot(fcInf);
2548 }
2549
2550 bool NonNanResult = (DemandedMask & fcNan) == fcNone;
2551
2552 // With no-nans/no-infs:
2553 // X * 0.0 --> copysign(0.0, X)
2554 // X * -0.0 --> copysign(0.0, -X)
2555 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2556 KnownRHS.isKnownAlways(fcPosZero | fcNan)) {
2558 Builder.SetInsertPoint(I);
2559
2560 // => copysign(+0, lhs)
2561 // Note: Dropping canonicalize
2562 Value *Copysign = Builder.CreateCopySign(Y, X, FMF);
2563 Copysign->takeName(I);
2564 return Copysign;
2565 }
2566
2567 if (KnownLHS.isKnownAlways(fcPosZero | fcNan) &&
2568 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2570 Builder.SetInsertPoint(I);
2571
2572 // => copysign(+0, rhs)
2573 // Note: Dropping canonicalize
2574 Value *Copysign = Builder.CreateCopySign(X, Y, FMF);
2575 Copysign->takeName(I);
2576 return Copysign;
2577 }
2578
2579 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2580 KnownRHS.isKnownAlways(fcNegZero | fcNan)) {
2582 Builder.SetInsertPoint(I);
2583
2584 // => copysign(0, fneg(lhs))
2585 // Note: Dropping canonicalize
2586 Value *Copysign =
2587 Builder.CreateCopySign(Y, Builder.CreateFNegFMF(X, FMF), FMF);
2588 Copysign->takeName(I);
2589 return Copysign;
2590 }
2591
2592 if (KnownLHS.isKnownAlways(fcNegZero | fcNan) &&
2593 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2595 Builder.SetInsertPoint(I);
2596
2597 // => copysign(+0, fneg(rhs))
2598 // Note: Dropping canonicalize
2599 Value *Copysign =
2600 Builder.CreateCopySign(X, Builder.CreateFNegFMF(Y, FMF), FMF);
2601 Copysign->takeName(I);
2602 return Copysign;
2603 }
2604
2605 Type *EltTy = VTy->getScalarType();
2606 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2607
2608 if (KnownLHS.isKnownAlways(fcInf | fcNan) &&
2609 (KnownRHS.isKnownNeverNaN() &&
2610 KnownRHS.cannotBeOrderedGreaterEqZero(Mode))) {
2612 Builder.SetInsertPoint(I);
2613
2614 // Note: Dropping canonicalize
2615 Value *Neg = Builder.CreateFNegFMF(X, FMF);
2616 Neg->takeName(I);
2617 return Neg;
2618 }
2619
2620 if (KnownRHS.isKnownAlways(fcInf | fcNan) &&
2621 (KnownLHS.isKnownNeverNaN() &&
2622 KnownLHS.cannotBeOrderedGreaterEqZero(Mode))) {
2624 Builder.SetInsertPoint(I);
2625
2626 // Note: Dropping canonicalize
2627 Value *Neg = Builder.CreateFNegFMF(Y, FMF);
2628 Neg->takeName(I);
2629 return Neg;
2630 }
2631
2632 Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode);
2633 Known.knownNot(~DemandedMask);
2634
2635 if (Constant *SingleVal = getFPClassConstant(VTy, Known.KnownFPClasses,
2636 /*IsCanonicalizing=*/true))
2637 return SingleVal;
2638
2640 FMF, Known.KnownFPClasses, {KnownLHS, KnownRHS});
2641 if (InferredFMF != FMF) {
2642 I->setFastMathFlags(InferredFMF);
2643 return I;
2644 }
2645
2646 return nullptr;
2647 }
2648 case Instruction::FDiv: {
2649 Value *X = I->getOperand(0);
2650 Value *Y = I->getOperand(1);
2651 if (X == Y && isGuaranteedNotToBeUndef(X, SQ.AC, CxtI, SQ.DT, Depth + 1)) {
2652 // If the source is 0, inf or nan, the result is a nan
2654 Builder.SetInsertPoint(I);
2655
2656 Value *IsZeroOrNan = Builder.CreateFCmpFMF(
2657 FCmpInst::FCMP_UEQ, I->getOperand(0), ConstantFP::getZero(VTy), FMF);
2658
2659 Value *Fabs =
2660 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, I->getOperand(0), FMF);
2661 Value *IsInfOrNan = Builder.CreateFCmpFMF(
2663
2664 Value *IsInfOrZeroOrNan = Builder.CreateOr(IsInfOrNan, IsZeroOrNan);
2665
2666 return Builder.CreateSelectFMFWithUnknownProfile(
2667 IsInfOrZeroOrNan, ConstantFP::getQNaN(VTy),
2668 ConstantFP::get(
2670 FMF, DEBUG_TYPE);
2671 }
2672
2673 Type *EltTy = VTy->getScalarType();
2674 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2675
2676 // Every output class could require denormal inputs (except for the
2677 // degenerate case of only-nan results, without DAZ).
2678 FPClassTest SrcDemandedMask = (DemandedMask & fcNan) | fcSubnormal;
2679
2680 // Normal inputs may result in underflow.
2681 // x / x = 1.0 for non0/inf/nan
2682 // -x = +y / -z
2683 // -x = -y / +z
2684 if (DemandedMask & (fcSubnormal | fcNormal))
2685 SrcDemandedMask |= fcNormal;
2686
2687 if (DemandedMask & fcNan) {
2688 // 0 / 0 = nan
2689 // inf / inf = nan
2690
2691 // Subnormal is added in case of DAZ, but this isn't strictly
2692 // necessary. Every other input class implies a possible subnormal source,
2693 // so this only could matter in the degenerate case of only-nan results.
2694 SrcDemandedMask |= fcZero | fcInf | fcNan;
2695 }
2696
2697 // Zero outputs may be the result of underflow.
2698 if (DemandedMask & fcZero)
2699 SrcDemandedMask |= fcNormal | fcSubnormal;
2700
2701 FPClassTest LHSDemandedMask = SrcDemandedMask;
2702 FPClassTest RHSDemandedMask = SrcDemandedMask;
2703
2704 // 0 / inf = 0
2705 if (DemandedMask & fcZero) {
2706 assert((LHSDemandedMask & fcSubnormal) &&
2707 "should not have to worry about daz here");
2708 LHSDemandedMask |= fcZero;
2709 RHSDemandedMask |= fcInf;
2710 }
2711
2712 // x / 0 = inf
2713 // large_normal / small_normal = inf
2714 // inf / 1 = inf
2715 // large_normal / subnormal = inf
2716 if (DemandedMask & fcInf) {
2717 LHSDemandedMask |= fcInf | fcNormal | fcSubnormal;
2718 RHSDemandedMask |= fcZero | fcSubnormal | fcNormal;
2719 }
2720
2721 KnownFPClass KnownLHS, KnownRHS;
2722 if (SimplifyDemandedFPClass(I, 0, LHSDemandedMask, KnownLHS, Depth + 1) ||
2723 SimplifyDemandedFPClass(I, 1, RHSDemandedMask, KnownRHS, Depth + 1))
2724 return I;
2725
2726 // nsz [+-]0 / x -> 0
2727 if (FMF.noSignedZeros() && KnownLHS.isKnownAlways(fcZero) &&
2728 KnownRHS.isKnownNeverNaN())
2729 return ConstantFP::getZero(VTy);
2730
2731 if (KnownLHS.isKnownAlways(fcPosZero) && KnownRHS.isKnownNeverNaN()) {
2733 Builder.SetInsertPoint(I);
2734
2735 // nnan +0 / x -> copysign(0, rhs)
2736 // TODO: -0 / x => copysign(0, fneg(rhs))
2737 Value *Copysign = Builder.CreateCopySign(X, Y, FMF);
2738 Copysign->takeName(I);
2739 return Copysign;
2740 }
2741
2742 bool ResultNotNan = (DemandedMask & fcNan) == fcNone;
2743 bool ResultNotInf = (DemandedMask & fcInf) == fcNone;
2744
2745 if (!ResultNotInf &&
2746 ((ResultNotNan || (KnownLHS.isKnownNeverNaN() &&
2747 KnownLHS.isKnownNeverLogicalZero(Mode))) &&
2748 (KnownRHS.isKnownAlways(fcPosZero) ||
2749 (FMF.noSignedZeros() && KnownRHS.isKnownAlways(fcZero))))) {
2751 Builder.SetInsertPoint(I);
2752
2753 // nnan x / 0 => copysign(inf, x);
2754 // nnan nsz x / -0 => copysign(inf, x);
2755 Value *Copysign =
2756 Builder.CreateCopySign(ConstantFP::getInfinity(VTy), X, FMF);
2757 Copysign->takeName(I);
2758 return Copysign;
2759 }
2760
2761 // nnan ninf X / [-]0.0 -> poison
2762 if (ResultNotNan && ResultNotInf && KnownRHS.isKnownAlways(fcZero))
2763 return PoisonValue::get(VTy);
2764
2765 Known = KnownFPClass::fdiv(KnownLHS, KnownRHS, Mode);
2766 Known.knownNot(~DemandedMask);
2767
2768 if (Constant *SingleVal = getFPClassConstant(VTy, Known.KnownFPClasses,
2769 /*IsCanonicalizing=*/true))
2770 return SingleVal;
2771
2773 FMF, Known.KnownFPClasses, {KnownLHS, KnownRHS});
2774 if (InferredFMF != FMF) {
2775 I->setFastMathFlags(InferredFMF);
2776 return I;
2777 }
2778
2779 return nullptr;
2780 }
2781 case Instruction::FPTrunc:
2782 return simplifyDemandedUseFPClassFPTrunc(*this, *I, FMF, DemandedMask,
2783 Known, Depth);
2784 case Instruction::FPExt: {
2785 FPClassTest SrcDemandedMask = DemandedMask;
2786 if (DemandedMask & fcNan)
2787 SrcDemandedMask |= fcNan;
2788
2789 // No subnormal result does not imply not-subnormal in the source type.
2790 if ((DemandedMask & fcNegNormal) != fcNone)
2791 SrcDemandedMask |= fcNegSubnormal;
2792 if ((DemandedMask & fcPosNormal) != fcNone)
2793 SrcDemandedMask |= fcPosSubnormal;
2794
2795 KnownFPClass KnownSrc;
2796 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
2797 return I;
2798
2799 const fltSemantics &DstTy = VTy->getScalarType()->getFltSemantics();
2800 const fltSemantics &SrcTy =
2801 I->getOperand(0)->getType()->getScalarType()->getFltSemantics();
2802
2803 Known = KnownFPClass::fpext(KnownSrc, DstTy, SrcTy);
2804 Known.knownNot(~DemandedMask);
2805
2806 return simplifyDemandedFPClassResult(I, FMF, DemandedMask, Known,
2807 {KnownSrc});
2808 }
2809 case Instruction::Call: {
2810 CallInst *CI = cast<CallInst>(I);
2811 const Intrinsic::ID IID = CI->getIntrinsicID();
2812 switch (IID) {
2813 case Intrinsic::fabs: {
2814 KnownFPClass KnownSrc;
2815 if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask),
2816 KnownSrc, Depth + 1))
2817 return I;
2818
2819 if (Value *Simplified = simplifyDemandedFPClassFabs(
2820 Known, CI->getArgOperand(0), DemandedMask, KnownSrc,
2821 FMF.noSignedZeros()))
2822 return Simplified;
2823 break;
2824 }
2825 case Intrinsic::arithmetic_fence:
2826 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
2827 return I;
2828 break;
2829 case Intrinsic::copysign: {
2830 // Flip on more potentially demanded classes
2831 const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(DemandedMask);
2832 KnownFPClass KnownMag;
2833 if (SimplifyDemandedFPClass(CI, 0, DemandedMaskAnySign, KnownMag,
2834 Depth + 1))
2835 return I;
2836
2837 if ((DemandedMask & fcNegative) == DemandedMask) {
2838 // Roundabout way of replacing with fneg(fabs)
2839 CI->setOperand(1, ConstantFP::get(VTy, -1.0));
2840 return I;
2841 }
2842
2843 if ((DemandedMask & fcPositive) == DemandedMask) {
2844 // Roundabout way of replacing with fabs
2845 CI->setOperand(1, ConstantFP::getZero(VTy));
2846 return I;
2847 }
2848
2849 if (Value *Simplified = simplifyDemandedFPClassCopysignMag(
2850 CI->getArgOperand(0), DemandedMask, KnownMag,
2851 FMF.noSignedZeros()))
2852 return Simplified;
2853
2855 fcAllFlags, CxtI, Depth + 1);
2856 if (KnownMag.SignBit && KnownSign.SignBit &&
2857 *KnownMag.SignBit == *KnownSign.SignBit)
2858 return CI->getOperand(0);
2859
2860 // TODO: Call argument attribute not considered
2861 // Input implied not-nan from flag.
2862 if (FMF.noNaNs())
2863 KnownSign.knownNot(fcNan);
2864
2865 if (KnownSign.SignBit == false) {
2867 CI->setOperand(1, ConstantFP::getZero(VTy));
2868 return I;
2869 }
2870
2871 if (KnownSign.SignBit == true) {
2873 CI->setOperand(1, ConstantFP::get(VTy, -1.0));
2874 return I;
2875 }
2876
2877 Known = KnownFPClass::copysign(KnownMag, KnownSign);
2878 Known.knownNot(~DemandedMask);
2879 break;
2880 }
2881 case Intrinsic::fma:
2882 case Intrinsic::fmuladd: {
2883 // We can't do any simplification on the source besides stripping out
2884 // unneeded nans.
2885 FPClassTest SrcDemandedMask = DemandedMask | ~fcNan;
2886 if (DemandedMask & fcNan)
2887 SrcDemandedMask |= fcNan;
2888
2889 KnownFPClass KnownSrc[3];
2890
2891 Type *EltTy = VTy->getScalarType();
2892 if (CI->getArgOperand(0) == CI->getArgOperand(1) &&
2893 isGuaranteedNotToBeUndef(CI->getArgOperand(0), SQ.AC, CxtI, SQ.DT,
2894 Depth + 1)) {
2895 if (SimplifyDemandedFPClass(CI, 0, SrcDemandedMask, KnownSrc[0],
2896 Depth + 1) ||
2897 SimplifyDemandedFPClass(CI, 2, SrcDemandedMask, KnownSrc[2],
2898 Depth + 1))
2899 return I;
2900
2901 KnownSrc[1] = KnownSrc[0];
2902 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2903 Known = KnownFPClass::fma_square(KnownSrc[0], KnownSrc[2], Mode);
2904 } else {
2905 for (int OpIdx = 0; OpIdx != 3; ++OpIdx) {
2906 if (SimplifyDemandedFPClass(CI, OpIdx, SrcDemandedMask,
2907 KnownSrc[OpIdx], Depth + 1))
2908 return CI;
2909 }
2910
2911 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2912 Known = KnownFPClass::fma(KnownSrc[0], KnownSrc[1], KnownSrc[2], Mode);
2913 }
2914
2915 return simplifyDemandedFPClassResult(CI, FMF, DemandedMask, Known,
2916 {KnownSrc});
2917 }
2918 case Intrinsic::maximum:
2919 case Intrinsic::minimum:
2920 case Intrinsic::maximumnum:
2921 case Intrinsic::minimumnum:
2922 case Intrinsic::maxnum:
2923 case Intrinsic::minnum: {
2924 const bool PropagateNaN =
2925 IID == Intrinsic::maximum || IID == Intrinsic::minimum;
2926
2927 // We can't tell much based on the demanded result without inspecting the
2928 // operands (e.g., a known-positive result could have been clamped), but
2929 // we can still prune known-nan inputs.
2930 FPClassTest SrcDemandedMask =
2931 PropagateNaN && ((DemandedMask & fcNan) == fcNone)
2932 ? DemandedMask | ~fcNan
2933 : fcAllFlags;
2934
2935 KnownFPClass KnownLHS, KnownRHS;
2936 if (SimplifyDemandedFPClass(CI, 1, SrcDemandedMask, KnownRHS,
2937 Depth + 1) ||
2938 SimplifyDemandedFPClass(CI, 0, SrcDemandedMask, KnownLHS, Depth + 1))
2939 return I;
2940
2941 Value *Simplified =
2942 simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask, KnownLHS,
2943 KnownRHS, F, FMF.noSignedZeros());
2944 if (Simplified)
2945 return Simplified;
2946
2947 auto *FPOp = cast<FPMathOperator>(CI);
2948
2949 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2950 FastMathFlags InferredFMF = FMF;
2951
2952 if (!FMF.noSignedZeros()) {
2953 // Add NSZ flag if we know the result will not be sensitive to the sign
2954 // of 0.
2955 FPClassTest ZeroMask = fcZero;
2956
2957 Type *EltTy = VTy->getScalarType();
2958 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
2959 if (Mode != DenormalMode::getIEEE())
2960 ZeroMask |= fcSubnormal;
2961
2962 bool ResultNotLogical0 = (ValidResults & ZeroMask) == fcNone;
2963 if (ResultNotLogical0 || ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
2964 KnownRHS.isKnownNeverLogicalPosZero(Mode)) &&
2965 (KnownLHS.isKnownNeverLogicalPosZero(Mode) ||
2966 KnownRHS.isKnownNeverLogicalNegZero(Mode))))
2967 InferredFMF.setNoSignedZeros(true);
2968 }
2969
2970 if (!FMF.noNaNs() &&
2971 ((PropagateNaN && (ValidResults & fcNan) == fcNone) ||
2972 (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN()))) {
2974 InferredFMF.setNoNaNs(true);
2975 }
2976
2977 if (InferredFMF != FMF) {
2978 CI->setFastMathFlags(InferredFMF);
2979 return FPOp;
2980 }
2981
2982 return nullptr;
2983 }
2984 case Intrinsic::exp:
2985 case Intrinsic::exp2:
2986 case Intrinsic::exp10: {
2987 if ((DemandedMask & fcPositive) == fcNone) {
2988 // Only returns positive values or nans.
2989 if ((DemandedMask & fcNan) == fcNone)
2990 return PoisonValue::get(VTy);
2991
2992 // Only need nan propagation.
2993 if ((DemandedMask & ~fcNan) == fcNone)
2994 return ConstantFP::getQNaN(VTy);
2995
2996 return CI->getArgOperand(0);
2997 }
2998
2999 FPClassTest SrcDemandedMask = DemandedMask & fcNan;
3000 if (DemandedMask & fcNan)
3001 SrcDemandedMask |= fcNan;
3002
3003 if (DemandedMask & fcZero) {
3004 // exp(-infinity) = 0
3005 SrcDemandedMask |= fcNegInf;
3006
3007 // exp(-largest_normal) = 0
3008 //
3009 // Negative numbers of sufficiently large magnitude underflow to 0. No
3010 // subnormal input has a 0 result.
3011 SrcDemandedMask |= fcNegNormal;
3012 }
3013
3014 if (DemandedMask & fcPosSubnormal) {
3015 // Negative numbers of sufficiently large magnitude underflow to 0. No
3016 // subnormal input has a 0 result.
3017 SrcDemandedMask |= fcNegNormal;
3018 }
3019
3020 if (DemandedMask & fcPosNormal) {
3021 // exp(0) = 1
3022 // exp(+/- smallest_normal) = 1
3023 // exp(+/- largest_denormal) = 1
3024 // exp(+/- smallest_denormal) = 1
3025 // exp(-1) = pos normal
3026 SrcDemandedMask |= fcNormal | fcSubnormal | fcZero;
3027 }
3028
3029 // exp(inf), exp(largest_normal) = inf
3030 if (DemandedMask & fcPosInf)
3031 SrcDemandedMask |= fcPosInf | fcPosNormal;
3032
3033 KnownFPClass KnownSrc;
3034
3035 // TODO: This could really make use of KnownFPClass of specific value
3036 // range, (i.e., close enough to 1)
3037 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
3038 return I;
3039
3040 // exp(+/-0) = 1
3041 if (KnownSrc.isKnownAlways(fcZero))
3042 return ConstantFP::get(VTy, 1.0);
3043
3044 // Only perform nan propagation.
3045 // Note: Dropping canonicalize / quiet of signaling nan.
3046 if (KnownSrc.isKnownAlways(fcNan))
3047 return CI->getArgOperand(0);
3048
3049 // exp(0 | nan) => x == 0.0 ? 1.0 : x
3050 if (KnownSrc.isKnownAlways(fcZero | fcNan)) {
3052 Builder.SetInsertPoint(CI);
3053
3054 // fadd +/-0, 1.0 => 1.0
3055 // fadd nan, 1.0 => nan
3056 return Builder.CreateFAddFMF(CI->getArgOperand(0),
3057 ConstantFP::get(VTy, 1.0), FMF);
3058 }
3059
3060 if (KnownSrc.isKnownAlways(fcInf | fcNan)) {
3061 // exp(-inf) = 0
3062 // exp(+inf) = +inf
3064 Builder.SetInsertPoint(CI);
3065
3066 // Note: Dropping canonicalize / quiet of signaling nan.
3067 Value *X = CI->getArgOperand(0);
3068 Value *IsPosInfOrNan = Builder.CreateFCmpFMF(
3070 // We do not know whether an infinity or a NaN is more likely here,
3071 // so mark the branch weights as unknown.
3072 Value *ZeroOrInf = Builder.CreateSelectFMFWithUnknownProfile(
3073 IsPosInfOrNan, X, ConstantFP::getZero(VTy), FMF, DEBUG_TYPE);
3074 return ZeroOrInf;
3075 }
3076
3077 Known = KnownFPClass::exp(KnownSrc);
3078 Known.knownNot(~DemandedMask);
3079
3080 return simplifyDemandedFPClassResult(CI, FMF, DemandedMask, Known,
3081 KnownSrc);
3082 }
3083 case Intrinsic::log:
3084 case Intrinsic::log2:
3085 case Intrinsic::log10: {
3086 FPClassTest DemandedSrcMask = DemandedMask & (fcNan | fcPosInf);
3087 if (DemandedMask & fcNan)
3088 DemandedSrcMask |= fcNan;
3089
3090 Type *EltTy = VTy->getScalarType();
3091 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
3092
3093 // log(x < 0) = nan
3094 if (DemandedMask & fcNan)
3095 DemandedSrcMask |= (fcNegative & ~fcNegZero);
3096
3097 // log(0) = -inf
3098 if (DemandedMask & fcNegInf) {
3099 DemandedSrcMask |= fcZero;
3100
3101 // No value produces subnormal result.
3102 if (Mode.inputsMayBeZero())
3103 DemandedSrcMask |= fcSubnormal;
3104 }
3105
3106 if (DemandedMask & fcNormal)
3107 DemandedSrcMask |= fcNormal | fcSubnormal;
3108
3109 // log(1) = 0
3110 if (DemandedMask & fcZero)
3111 DemandedSrcMask |= fcPosNormal;
3112
3113 KnownFPClass KnownSrc;
3114 if (SimplifyDemandedFPClass(I, 0, DemandedSrcMask, KnownSrc, Depth + 1))
3115 return I;
3116
3117 Known = KnownFPClass::log(KnownSrc, Mode);
3118 Known.knownNot(~DemandedMask);
3119
3120 return simplifyDemandedFPClassResult(CI, FMF, DemandedMask, Known,
3121 KnownSrc);
3122 }
3123 case Intrinsic::sqrt: {
3124 FPClassTest DemandedSrcMask =
3125 DemandedMask & (fcNegZero | fcPositive | fcNan);
3126
3127 if (DemandedMask & fcNan)
3128 DemandedSrcMask |= fcNan | (fcNegative & ~fcNegZero);
3129
3130 // sqrt(max_subnormal) is a normal value
3131 if (DemandedMask & fcPosNormal)
3132 DemandedSrcMask |= fcPosSubnormal;
3133
3134 KnownFPClass KnownSrc;
3135 if (SimplifyDemandedFPClass(I, 0, DemandedSrcMask, KnownSrc, Depth + 1))
3136 return I;
3137
3138 // Infer the source cannot be negative if the result cannot be nan.
3139 if ((DemandedMask & fcNan) == fcNone)
3140 KnownSrc.knownNot((fcNegative & ~fcNegZero) | fcNan);
3141
3142 // Infer the source cannot be +inf if the result is not +nf
3143 if ((DemandedMask & fcPosInf) == fcNone)
3144 KnownSrc.knownNot(fcPosInf);
3145
3146 Type *EltTy = VTy->getScalarType();
3147 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
3148
3149 // sqrt(-x) = nan, but be careful of negative subnormals flushed to 0.
3150 if (KnownSrc.isKnownNever(fcPositive) &&
3151 KnownSrc.isKnownNeverLogicalZero(Mode))
3152 return ConstantFP::getQNaN(VTy);
3153
3154 Known = KnownFPClass::sqrt(KnownSrc, Mode);
3155 Known.knownNot(~DemandedMask);
3156
3157 if (Known.KnownFPClasses == fcZero) {
3158 if (FMF.noSignedZeros())
3159 return ConstantFP::getZero(VTy);
3161 Builder.SetInsertPoint(CI);
3162
3163 Value *Copysign = Builder.CreateCopySign(ConstantFP::getZero(VTy),
3164 CI->getArgOperand(0), FMF);
3165 Copysign->takeName(CI);
3166 return Copysign;
3167 }
3168
3169 return simplifyDemandedFPClassResult(CI, FMF, DemandedMask, Known,
3170 {KnownSrc});
3171 }
3172 case Intrinsic::ldexp: {
3173 FPClassTest SrcDemandedMask = DemandedMask & fcInf;
3174 if (DemandedMask & fcNan)
3175 SrcDemandedMask |= fcNan;
3176
3177 if (DemandedMask & fcPosInf)
3178 SrcDemandedMask |= fcPosNormal | fcPosSubnormal;
3179 if (DemandedMask & fcNegInf)
3180 SrcDemandedMask |= fcNegNormal | fcNegSubnormal;
3181
3182 if (DemandedMask & (fcPosNormal | fcPosSubnormal))
3183 SrcDemandedMask |= fcPosNormal | fcPosSubnormal;
3184 if (DemandedMask & (fcNegNormal | fcNegSubnormal))
3185 SrcDemandedMask |= fcNegNormal | fcNegSubnormal;
3186
3187 if (DemandedMask & fcPosZero)
3188 SrcDemandedMask |= fcPosFinite;
3189 if (DemandedMask & fcNegZero)
3190 SrcDemandedMask |= fcNegFinite;
3191
3192 KnownFPClass KnownSrc;
3193 if (SimplifyDemandedFPClass(CI, 0, SrcDemandedMask, KnownSrc, Depth + 1))
3194 return CI;
3195
3196 Type *EltTy = VTy->getScalarType();
3197 const fltSemantics &FltSem = EltTy->getFltSemantics();
3198 DenormalMode Mode = F.getDenormalMode(FltSem);
3199
3200 KnownBits KnownExpBits =
3202
3203 Known = KnownFPClass::ldexp(KnownSrc, KnownExpBits, FltSem, Mode);
3204 Known.knownNot(~DemandedMask);
3205
3206 return simplifyDemandedFPClassResult(CI, FMF, DemandedMask, Known,
3207 {KnownSrc});
3208 }
3209 case Intrinsic::trunc:
3210 case Intrinsic::floor:
3211 case Intrinsic::ceil:
3212 case Intrinsic::rint:
3213 case Intrinsic::nearbyint:
3214 case Intrinsic::round:
3215 case Intrinsic::roundeven: {
3216 FPClassTest DemandedSrcMask = DemandedMask;
3217 if (DemandedMask & fcNan)
3218 DemandedSrcMask |= fcNan;
3219
3220 // Zero results imply valid subnormal sources.
3221 if (DemandedMask & fcNegZero)
3222 DemandedSrcMask |= fcNegSubnormal | fcNegNormal;
3223
3224 if (DemandedMask & fcPosZero)
3225 DemandedSrcMask |= fcPosSubnormal | fcPosNormal;
3226
3227 KnownFPClass KnownSrc;
3228 if (SimplifyDemandedFPClass(CI, 0, DemandedSrcMask, KnownSrc, Depth + 1))
3229 return I;
3230
3231 // Note: Possibly dropping snan quiet.
3232 if (KnownSrc.isKnownAlways(fcInf | fcNan | fcZero))
3233 return CI->getArgOperand(0);
3234
3235 bool IsRoundNearestOrTrunc =
3236 IID == Intrinsic::round || IID == Intrinsic::roundeven ||
3237 IID == Intrinsic::nearbyint || IID == Intrinsic::rint ||
3238 IID == Intrinsic::trunc;
3239
3240 // Ignore denormals-as-zero, as canonicalization is not mandated.
3241 if ((IID == Intrinsic::floor || IsRoundNearestOrTrunc) &&
3243 return ConstantFP::getZero(VTy);
3244
3245 if ((IID == Intrinsic::ceil || IsRoundNearestOrTrunc) &&
3247 return ConstantFP::getZero(VTy, true);
3248
3249 if (IID == Intrinsic::floor && KnownSrc.isKnownAlways(fcNegSubnormal))
3250 return ConstantFP::get(VTy, -1.0);
3251
3252 if (IID == Intrinsic::ceil && KnownSrc.isKnownAlways(fcPosSubnormal))
3253 return ConstantFP::get(VTy, 1.0);
3254
3256 KnownSrc, IID == Intrinsic::trunc,
3258
3259 Known.knownNot(~DemandedMask);
3260
3261 if (Constant *SingleVal = getFPClassConstant(VTy, Known.KnownFPClasses,
3262 /*IsCanonicalizing=*/true))
3263 return SingleVal;
3264
3265 if ((IID == Intrinsic::trunc || IsRoundNearestOrTrunc) &&
3266 KnownSrc.isKnownAlways(fcZero | fcSubnormal)) {
3268 Builder.SetInsertPoint(CI);
3269
3270 Value *Copysign = Builder.CreateCopySign(ConstantFP::getZero(VTy),
3271 CI->getArgOperand(0));
3272 Copysign->takeName(CI);
3273 return Copysign;
3274 }
3275
3276 FastMathFlags InferredFMF =
3277 inferFastMathValueFlags(FMF, Known.KnownFPClasses, KnownSrc);
3278 if (InferredFMF != FMF) {
3280 CI->setFastMathFlags(InferredFMF);
3281 return CI;
3282 }
3283
3284 return nullptr;
3285 }
3286 case Intrinsic::fptrunc_round:
3287 return simplifyDemandedUseFPClassFPTrunc(*this, *CI, FMF, DemandedMask,
3288 Known, Depth);
3289 case Intrinsic::canonicalize: {
3290 Type *EltTy = VTy->getScalarType();
3291
3292 // TODO: This could have more refined support for PositiveZero denormal
3293 // mode.
3294 if (EltTy->isIEEELikeFPTy()) {
3295 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
3296
3297 FPClassTest SrcDemandedMask = DemandedMask;
3298
3299 // A demanded quiet nan result may have come from a signaling nan, so we
3300 // need to expand the demanded mask.
3301 if ((DemandedMask & fcQNan) != fcNone)
3302 SrcDemandedMask |= fcSNan;
3303
3304 if (Mode != DenormalMode::getIEEE()) {
3305 // Any zero results may have come from flushed denormals.
3306 if (DemandedMask & fcPosZero)
3307 SrcDemandedMask |= fcPosSubnormal;
3308 if (DemandedMask & fcNegZero)
3309 SrcDemandedMask |= fcNegSubnormal;
3310 }
3311
3312 if (Mode == DenormalMode::getPreserveSign()) {
3313 // If a denormal input will be flushed, and we don't need zeros, we
3314 // don't need denormals either.
3315 if ((DemandedMask & fcPosZero) == fcNone)
3316 SrcDemandedMask &= ~fcPosSubnormal;
3317
3318 if ((DemandedMask & fcNegZero) == fcNone)
3319 SrcDemandedMask &= ~fcNegSubnormal;
3320 }
3321
3322 KnownFPClass KnownSrc;
3323
3324 // Simplify upstream operations before trying to simplify this call.
3325 if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
3326 return I;
3327
3328 // Perform the canonicalization to see if this folded to a constant.
3329 Known = KnownFPClass::canonicalize(KnownSrc, Mode);
3330 Known.knownNot(~DemandedMask);
3331
3332 if (Constant *SingleVal = getFPClassConstant(VTy, Known.KnownFPClasses))
3333 return SingleVal;
3334
3335 // For IEEE handling, there is only a bit change for nan inputs, so we
3336 // can drop it if we do not demand nan results or we know the input
3337 // isn't a nan.
3338 // Otherwise, we also need to avoid denormal inputs to drop the
3339 // canonicalize.
3340 if (KnownSrc.isKnownNeverNaN() && (Mode == DenormalMode::getIEEE() ||
3341 KnownSrc.isKnownNeverSubnormal()))
3342 return CI->getArgOperand(0);
3343
3344 FastMathFlags InferredFMF =
3345 inferFastMathValueFlags(FMF, Known.KnownFPClasses, KnownSrc);
3346 if (InferredFMF != FMF) {
3348 CI->setFastMathFlags(InferredFMF);
3349 return CI;
3350 }
3351
3352 return nullptr;
3353 }
3354
3355 [[fallthrough]];
3356 }
3357 default:
3358 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3359 Known.knownNot(~DemandedMask);
3360 break;
3361 }
3362
3363 break;
3364 }
3365 case Instruction::Select: {
3366 KnownFPClass KnownLHS, KnownRHS;
3367 if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) ||
3368 SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1))
3369 return I;
3370
3371 if (KnownLHS.isKnownNever(DemandedMask))
3372 return I->getOperand(2);
3373 if (KnownRHS.isKnownNever(DemandedMask))
3374 return I->getOperand(1);
3375
3376 adjustKnownFPClassForSelectArm(KnownLHS, I->getOperand(0), I->getOperand(1),
3377 /*Invert=*/false, SQ, Depth);
3378 adjustKnownFPClassForSelectArm(KnownRHS, I->getOperand(0), I->getOperand(2),
3379 /*Invert=*/true, SQ, Depth);
3380 Known = KnownLHS.intersectWith(KnownRHS);
3381 Known.knownNot(~DemandedMask);
3382 break;
3383 }
3384 case Instruction::ExtractElement: {
3385 // TODO: Handle demanded element mask
3386 if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
3387 return I;
3388 Known.knownNot(~DemandedMask);
3389 break;
3390 }
3391 case Instruction::InsertElement: {
3392 KnownFPClass KnownInserted, KnownVec;
3393 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownInserted, Depth + 1) ||
3394 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownVec, Depth + 1))
3395 return I;
3396
3397 // TODO: Use demanded elements logic from computeKnownFPClass
3398 Known = KnownVec | KnownInserted;
3399 Known.knownNot(~DemandedMask);
3400 break;
3401 }
3402 case Instruction::ShuffleVector: {
3403 KnownFPClass KnownLHS, KnownRHS;
3404 if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownRHS, Depth + 1) ||
3405 SimplifyDemandedFPClass(I, 0, DemandedMask, KnownLHS, Depth + 1))
3406 return I;
3407
3408 // TODO: This is overly conservative and should consider demanded elements,
3409 // and splats.
3410 Known = KnownLHS | KnownRHS;
3411 Known.knownNot(~DemandedMask);
3412 break;
3413 }
3414 case Instruction::ExtractValue: {
3415 Value *ExtractSrc;
3416 if (match(I, m_ExtractValue<0>(m_OneUse(m_Value(ExtractSrc))))) {
3417 if (auto *II = dyn_cast<IntrinsicInst>(ExtractSrc)) {
3418 const Intrinsic::ID IID = II->getIntrinsicID();
3419 switch (IID) {
3420 case Intrinsic::frexp: {
3421 FPClassTest SrcDemandedMask = fcNone;
3422 if (DemandedMask & fcNan)
3423 SrcDemandedMask |= fcNan;
3424 if (DemandedMask & fcNegFinite)
3425 SrcDemandedMask |= fcNegFinite;
3426 if (DemandedMask & fcPosFinite)
3427 SrcDemandedMask |= fcPosFinite;
3428 if (DemandedMask & fcPosInf)
3429 SrcDemandedMask |= fcPosInf;
3430 if (DemandedMask & fcNegInf)
3431 SrcDemandedMask |= fcNegInf;
3432
3433 KnownFPClass KnownSrc;
3434 if (SimplifyDemandedFPClass(II, 0, SrcDemandedMask, KnownSrc,
3435 Depth + 1))
3436 return I;
3437
3438 Type *EltTy = VTy->getScalarType();
3439 DenormalMode Mode = F.getDenormalMode(EltTy->getFltSemantics());
3440
3441 Known = KnownFPClass::frexp_mant(KnownSrc, Mode);
3442 Known.KnownFPClasses &= DemandedMask;
3443
3444 if (Constant *SingleVal =
3446 /*IsCanonicalizing=*/true))
3447 return SingleVal;
3448
3449 if (Known.isKnownAlways(fcInf | fcNan))
3450 return II->getArgOperand(0);
3451
3452 return nullptr;
3453 }
3454 default:
3455 break;
3456 }
3457 }
3458 }
3459 [[fallthrough]];
3460 }
3461 default:
3462 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3463 Known.knownNot(~DemandedMask);
3464 break;
3465 }
3466
3467 return getFPClassConstant(VTy, Known.KnownFPClasses);
3468}
3469
3470/// Helper routine of SimplifyDemandedUseFPClass. It computes Known
3471/// floating-point classes. It also tries to handle simplifications that can be
3472/// done based on DemandedMask, but without modifying the Instruction.
/// \param DemandedMask the FP classes a user actually needs from \p I.
/// \param Known [out] class set computed for \p I, narrowed to DemandedMask.
/// \returns a replacement value for \p I, or null if nothing simplified.
// NOTE(review): the declaration line naming this function appears elided in
// this extraction (original line 3473) -- verify against the upstream source.
3474 Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known,
3475 Instruction *CxtI, unsigned Depth) {
// Fast-math flags on the instruction itself let us drop classes (e.g. nnan
// removes fcNan) from the demanded mask up front.
3476 FastMathFlags FMF;
3477 if (auto *FPOp = dyn_cast<FPMathOperator>(I)) {
3478 FMF = FPOp->getFastMathFlags();
3479 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
3480 }
3481
3482 switch (I->getOpcode()) {
3483 case Instruction::Select: {
3484 // TODO: Can we infer which side it came from based on adjusted result
3485 // class?
// If one arm can never produce any demanded class, every demanded use of
// the select is equivalent to the other arm, so return that arm directly.
3486 KnownFPClass KnownRHS =
3487 computeKnownFPClass(I->getOperand(2), DemandedMask, CxtI, Depth + 1);
3488 if (KnownRHS.isKnownNever(DemandedMask))
3489 return I->getOperand(1);
3490
3491 KnownFPClass KnownLHS =
3492 computeKnownFPClass(I->getOperand(1), DemandedMask, CxtI, Depth + 1);
3493 if (KnownLHS.isKnownNever(DemandedMask))
3494 return I->getOperand(2);
3495
// Refine each arm's known classes using facts implied by the select
// condition (e.g. a class-test condition constrains the selected value).
3496 adjustKnownFPClassForSelectArm(KnownLHS, I->getOperand(0), I->getOperand(1),
3497 /*Invert=*/false, SQ, Depth);
3498 adjustKnownFPClassForSelectArm(KnownRHS, I->getOperand(0), I->getOperand(2),
3499 /*Invert=*/true, SQ, Depth);
3500 Known = KnownLHS.intersectWith(KnownRHS);
3501 Known.knownNot(~DemandedMask);
3502 break;
3503 }
3504 case Instruction::FNeg: {
3505 // Special case fneg(fabs(x)); anything else just computes known classes.
3506 Value *Src;
3507
3508 Value *FNegSrc = I->getOperand(0);
3509 if (!match(FNegSrc, m_FAbs(m_Value(Src)))) {
3510 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3511 break;
3512 }
3513
3514 KnownFPClass KnownSrc =
3515 computeKnownFPClass(Src, fcAllFlags, CxtI, Depth + 1);
3516
// The inner fabs' own fast-math flags may further shrink the demanded mask.
3517 FastMathFlags FabsFMF = cast<FPMathOperator>(FNegSrc)->getFastMathFlags();
3518 FPClassTest ThisDemandedMask =
3519 adjustDemandedMaskFromFlags(DemandedMask, FabsFMF);
3520
3521 // We cannot apply the NSZ logic with multiple uses. We can apply it if the
3522 // inner fabs has it and this is the only use.
3523 if (Value *Simplified = simplifyDemandedFPClassFnegFabs(
3524 Known, Src, ThisDemandedMask, KnownSrc, /*NSZ=*/false))
3525 return Simplified;
3526 break;
3527 }
3528 case Instruction::Call: {
3529 const CallInst *CI = cast<CallInst>(I);
3530 const Intrinsic::ID IID = CI->getIntrinsicID();
3531 switch (IID) {
3532 case Intrinsic::fabs: {
3533 Value *Src = CI->getArgOperand(0);
3534 KnownFPClass KnownSrc =
3535 computeKnownFPClass(Src, fcAllFlags, CxtI, Depth + 1);
3536
3537 // NSZ cannot be applied in multiple use case (maybe it could if all uses
3538 // were known nsz)
3539 if (Value *Simplified = simplifyDemandedFPClassFabs(
3540 Known, CI->getArgOperand(0), DemandedMask, KnownSrc,
3541 /*NSZ=*/false))
3542 return Simplified;
3543 break;
3544 }
3545 case Intrinsic::copysign: {
3546 Value *Mag = CI->getArgOperand(0);
3547 Value *Sign = CI->getArgOperand(1);
3548 KnownFPClass KnownMag =
3549 computeKnownFPClass(Mag, fcAllFlags, CxtI, Depth + 1);
3550
3551 // Rule out some cases by magnitude, which may help prove the sign bit is
3552 // one direction or the other.
3553 KnownMag.knownNot(~llvm::unknown_sign(DemandedMask));
3554
3555 // Cannot use nsz in the multiple use case.
3556 if (Value *Simplified = simplifyDemandedFPClassCopysignMag(
3557 Mag, DemandedMask, KnownMag, /*NSZ=*/false))
3558 return Simplified;
3559
3560 KnownFPClass KnownSign =
3561 computeKnownFPClass(Sign, fcAllFlags, CxtI, Depth + 1);
3562
// Flags on the copysign imply the sign operand cannot be inf/nan.
3563 if (FMF.noInfs())
3564 KnownSign.knownNot(fcInf);
3565 if (FMF.noNaNs())
3566 KnownSign.knownNot(fcNan);
3567
// If the sign operand's sign bit is known to match the magnitude's, the
// copysign is a no-op on the magnitude.
3568 if (KnownSign.SignBit && KnownMag.SignBit &&
3569 *KnownSign.SignBit == *KnownMag.SignBit)
3570 return Mag;
3571
3572 Known = KnownFPClass::copysign(KnownMag, KnownSign);
3573 break;
3574 }
3575 case Intrinsic::maxnum:
3576 case Intrinsic::minnum:
3577 case Intrinsic::maximum:
3578 case Intrinsic::minimum:
3579 case Intrinsic::maximumnum:
3580 case Intrinsic::minimumnum: {
// Check the RHS first and bail early when it is completely unknown, as a
// cheap exit before computing the LHS classes.
// NOTE(review): the declarations of KnownRHS/KnownLHS (original lines 3581
// and 3586) appear elided in this extraction -- verify upstream.
3582 CI->getArgOperand(1), DemandedMask, CxtI, Depth + 1);
3583 if (KnownRHS.isUnknown())
3584 return nullptr;
3585
3587 CI->getArgOperand(0), DemandedMask, CxtI, Depth + 1);
3588
3589 // Cannot use NSZ in the multiple use case.
3590 return simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask,
3591 KnownLHS, KnownRHS, F,
3592 /*NSZ=*/false);
3593 }
3594 default:
3595 break;
3596 }
3597
// Unhandled intrinsics fall through to the generic known-class computation.
3598 [[fallthrough]];
3599 }
3600 default:
3601 Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
3602 Known.knownNot(~DemandedMask);
3603 break;
3604 }
3605
// If the narrowed class set resolves to a single bit pattern, fold to that
// constant; otherwise report no simplification.
3606 return getFPClassConstant(I->getType(), Known.KnownFPClasses);
3607}
3608
/// Try to simplify operand \p OpNo of \p I given that only the FP classes in
/// \p DemandedMask are needed. \p Known is filled with the classes computed
/// for the operand. Returns true if the use was replaced.
// NOTE(review): the leading declaration line of this function (original line
// 3609) appears elided in this extraction -- verify against upstream.
3610 FPClassTest DemandedMask,
3611 KnownFPClass &Known,
3612 unsigned Depth) {
3613 Use &U = I->getOperandUse(OpNo);
3614 Value *V = U.get();
3615 Type *VTy = V->getType();
3616
// No demanded classes at all: the operand's value is irrelevant, so it can
// be replaced (unless it is already poison, in which case report no change).
3617 if (DemandedMask == fcNone) {
3618 if (isa<PoisonValue>(V))
3619 return false;
// NOTE(review): a line appears elided here (original line 3620), presumably
// the replaceUse-with-poison call -- verify upstream.
3621 return true;
3622 }
3623
3624 // Handle non-instruction operands (constants and arguments) by computing
// their known classes directly; they cannot be recursively simplified.
// NOTE(review): the dyn_cast<Instruction> declaration of VInst (original
// line 3625) appears elided in this extraction -- verify upstream.
3626 if (!VInst) {
3627 // Handle constants and arguments
3628 Known = computeKnownFPClass(V, fcAllFlags, I, Depth);
3629 Known.knownNot(~DemandedMask);
3630
// The value can never be any demanded class: dead in this use.
3631 if (Known.KnownFPClasses == fcNone) {
3632 if (isa<PoisonValue>(V))
3633 return false;
// NOTE(review): a line appears elided here (original line 3634/3635 region),
// presumably the replaceUse-with-poison call -- verify upstream.
3635 return true;
3636 }
3637
3638 // Do not try to replace values which are already constants (unless we are
3639 // folding to poison). Doing so could promote poison elements to non-poison
3640 // constants.
3641 if (isa<Constant>(V))
3642 return false;
3643
3644 Value *FoldedToConst = getFPClassConstant(VTy, Known.KnownFPClasses);
3645 if (!FoldedToConst || FoldedToConst == V)
3646 return false;
3647
3648 replaceUse(U, FoldedToConst);
3649 return true;
3650 }
3651
// NOTE(review): the guard on this branch (original line 3652) appears elided
// in this extraction; presumably a recursion-depth limit that falls back to
// computeKnownFPClass -- verify upstream.
3653 Known.knownNot(~DemandedMask);
3654 return false;
3655 }
3656
3657 Value *NewVal;
3658
3659 if (VInst->hasOneUse()) {
3660 // If the instruction has one use, we can directly simplify it.
3661 NewVal = SimplifyDemandedUseFPClass(VInst, DemandedMask, Known, I, Depth);
3662 } else {
3663 // If there are multiple uses of this instruction, then we can simplify
3664 // VInst to some other value, but not modify the instruction.
3665 NewVal = SimplifyMultipleUseDemandedFPClass(VInst, DemandedMask, Known, I,
3666 Depth);
3667 }
3668
3669 if (!NewVal)
3670 return false;
// Preserve debug info for the instruction being replaced before rewriting
// the use.
3671 if (Instruction *OpInst = dyn_cast<Instruction>(U))
3672 salvageDebugInfo(*OpInst);
3673
3674 replaceUse(U, NewVal);
3675 return true;
3676}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
Hexagon Common GEP
This file provides internal interfaces used to implement the InstCombine.
static Value * simplifyDemandedUseFPClassFPTrunc(InstCombinerImpl &IC, Instruction &I, FastMathFlags FMF, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth)
static cl::opt< unsigned > SimplifyDemandedVectorEltsDepthLimit("instcombine-simplify-vector-elts-depth", cl::desc("Depth limit when simplifying vector instructions and their operands"), cl::Hidden, cl::init(10))
static Constant * getFPClassConstant(Type *Ty, FPClassTest Mask, bool IsCanonicalizing=false)
For floating-point classes that resolve to a single bit pattern, return that value.
static cl::opt< bool > VerifyKnownBits("instcombine-verify-known-bits", cl::desc("Verify that computeKnownBits() and " "SimplifyDemandedBits() are consistent"), cl::Hidden, cl::init(false))
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Value * simplifyDemandedFPClassFabs(KnownFPClass &Known, Value *Src, FPClassTest DemandedMask, KnownFPClass KnownSrc, bool NSZ)
Perform multiple-use aware simplifications for fabs(Src).
static Value * simplifyDemandedFPClassFnegFabs(KnownFPClass &Known, Value *Src, FPClassTest DemandedMask, KnownFPClass KnownSrc, bool NSZ)
Perform multiple-use aware simplifications for fneg(fabs(Src)).
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
static Value * simplifyShiftSelectingPackedElement(Instruction *I, const APInt &DemandedMask, InstCombinerImpl &IC, unsigned Depth)
Let N = 2 * M.
static Value * simplifyDemandedFPClassMinMax(KnownFPClass &Known, Intrinsic::ID IID, const CallInst *CI, FPClassTest DemandedMask, KnownFPClass KnownLHS, KnownFPClass KnownRHS, const Function &F, bool NSZ)
static Value * simplifyDemandedFPClassCopysignMag(Value *MagSrc, FPClassTest DemandedMask, KnownFPClass KnownSrc, bool NSZ)
static FPClassTest adjustDemandedMaskFromFlags(FPClassTest DemandedMask, FastMathFlags FMF)
static FastMathFlags inferFastMathValueFlags(FastMathFlags FMF, FPClassTest ValidResults, ArrayRef< KnownFPClass > Known)
Try to set an inferred no-nans or no-infs in FMF.
static Value * simplifyDemandedFPClassResult(Instruction *FPOp, FastMathFlags FMF, FPClassTest DemandedMask, KnownFPClass &Known, ArrayRef< KnownFPClass > KnownSrcs)
Apply epilog fixups to a floating-point intrinsic.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1139
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1421
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1406
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1355
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1450
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition APInt.h:433
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
BinaryOps getOpcode() const
Definition InstrTypes.h:374
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool noSignedZeros() const
Definition FMF.h:70
bool noInfs() const
Definition FMF.h:69
void setNoSignedZeros(bool B=true)
Definition FMF.h:87
void setNoNaNs(bool B=true)
Definition FMF.h:81
bool noNaNs() const
Definition FMF.h:68
void setNoInfs(bool B=true)
Definition FMF.h:84
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2308
LLVM_ABI Value * CreateSelectWithUnknownProfile(Value *C, Value *True, Value *False, StringRef PassName, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Value * SimplifyDemandedUseFPClass(Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth=0)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool SimplifyDemandedInstructionFPClass(Instruction &Inst)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Value * SimplifyMultipleUseDemandedFPClass(Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth)
Helper routine of SimplifyDemandedUseFPClass.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
std::optional< std::pair< Intrinsic::ID, SmallVector< Value *, 3 > > > convertOrOfShiftsToFunnelShift(Instruction &Or)
Value * simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known)
Helper routine of SimplifyDemandedUseBits.
Value * SimplifyDemandedUseBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Attempts to replace I with a simpler value based on the demanded bits.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth=0)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Value * SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Helper routine of SimplifyDemandedUseBits.
SimplifyQuery SQ
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
bool isShift() const
bool isIntDivRem() const
A wrapper class for inspecting calls to intrinsic functions.
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isMultiUnitFPType() const
Returns true if this is a floating-point type that is an unevaluated sum of multiple floating-point u...
Definition Type.h:193
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIEEELikeFPTy() const
Return true if this is a well-behaved IEEE-like type, which has a IEEE compatible layout,...
Definition Type.h:170
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
This class represents zero extension of integer types.
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
DisjointOr_match< LHS, RHS, true > m_c_DisjointOr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI void computeKnownBitsFromContext(const Value *V, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)
Merge bits known from context-dependent facts into Known.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1730
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
LLVM_ABI bool isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be undef, but may be poison.
LLVM_ABI bool cannotOrderStrictlyLess(FPClassTest LHS, FPClassTest RHS, bool OrderedZeroSign=false)
Returns true if all values in LHS must be greater than or equal to those in RHS.
LLVM_ABI bool cannotOrderStrictlyGreater(FPClassTest LHS, FPClassTest RHS, bool OrderedZeroSign=false)
Returns true if all values in LHS must be less than or equal to those in RHS.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond, Value *Arm, bool Invert, const SimplifyQuery &Q, unsigned Depth=0)
Adjust Known for the given select Arm to include information from the select Cond.
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI void adjustKnownFPClassForSelectArm(KnownFPClass &Known, Value *Cond, Value *Arm, bool Invert, const SimplifyQuery &Q, unsigned Depth=0)
Adjust Known for the given select Arm to include information from the select Cond.
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
LLVM_ABI FPClassTest unknown_sign(FPClassTest Mask)
Return the test mask which returns true if the value could have the same set of classes,...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
LLVM_ABI KnownBits analyzeKnownBitsFromAndXorOr(const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, const SimplifyQuery &SQ, unsigned Depth=0)
Using KnownBits LHS/RHS produce the known bits for logic op (and/xor/or).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:317
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:192
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
void makeNonNegative()
Make this value non-negative.
Definition KnownBits.h:127
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:337
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:327
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:186
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:363
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
Definition KnownBits.h:202
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
static LLVM_ABI KnownBits srem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for srem(LHS, RHS).
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static KnownBits sub(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from subtraction of LHS and RHS.
Definition KnownBits.h:369
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
bool isKnownNeverInfOrNaN() const
Return true if it's known this can never be an infinity or nan.
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
bool isKnownNeverInfinity() const
Return true if it's known this can never be an infinity.
static constexpr FPClassTest OrderedGreaterThanZeroMask
static constexpr FPClassTest OrderedLessThanZeroMask
void knownNot(FPClassTest RuleOut)
static LLVM_ABI KnownFPClass fmul(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fmul.
static LLVM_ABI KnownFPClass fadd_self(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fadd x, x.
void copysign(const KnownFPClass &Sign)
static KnownFPClass square(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
static LLVM_ABI KnownFPClass fsub(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fsub.
bool isKnownNeverSubnormal() const
Return true if it's known this can never be a subnormal.
bool isKnownAlways(FPClassTest Mask) const
static LLVM_ABI KnownFPClass canonicalize(const KnownFPClass &Src, DenormalMode DenormMode=DenormalMode::getDynamic())
Apply the canonicalize intrinsic to this value.
LLVM_ABI bool isKnownNeverLogicalZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a zero.
static LLVM_ABI KnownFPClass log(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for log/log2/log10.
static LLVM_ABI KnownFPClass fdiv(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fdiv.
static LLVM_ABI KnownFPClass roundToIntegral(const KnownFPClass &Src, bool IsTrunc, bool IsMultiUnitFPType)
Propagate known class for rounding intrinsics (trunc, floor, ceil, rint, nearbyint,...
static LLVM_ABI KnownFPClass ldexp(const KnownFPClass &Src, const KnownBits &N, const fltSemantics &Flt, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for ldexp.
static LLVM_ABI KnownFPClass minMaxLike(const KnownFPClass &LHS, const KnownFPClass &RHS, MinMaxKind Kind, DenormalMode DenormMode=DenormalMode::getDynamic())
bool isUnknown() const
KnownFPClass intersectWith(const KnownFPClass &RHS) const
static LLVM_ABI KnownFPClass exp(const KnownFPClass &Src)
Report known values for exp, exp2 and exp10.
static LLVM_ABI KnownFPClass frexp_mant(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for mantissa component of frexp.
std::optional< bool > SignBit
std::nullopt if the sign bit is unknown, true if the sign bit is definitely set or false if the sign ...
bool isKnownNeverNaN() const
Return true if it's known this can never be a nan.
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
static LLVM_ABI KnownFPClass fpext(const KnownFPClass &KnownSrc, const fltSemantics &DstTy, const fltSemantics &SrcTy)
Propagate known class for fpext.
static LLVM_ABI KnownFPClass fma(const KnownFPClass &LHS, const KnownFPClass &RHS, const KnownFPClass &Addend, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fma.
static LLVM_ABI KnownFPClass fptrunc(const KnownFPClass &KnownSrc)
Propagate known class for fptrunc.
static LLVM_ABI KnownFPClass sqrt(const KnownFPClass &Src, DenormalMode Mode=DenormalMode::getDynamic())
Propagate known class for sqrt.
LLVM_ABI bool isKnownNeverLogicalPosZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a positive zero.
bool cannotBeOrderedGreaterEqZero(DenormalMode Mode) const
Return true if it's known this can never be a negative value or a logical 0.
static LLVM_ABI KnownFPClass fadd(const KnownFPClass &LHS, const KnownFPClass &RHS, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fadd.
LLVM_ABI bool isKnownNeverLogicalNegZero(DenormalMode Mode) const
Return true if it's known this can never be interpreted as a negative zero.
static LLVM_ABI KnownFPClass fma_square(const KnownFPClass &Squared, const KnownFPClass &Addend, DenormalMode Mode=DenormalMode::getDynamic())
Report known values for fma squared, squared, addend.
Matching combinators.
const Instruction * CxtI