//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/FMF.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
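  // E.g. a fixed <4 x s32> vector yields DemandedElts = 0b1111, while a
  // scalar or a scalable vector yields the single broadcast bit APInt(1, 1).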
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelValueTracking::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelValueTracking::getKnownOnes(Register R) {
  return getKnownBits(R).One;
}

114
115[[maybe_unused]] static void
116dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
117 dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
118 << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
119 << toString(Known.Zero | Known.One, 16, false) << "\n"
120 << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
121 << "\n"
122 << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
123 << "\n";
124}

/// Compute known bits for the intersection of \p Src0 and \p Src1.
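/// E.g. if only Src0 has bit 7 known zero and only Src1 has bit 0 known one,
/// the intersection keeps neither fact, since a fact must hold for both.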
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
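// E.g. with BitWidth = 32 and Width known to be exactly 8, Mask.One covers
// the low 8 bits and Mask.Zero the rest, so the result is (Src >> Offset) & 0xFF.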
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when say a generic part uses a GISelValueTracking object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelValueTracking object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelValueTracking object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // PHI's operands are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
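    // E.g. %d:_(s32) = G_PHI %a:_(s32), %bb.1, %b:_(s32), %bb.2 has its
    // incoming registers at operand indices 1 and 3.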
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(SrcReg);
      // Look through trivial copies and phis, but don't look through copies
      // or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits analysis is
      // currently unable to determine the bit width of a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        // In case we're forwarding from a vector register to a non-vector
        // register we need to update the demanded elements to reflect this
        // before recursing.
        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
                                    ? APInt::getAllOnes(SrcTy.getNumElements())
                                    : DemandedElts; // Known to be APInt(1, 1)
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits known one in either the LHS or the RHS are known one in the
    // result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhu(Known, Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhs(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
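    // E.g. extending an s8 value with a known-one sign bit to s16 makes bits
    // 8-15 known one; with a known-zero sign bit they become known zero.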
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
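    // E.g. an asserted alignment of 8 makes the low 3 result bits known zero.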
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
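    // E.g. merging two s32 sources into an s64: source 0 lands in bits
    // [0, 32) and source 1 in bits [32, 64) via insertBits below.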

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }
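    // E.g. unmerging a <4 x s32> source into two <2 x s32> defs: the second
    // def (DstIdx 1) maps to source elements 2 and 3, hence the shift by 2.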

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
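    // E.g. if at most 5 bits can be set, the count is at most 5, which fits
    // in bit_width(5) == 3 bits, so bits [3, BitWidth) are known zero.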
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
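    // E.g. for BitWidth = 32 and Width = 8, shift left by 32 - 8 = 24 and
    // arithmetic shift right by 24 to replicate the extracted sign bit.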
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }

    assert(MI.getOperand(0).getReg() == R &&
           "We only compute knownbits for the sum here.");
    // With [US]ADDE, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
      computeKnownBitsImpl(MI.getOperand(4).getReg(), Carry, DemandedElts,
                           Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(1);
    } else {
      Carry.setAllZero();
    }

    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
    break;
  }
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
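    // The result value is at most PossibleLZ, so it fits in
    // bit_width(PossibleLZ) bits; e.g. PossibleLZ = 32 fits in 6 bits.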
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

    LLT VecVT = MRI.getType(InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element
    // referenced by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.abs();
    Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
                           1);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                             FPClassTest InterestedClasses,
                                             unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  llvm::scope_exit ClearClassesFromFlags(
      [=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result.

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // reduce min/max will choose an element from one of the vector lanes, so
    // we can infer any class information that is common to all elements.
    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);

    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands

    // Negative round ups to 0 produce -0
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    // pow(-x, exp) --> negative if exp is odd and x is negative.
    // pow(-0, exp) --> -inf if exp is negative odd.
    // pow(-0, exp) --> -0 if exp is positive odd.
    // pow(-inf, exp) --> -0 if exp is negative odd.
    // pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

    // Sign is preserved, but underflows may produce zeroes.
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    else if (KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

    if (KnownSrc.isKnownNever(fcPositive))
      Known.knownNot(fcPositive);
    else if (KnownSrc.cannotBeOrderedGreaterThanZero())
      Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

    // Can refine inf/zero handling based on the exponent operand.
    const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
    if ((InterestedClasses & ExpInfoMask) == fcNone)
      break;
    if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
      break;

    // TODO: Handle constant range of Exp

    break;
  }
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      } else {
        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_STRICT_FMUL: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // X * X is always non-negative or a NaN.
    if (LHS == RHS)
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // 0 * +/-inf produces NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(
             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
      Known.knownNot(fcNan);

    break;
  }
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();

    if (LHS == RHS) {
      // TODO: Could filter out snan if we inspect the operand
      if (Opcode == TargetOpcode::G_FDIV) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
                              (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
                        KnownRHS, Depth + 1);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
                          KnownLHS, Depth + 1);
    }

    if (Opcode == TargetOpcode::G_FDIV) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))) ||
           (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) &&
          KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() &&
          KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
              getFltSemanticForLLT(DstTy.getScalarType()))))
        Known.knownNot(fcNan);

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case TargetOpcode::G_FPEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);

    LLT DstTy = MRI.getType(Dst).getScalarType();
    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
    LLT SrcTy = MRI.getType(Src).getScalarType();
    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Opcode == TargetOpcode::G_UITOFP)
      Known.signBitMustBeZero();

    Register Val = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Val);

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Ty.getScalarSizeInBits();
      if (Opcode == TargetOpcode::G_SITOFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
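      // E.g. for s32 -> f32, ilogb of the largest float is 127 >= 32, so a
      // 32-bit unsigned integer can never convert to infinity.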
      LLT FPTy = DstTy.getScalarType();
      const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
      if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
  // case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS: {
    GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);

    if (!DstTy.isFixedVector())
      break;

    bool First = true;
    for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
      bool NeedsElt = DemandedElts[Idx];

      // Do we demand the element supplied by this source?
      if (NeedsElt) {
        Register Src = Merge.getSourceReg(Idx);
        if (First) {
          computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
          First = false;
        } else {
          KnownFPClass Known2;
          computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
          Known |= Known2;
        }

        // If we don't know any bits, early out.
        if (Known.isUnknown())
          break;
      }
    }

    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted
    // element.
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register Vec = Extract.getVectorReg();
    Register Idx = Extract.getIndexReg();

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isFixedVector()) {
      unsigned NumElts = VecTy.getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
                                 Depth + 1);
    }

    break;
  }
1683 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1684 GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
1685 Register Vec = Insert.getVectorReg();
1686 Register Elt = Insert.getElementReg();
1687 Register Idx = Insert.getIndexReg();
1688
1689 LLT VecTy = MRI.getType(Vec);
1690
1691 if (VecTy.isScalableVector())
1692 return;
1693
1694 auto CIdx = getIConstantVRegVal(Idx, MRI);
1695
1696 unsigned NumElts = DemandedElts.getBitWidth();
1697 APInt DemandedVecElts = DemandedElts;
1698 bool NeedsElt = true;
1699 // If we know the index we are inserting to, clear it from Vec check.
1700 if (CIdx && CIdx->ult(NumElts)) {
1701 DemandedVecElts.clearBit(CIdx->getZExtValue());
1702 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1703 }
1704
1705 // Do we demand the inserted element?
1706 if (NeedsElt) {
1707 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
1708 // If we don't know anything, early out.
1709 if (Known.isUnknown())
1710 break;
1711 } else {
1712 Known.KnownFPClasses = fcNone;
1713 }
1714
1715 // Do we need any more elements from Vec?
1716 if (!DemandedVecElts.isZero()) {
1717 KnownFPClass Known2;
1718 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
1719 Depth + 1);
1720 Known |= Known2;
1721 }
1722
1723 break;
1724 }
1725 case TargetOpcode::G_SHUFFLE_VECTOR: {
1726 // For undef elements, we don't know anything about the common state of
1727 // the shuffle result.
1728 GShuffleVector &Shuf = cast<GShuffleVector>(MI);
1729 APInt DemandedLHS, DemandedRHS;
1730 if (DstTy.isScalableVector()) {
1731 assert(DemandedElts == APInt(1, 1));
1732 DemandedLHS = DemandedRHS = DemandedElts;
1733 } else {
1734 if (!getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(),
1735 DemandedElts, DemandedLHS,
1736 DemandedRHS)) {
1737 Known.resetAll();
1738 return;
1739 }
1740 }
1741
1742 if (!!DemandedLHS) {
1743 Register LHS = Shuf.getSrc1Reg();
1744 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
1745 Depth + 1);
1746
1747 // If we don't know anything, early out.
1748 if (Known.isUnknown())
1749 break;
1750 } else {
1751 Known.KnownFPClasses = fcNone;
1752 }
1753
1754 if (!!DemandedRHS) {
1755 KnownFPClass Known2;
1756 Register RHS = Shuf.getSrc2Reg();
1757 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
1758 Depth + 1);
1759 Known |= Known2;
1760 }
1761 break;
1762 }
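
The fixed-vector path leans on getShuffleDemandedElts to split the demanded output lanes between the two sources. A rough standalone model of that split (splitDemanded is hypothetical; bitmasks stand in for APInt):

#include <cassert>
#include <cstdint>
#include <vector>

// Mask entries below NumElts pick from the first source, the rest from
// the second; negative entries are undef.
static void splitDemanded(const std::vector<int> &Mask, unsigned NumElts,
                          uint64_t Demanded, uint64_t &LHS, uint64_t &RHS) {
  LHS = RHS = 0;
  for (unsigned I = 0; I < Mask.size(); ++I) {
    if (!(Demanded & (1ull << I)) || Mask[I] < 0)
      continue;  // Lane not demanded, or undef mask entry.
    unsigned M = unsigned(Mask[I]);
    (M < NumElts ? LHS : RHS) |= 1ull << (M % NumElts);
  }
}

int main() {
  uint64_t L = 0, R = 0;
  splitDemanded({0, 5, 2, 7}, 4, 0b1111, L, R);  // <4 x ...> shuffle
  assert(L == 0b0101 && R == 0b1010);
}
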
1763 case TargetOpcode::COPY: {
1764 Register Src = MI.getOperand(1).getReg();
1765
1766 if (!Src.isVirtual())
1767 return;
1768
1769 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
1770 break;
1771 }
1772 }
1773}
1774
1775KnownFPClass
1776GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1777 FPClassTest InterestedClasses,
1778 unsigned Depth) {
1779 KnownFPClass KnownClasses;
1780 computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
1781 return KnownClasses;
1782}
1783
1784KnownFPClass GISelValueTracking::computeKnownFPClass(
1785 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1786 KnownFPClass Known;
1787 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1788 return Known;
1789}
1790
1791KnownFPClass GISelValueTracking::computeKnownFPClass(
1792 Register R, const APInt &DemandedElts, uint32_t Flags,
1793 FPClassTest InterestedClasses, unsigned Depth) {
1794 if (Flags & MachineInstr::FmNoNans)
1795 InterestedClasses &= ~fcNan;
1796 if (Flags & MachineInstr::FmNoInfs)
1797 InterestedClasses &= ~fcInf;
1798
1799 KnownFPClass Result =
1800 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1801
1802 if (Flags & MachineInstr::FmNoNans)
1803 Result.KnownFPClasses &= ~fcNan;
1804 if (Flags & MachineInstr::FmNoInfs)
1805 Result.KnownFPClasses &= ~fcInf;
1806 return Result;
1807}
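
A sketch of this flag pruning in isolation; the flag and class constants below are hypothetical stand-ins for MachineInstr::FmNoNans/FmNoInfs and the FPClassTest bits, only the pattern matches:

#include <cassert>
#include <cstdint>

enum : uint32_t { FmNoNans = 1u << 0, FmNoInfs = 1u << 1 };
enum : uint32_t { fcNan = 1u << 0, fcInf = 1u << 1, fcNormal = 1u << 2 };

static uint32_t pruneClasses(uint32_t Classes, uint32_t Flags) {
  if (Flags & FmNoNans)
    Classes &= ~fcNan;  // nnan: the value is never a NaN.
  if (Flags & FmNoInfs)
    Classes &= ~fcInf;  // ninf: the value is never an infinity.
  return Classes;
}

int main() {
  assert(pruneClasses(fcNan | fcInf | fcNormal, FmNoNans) == (fcInf | fcNormal));
  assert(pruneClasses(fcNan | fcInf | fcNormal, FmNoNans | FmNoInfs) == fcNormal);
}
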
1808
1809KnownFPClass GISelValueTracking::computeKnownFPClass(
1810 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1811 LLT Ty = MRI.getType(R);
1812 APInt DemandedElts =
1813 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
1814 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1815}
1816
1817/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
1818unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1819 const APInt &DemandedElts,
1820 unsigned Depth) {
1821 // Test src1 first, since we canonicalize simpler expressions to the RHS.
1822 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
1823 if (Src1SignBits == 1)
1824 return 1;
1825 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
1826}
1827
1828/// Compute the known number of sign bits with attached range metadata in the
1829/// memory operand. If this is an extending load, accounts for the behavior of
1830/// the high bits.
1831static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1832 unsigned TyBits) {
1833 const MDNode *Ranges = Ld->getRanges();
1834 if (!Ranges)
1835 return 1;
1836
1837 ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
1838 if (TyBits > CR.getBitWidth()) {
1839 switch (Ld->getOpcode()) {
1840 case TargetOpcode::G_SEXTLOAD:
1841 CR = CR.signExtend(TyBits);
1842 break;
1843 case TargetOpcode::G_ZEXTLOAD:
1844 CR = CR.zeroExtend(TyBits);
1845 break;
1846 default:
1847 break;
1848 }
1849 }
1850
1851 return std::min(CR.getSignedMin().getNumSignBits(),
1852 CR.getSignedMax().getNumSignBits());
1853}
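
A worked instance of this min(signedMin, signedMax) bound: for an i32 load annotated with !range [-64, 64), both extremes carry 26 sign bits, so the load does too. Standalone check with a local numSignBits helper (not the APInt method):

#include <cassert>
#include <cstdint>

// Counts bits equal to the sign bit from the top (arithmetic right shift
// assumed; guaranteed since C++20).
static unsigned numSignBits(int32_t V) {
  unsigned N = 0;
  for (int B = 31; B >= 0 && (((V >> B) & 1) != 0) == (V < 0); --B)
    ++N;
  return N;
}

int main() {
  assert(numSignBits(-64) == 26);  // getSignedMin() of the range
  assert(numSignBits(63) == 26);   // getSignedMax() of the range
}
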
1854
1855unsigned GISelValueTracking::computeNumSignBits(Register R,
1856 const APInt &DemandedElts,
1857 unsigned Depth) {
1858 MachineInstr &MI = *MRI.getVRegDef(R);
1859 unsigned Opcode = MI.getOpcode();
1860
1861 if (Opcode == TargetOpcode::G_CONSTANT)
1862 return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
1863
1864 if (Depth == getMaxDepth())
1865 return 1;
1866
1867 if (!DemandedElts)
1868 return 1; // No demanded elts, better to assume we don't know anything.
1869
1870 LLT DstTy = MRI.getType(R);
1871 const unsigned TyBits = DstTy.getScalarSizeInBits();
1872
1873 // Handle the case where this is called on a register that does not have a
1874 // type constraint. This is unlikely to occur except by looking through
1875 // copies, but it is possible for the initial register being queried to be
1876 // in this state.
1877 if (!DstTy.isValid())
1878 return 1;
1879
1880 unsigned FirstAnswer = 1;
1881 switch (Opcode) {
1882 case TargetOpcode::COPY: {
1883 MachineOperand &Src = MI.getOperand(1);
1884 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
1885 MRI.getType(Src.getReg()).isValid()) {
1886 // Don't increment Depth for this one since we didn't do any work.
1887 return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
1888 }
1889
1890 return 1;
1891 }
1892 case TargetOpcode::G_SEXT: {
1893 Register Src = MI.getOperand(1).getReg();
1894 LLT SrcTy = MRI.getType(Src);
1895 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
1896 return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
1897 }
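
A quick concrete check of the G_SEXT rule, where the sign-bit count grows by exactly the number of extension bits:

#include <cassert>
#include <cstdint>

int main() {
  int8_t X = -2;  // i8 0b11111110: 7 sign bits
  int32_t Y = X;  // G_SEXT to i32: 7 + (32 - 8) == 31 sign bits
  assert(Y == -2 && (Y >> 1) == -1);  // top 31 bits all equal the sign bit
}
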
1898 case TargetOpcode::G_ASSERT_SEXT:
1899 case TargetOpcode::G_SEXT_INREG: {
1900 // Max of the input and what this extends.
1901 Register Src = MI.getOperand(1).getReg();
1902 unsigned SrcBits = MI.getOperand(2).getImm();
1903 unsigned InRegBits = TyBits - SrcBits + 1;
1904 return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
1905 InRegBits);
1906 }
1907 case TargetOpcode::G_LOAD: {
1908 GLoad *Ld = cast<GLoad>(&MI);
1909 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
1910 break;
1911
1912 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1913 }
1914 case TargetOpcode::G_SEXTLOAD: {
1915 GSExtLoad *Ld = cast<GSExtLoad>(&MI);
1916
1917 // FIXME: We need an in-memory type representation.
1918 if (DstTy.isVector())
1919 return 1;
1920
1921 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1922 if (NumBits != 1)
1923 return NumBits;
1924
1925 // e.g. i16->i32 = '17' bits known.
1926 const MachineMemOperand *MMO = *MI.memoperands_begin();
1927 return TyBits - MMO->getSizeInBits().getValue() + 1;
1928 }
1929 case TargetOpcode::G_ZEXTLOAD: {
1930 GZExtLoad *Ld = cast<GZExtLoad>(&MI);
1931
1932 // FIXME: We need an in-memory type representation.
1933 if (DstTy.isVector())
1934 return 1;
1935
1936 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1937 if (NumBits != 1)
1938 return NumBits;
1939
1940 // e.g. i16->i32 = '16' bits known.
1941 const MachineMemOperand *MMO = *MI.memoperands_begin();
1942 return TyBits - MMO->getSizeInBits().getValue();
1943 }
1944 case TargetOpcode::G_AND:
1945 case TargetOpcode::G_OR:
1946 case TargetOpcode::G_XOR: {
1947 Register Src1 = MI.getOperand(1).getReg();
1948 unsigned Src1NumSignBits =
1949 computeNumSignBits(Src1, DemandedElts, Depth + 1);
1950 if (Src1NumSignBits != 1) {
1951 Register Src2 = MI.getOperand(2).getReg();
1952 unsigned Src2NumSignBits =
1953 computeNumSignBits(Src2, DemandedElts, Depth + 1);
1954 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
1955 }
1956 break;
1957 }
1958 case TargetOpcode::G_ASHR: {
1959 Register Src1 = MI.getOperand(1).getReg();
1960 Register Src2 = MI.getOperand(2).getReg();
1961 FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
1962 if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
1963 FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
1964 break;
1965 }
1966 case TargetOpcode::G_SHL: {
1967 Register Src1 = MI.getOperand(1).getReg();
1968 Register Src2 = MI.getOperand(2).getReg();
1969 if (std::optional<ConstantRange> ShAmtRange =
1970 getValidShiftAmountRange(Src2, DemandedElts, Depth + 1)) {
1971 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
1972 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
1973
1974 MachineInstr &ExtMI = *MRI.getVRegDef(Src1);
1975 unsigned ExtOpc = ExtMI.getOpcode();
1976
1977 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
1978 // shifted out, then we can compute the number of sign bits for the
1979 // operand being extended. A future improvement could be to pass along the
1980 // "shifted left by" information in the recursive calls to
1981 // ComputeKnownSignBits. Allowing us to handle this more generically.
1982 if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
1983 ExtOpc == TargetOpcode::G_ANYEXT) {
1984 LLT ExtTy = MRI.getType(Src1);
1985 Register Extendee = ExtMI.getOperand(1).getReg();
1986 LLT ExtendeeTy = MRI.getType(Extendee);
1987 uint64_t SizeDiff =
1988 ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();
1989
1990 if (SizeDiff <= MinShAmt) {
1991 unsigned Tmp =
1992 SizeDiff + computeNumSignBits(Extendee, DemandedElts, Depth + 1);
1993 if (MaxShAmt < Tmp)
1994 return Tmp - MaxShAmt;
1995 }
1996 }
1997 // G_SHL destroys sign bits; make sure it doesn't shift out all of them.
1998 unsigned Tmp = computeNumSignBits(Src1, DemandedElts, Depth + 1);
1999 if (MaxShAmt < Tmp)
2000 return Tmp - MaxShAmt;
2001 }
2002 break;
2003 }
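
A concrete instance of the look-through: shifting a sign-extended i8 left by exactly the extension width leaves the i8's own 8 sign bits. Standalone sketch with a local numSignBits helper:

#include <cassert>
#include <cstdint>

// Counts bits equal to the sign bit from the top (arithmetic right shift
// assumed; guaranteed since C++20).
static unsigned numSignBits(int32_t V) {
  unsigned N = 0;
  for (int B = 31; B >= 0 && (((V >> B) & 1) != 0) == (V < 0); --B)
    ++N;
  return N;
}

int main() {
  int8_t X = -1;                               // numSignBits(i8 -1) == 8
  int32_t Ext = X;                             // G_SEXT to i32
  int32_t Shl = int32_t(uint32_t(Ext) << 24);  // G_SHL by 24 (wrap-safe)
  // SizeDiff == MinShAmt == MaxShAmt == 24, so the bound is
  // numSignBits(X) + 24 - 24 == 8.
  assert(numSignBits(Shl) == 8);
}
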
2004 case TargetOpcode::G_TRUNC: {
2005 Register Src = MI.getOperand(1).getReg();
2006 LLT SrcTy = MRI.getType(Src);
2007
2008 // Check if the sign bits of the source extend down into the truncated value.
2009 unsigned DstTyBits = DstTy.getScalarSizeInBits();
2010 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
2011 unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
2012 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
2013 return NumSrcSignBits - (NumSrcBits - DstTyBits);
2014 break;
2015 }
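
Worked numbers for the truncation rule: 20 source sign bits survive an i32 -> i16 trunc as 20 - (32 - 16) = 4 sign bits:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Src = -0x1000;       // 0xFFFFF000: bits 31..12 identical, 20 sign bits
  int16_t Dst = int16_t(Src);  // G_TRUNC to i16
  // NumSrcSignBits(20) > NumSrcBits(32) - DstBits(16), so the result
  // keeps 20 - (32 - 16) == 4 sign bits.
  assert(Dst == -4096 && (Dst >> 12) == -1);
}
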
2016 case TargetOpcode::G_SELECT: {
2017 return computeNumSignBitsMin(MI.getOperand(2).getReg(),
2018 MI.getOperand(3).getReg(), DemandedElts,
2019 Depth + 1);
2020 }
2021 case TargetOpcode::G_SMIN:
2022 case TargetOpcode::G_SMAX:
2023 case TargetOpcode::G_UMIN:
2024 case TargetOpcode::G_UMAX:
2025 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
2026 return computeNumSignBitsMin(MI.getOperand(1).getReg(),
2027 MI.getOperand(2).getReg(), DemandedElts,
2028 Depth + 1);
2029 case TargetOpcode::G_SADDO:
2030 case TargetOpcode::G_SADDE:
2031 case TargetOpcode::G_UADDO:
2032 case TargetOpcode::G_UADDE:
2033 case TargetOpcode::G_SSUBO:
2034 case TargetOpcode::G_SSUBE:
2035 case TargetOpcode::G_USUBO:
2036 case TargetOpcode::G_USUBE:
2037 case TargetOpcode::G_SMULO:
2038 case TargetOpcode::G_UMULO: {
2039 // If the overflow result is 0/-1, all of its bits are sign bits.
2040 // We know we have an integer-based boolean since these operations
2041 // are only available for integers.
2042 if (MI.getOperand(1).getReg() == R) {
2043 if (TL.getBooleanContents(DstTy.isVector(), false) ==
2044 TargetLowering::ZeroOrNegativeOneBooleanContent)
2045 return TyBits;
2046 }
2047
2048 break;
2049 }
2050 case TargetOpcode::G_SUB: {
2051 Register Src2 = MI.getOperand(2).getReg();
2052 unsigned Src2NumSignBits =
2053 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2054 if (Src2NumSignBits == 1)
2055 return 1; // Early out.
2056
2057 // Handle NEG.
2058 Register Src1 = MI.getOperand(1).getReg();
2059 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2060 if (Known1.isZero()) {
2061 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2062 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2063 // sign bits set.
2064 if ((Known2.Zero | 1).isAllOnes())
2065 return TyBits;
2066
2067 // If the input is known to be non-negative (the sign bit is known clear),
2068 // the output of the NEG has, at worst, the same number of sign bits as
2069 // the input.
2070 if (Known2.isNonNegative()) {
2071 FirstAnswer = Src2NumSignBits;
2072 break;
2073 }
2074
2075 // Otherwise, we treat this like a SUB.
2076 }
2077
2078 unsigned Src1NumSignBits =
2079 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2080 if (Src1NumSignBits == 1)
2081 return 1; // Early Out.
2082
2083 // Sub can have at most one carry bit. Thus we know that the output
2084 // is, at worst, one more bit than the inputs.
2085 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2086 break;
2087 }
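
Both NEG special cases checked on concrete values (standalone sketch; X stands for the subtrahend Src2):

#include <cassert>
#include <cstdint>

int main() {
  // G_SUB 0, x with x known 0/1: the result is 0 or -1 (all sign bits).
  for (int32_t X : {0, 1})
    assert(0 - X == 0 || 0 - X == -1);
  // G_SUB 0, x with x known non-negative: the negation keeps at least
  // x's sign-bit count, e.g. 24 bits for x in [0, 255].
  int32_t X = 255;
  assert((X >> 8) == 0 && (-X >> 8) == -1);
}
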
2088 case TargetOpcode::G_ADD: {
2089 Register Src2 = MI.getOperand(2).getReg();
2090 unsigned Src2NumSignBits =
2091 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2092 if (Src2NumSignBits <= 2)
2093 return 1; // Early out.
2094
2095 Register Src1 = MI.getOperand(1).getReg();
2096 unsigned Src1NumSignBits =
2097 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2098 if (Src1NumSignBits == 1)
2099 return 1; // Early Out.
2100
2101 // Special case decrementing a value (ADD X, -1):
2102 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2103 if (Known2.isAllOnes()) {
2104 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2105 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2106 // sign bits set.
2107 if ((Known1.Zero | 1).isAllOnes())
2108 return TyBits;
2109
2110 // If we are adding -1 to a non-negative number, there is no carry
2111 // out of the result.
2112 if (Known1.isNonNegative()) {
2113 FirstAnswer = Src1NumSignBits;
2114 break;
2115 }
2116
2117 // Otherwise, we treat this like an ADD.
2118 }
2119
2120 // Add can have at most one carry bit. Thus we know that the output
2121 // is, at worst, one more bit than the inputs.
2122 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2123 break;
2124 }
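
And the symmetric decrement special case for G_ADD, again on concrete values:

#include <cassert>
#include <cstdint>

int main() {
  // G_ADD x, -1 with x known 0/1: the result is -1 or 0 (all sign bits).
  for (int32_t X : {0, 1})
    assert(X + (-1) == -1 || X + (-1) == 0);
  // G_ADD x, -1 with x known non-negative: no carry out, so the result
  // keeps x's sign-bit count, e.g. 24 bits for x in [0, 255].
  int32_t X = 200;
  assert(((X - 1) >> 8) == 0);
}
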
2125 case TargetOpcode::G_FCMP:
2126 case TargetOpcode::G_ICMP: {
2127 bool IsFP = Opcode == TargetOpcode::G_FCMP;
2128 if (TyBits == 1)
2129 break;
2130 auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
2131 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
2132 return TyBits; // All bits are sign bits.
2133 if (BC == TargetLowering::ZeroOrOneBooleanContent)
2134 return TyBits - 1; // Every always-zero bit is a sign bit.
2135 break;
2136 }
2137 case TargetOpcode::G_BUILD_VECTOR: {
2138 // Collect the minimum number of sign bits shared by every demanded element.
2139 FirstAnswer = TyBits;
2140 APInt SingleDemandedElt(1, 1);
2141 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2142 if (!DemandedElts[I])
2143 continue;
2144
2145 unsigned Tmp2 =
2146 computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
2147 FirstAnswer = std::min(FirstAnswer, Tmp2);
2148
2149 // If only one sign bit is known, early out.
2150 if (FirstAnswer == 1)
2151 break;
2152 }
2153 break;
2154 }
2155 case TargetOpcode::G_CONCAT_VECTORS: {
2156 if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
2157 break;
2158 FirstAnswer = TyBits;
2159 // Determine the minimum number of sign bits across all demanded
2160 // elts of the input vectors. Early out if the result is already 1.
2161 unsigned NumSubVectorElts =
2162 MRI.getType(MI.getOperand(1).getReg()).getNumElements();
2163 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2164 APInt DemandedSub =
2165 DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
2166 if (!DemandedSub)
2167 continue;
2168 unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
2169
2170 FirstAnswer = std::min(FirstAnswer, Tmp2);
2171
2172 // If only one sign bit is known, early out.
2173 if (FirstAnswer == 1)
2174 break;
2175 }
2176 break;
2177 }
2178 case TargetOpcode::G_SHUFFLE_VECTOR: {
2179 // Collect the minimum number of sign bits that are shared by every vector
2180 // element referenced by the shuffle.
2181 APInt DemandedLHS, DemandedRHS;
2182 Register Src1 = MI.getOperand(1).getReg();
2183 unsigned NumElts = MRI.getType(Src1).getNumElements();
2184 if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
2185 DemandedElts, DemandedLHS, DemandedRHS))
2186 return 1;
2187
2188 if (!!DemandedLHS)
2189 FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
2190 // If we don't know anything, early out and try the computeKnownBits fallback.
2191 if (FirstAnswer == 1)
2192 break;
2193 if (!!DemandedRHS) {
2194 unsigned Tmp2 =
2195 computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
2196 FirstAnswer = std::min(FirstAnswer, Tmp2);
2197 }
2198 break;
2199 }
2200 case TargetOpcode::G_SPLAT_VECTOR: {
2201 // Check if the sign bits of the scalar source survive the implicit
2202 // truncation to the element type.
2202 Register Src = MI.getOperand(1).getReg();
2203 unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
2204 unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
2205 if (NumSrcSignBits > (NumSrcBits - TyBits))
2206 return NumSrcSignBits - (NumSrcBits - TyBits);
2207 break;
2208 }
2209 case TargetOpcode::G_INTRINSIC:
2210 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2211 case TargetOpcode::G_INTRINSIC_CONVERGENT:
2212 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2213 default: {
2214 unsigned NumBits =
2215 TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
2216 if (NumBits > 1)
2217 FirstAnswer = std::max(FirstAnswer, NumBits);
2218 break;
2219 }
2220 }
2221
2222 // Finally, if we can prove that the top bits of the result are 0's or 1's,
2223 // use this information.
2224 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2225 APInt Mask;
2226 if (Known.isNonNegative()) { // sign bit is 0
2227 Mask = Known.Zero;
2228 } else if (Known.isNegative()) { // sign bit is 1
2229 Mask = Known.One;
2230 } else {
2231 // Nothing known.
2232 return FirstAnswer;
2233 }
2234
2235 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
2236 // the number of identical bits in the top of the input value.
2237 Mask <<= Mask.getBitWidth() - TyBits;
2238 return std::max(FirstAnswer, Mask.countl_one());
2239}
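
The fallback in action: when the known bits prove the value non-negative, counting leading ones in Known.Zero bounds the sign bits. A standalone check using std::countl_one from C++20 <bit>:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Suppose getKnownBits proved the top 10 bits are zero: the sign bit is
  // 0, and Mask = Known.Zero shows how many top bits must match it.
  uint32_t KnownZero = 0xFFC00000u;
  assert(std::countl_one(KnownZero) == 10);  // at least 10 sign bits
}
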
2240
2241unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2242 LLT Ty = MRI.getType(R);
2243 APInt DemandedElts =
2244 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
2245 return computeNumSignBits(R, DemandedElts, Depth);
2246}
2247
2248std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
2249 Register R, const APInt &DemandedElts, unsigned Depth) {
2250 // Shifting by the bitwidth or more is not valid.
2251 MachineInstr &MI = *MRI.getVRegDef(R);
2252 unsigned Opcode = MI.getOpcode();
2253
2254 LLT Ty = MRI.getType(R);
2255 unsigned BitWidth = Ty.getScalarSizeInBits();
2256
2257 if (Opcode == TargetOpcode::G_CONSTANT) {
2258 const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
2259 if (ShAmt.uge(BitWidth))
2260 return std::nullopt;
2261 return ConstantRange(ShAmt);
2262 }
2263
2264 if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
2265 const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
2266 for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
2267 if (!DemandedElts[I])
2268 continue;
2269 MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
2270 if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
2271 MinAmt = MaxAmt = nullptr;
2272 break;
2273 }
2274
2275 const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
2276 if (ShAmt.uge(BitWidth))
2277 return std::nullopt;
2278 if (!MinAmt || MinAmt->ugt(ShAmt))
2279 MinAmt = &ShAmt;
2280 if (!MaxAmt || MaxAmt->ult(ShAmt))
2281 MaxAmt = &ShAmt;
2282 }
2283 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
2284 "Failed to find matching min/max shift amounts");
2285 if (MinAmt && MaxAmt)
2286 return ConstantRange(*MinAmt, *MaxAmt + 1);
2287 }
2288
2289 // Use computeKnownBits to find a hidden constant/knownbits (usually hidden
2290 // by type legalization behind multiple bitcasts/build_vector/casts).
2291 KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
2292 if (KnownAmt.getMaxValue().ult(BitWidth))
2293 return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
2294
2295 return std::nullopt;
2296}
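
A rough standalone model of the G_BUILD_VECTOR walk above, with plain unsigned values in place of APInt and a hypothetical shiftAmountRange helper:

#include <algorithm>
#include <cassert>
#include <optional>
#include <utility>
#include <vector>

using Range = std::pair<unsigned, unsigned>;  // [min, max] shift amounts

static std::optional<Range> shiftAmountRange(const std::vector<unsigned> &Amts,
                                             unsigned BitWidth) {
  std::optional<Range> R;
  for (unsigned A : Amts) {
    if (A >= BitWidth)
      return std::nullopt;  // Shifting by >= bitwidth: no valid range.
    if (!R)
      R = Range(A, A);
    else
      R = Range(std::min(R->first, A), std::max(R->second, A));
  }
  return R;
}

int main() {
  assert((shiftAmountRange({3, 7, 5}, 32) == Range(3, 7)));
  assert(!shiftAmountRange({3, 40}, 32));  // 40 >= 32: no valid range
}
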
2297
2298std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2299 Register R, const APInt &DemandedElts, unsigned Depth) {
2300 if (std::optional<ConstantRange> AmtRange =
2301 getValidShiftAmountRange(R, DemandedElts, Depth))
2302 return AmtRange->getUnsignedMin().getZExtValue();
2303 return std::nullopt;
2304}
2305
2306void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2307 AnalysisUsage &AU) const {
2308 AU.setPreservesAll();
2309 MachineFunctionPass::getAnalysisUsage(AU);
2310}
2311
2312bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2313 MachineFunction &MF) {
2314 return false;
2315}
2316
2317GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2318 if (!Info) {
2319 unsigned MaxDepth =
2320 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2321 Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
2322 }
2323 return *Info;
2324}
2325
2326AnalysisKey GISelValueTrackingAnalysis::Key;
2327
2328GISelValueTracking
2329GISelValueTrackingAnalysis::run(MachineFunction &MF,
2330 MachineFunctionAnalysisManager &MFAM) {
2331 return Result(MF);
2332}
2333
2334PreservedAnalyses
2335GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2336 MachineFunctionAnalysisManager &MFAM) {
2337 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
2338 const auto &MRI = MF.getRegInfo();
2339 OS << "name: ";
2340 MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
2341 OS << '\n';
2342
2343 for (MachineBasicBlock &BB : MF) {
2344 for (MachineInstr &MI : BB) {
2345 for (MachineOperand &MO : MI.defs()) {
2346 if (!MO.isReg() || MO.getReg().isPhysical())
2347 continue;
2348 Register Reg = MO.getReg();
2349 if (!MRI.getType(Reg).isValid())
2350 continue;
2351 KnownBits Known = VTA.getKnownBits(Reg);
2352 unsigned SignedBits = VTA.computeNumSignBits(Reg);
2353 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2354 << '\n';
2355 };
2356 }
2357 }
2358 return PreservedAnalyses::all();
2359}