//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/FMF.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelValueTracking::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelValueTracking::getKnownOnes(Register R) {
  return getKnownBits(R).One;
}

[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}
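
// Illustrative note (not part of the original source): intersectWith keeps
// only facts both inputs agree on. For example, if Src1 is known to be 0b??10
// (Zero = 0b0001, One = 0b0010) and Src0 is known to be 0b?110
// (Zero = 0b0001, One = 0b0110), the result has Zero = 0b0001 and
// One = 0b0010: only "bit 1 is one, bit 0 is zero" survives, which is a sound
// bound for a value that may come from either source.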

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}
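
// Worked example (illustration, not from the original source): with
// BitWidth = 8, Offset known to be exactly 2 and Width known to be exactly 3,
// Mask gets Zero = 0xF8 and One = 0x07. For a source known to be exactly
// 0x6D, lshr by 2 gives 0x1B, and the AND yields a result known to be exactly
// 0x03, matching (0x6D >> 2) & 0b111.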

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may exceed the maximum depth if it is passed to a different
  // GISelValueTracking object. This can happen when a generic part uses a
  // GISelValueTracking object with some max depth, but then we hit
  // TL.computeKnownBitsForTargetInstr, which creates a new GISelValueTracking
  // object with a different and smaller max depth. If we just checked for
  // equality, we would never exit if the depth passed down to the
  // target-specific GISelValueTracking object were already bigger than its
  // max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
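  // Illustrative example (not part of the original source): for
  //   %v:_(<2 x s8>) = G_BUILD_VECTOR %a:_(s8), %b:_(s8)
  // with %a known to be exactly 0x0F and %b exactly 0x0D, the per-element
  // intersection leaves Known.One = 0x0D and Known.Zero = 0xF0: every
  // demanded lane matches the pattern 0b0000'11?1.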
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // A PHI's operands are a mix of registers and basic blocks, interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(SrcReg);
      // Look through trivial copies and phis, but don't look through copies
      // or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits analysis is
      // currently unable to determine the bit width of a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        // In case we're forwarding from a vector register to a non-vector
        // register we need to update the demanded elements to reflect this
        // before recursing.
        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
                                    ? APInt::getAllOnes(SrcTy.getNumElements())
                                    : DemandedElts; // Known to be APInt(1, 1)
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS is Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits set in either the LHS or the RHS are set in the result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhu(Known, Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhs(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
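  // Illustrative example (not part of the original source): sign-extending an
  // s8 value whose sign bit is known set to s16 makes bits 8-15 known one; if
  // the sign bit were known clear they would be known zero, and if it is
  // unknown the extended bits stay unknown. KnownBits::sext models exactly
  // this behavior.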
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
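  // Illustrative example (not part of the original source): for
  //   %y:_(s32) = G_ASSERT_ZEXT %x:_(s32), 8
  // InMask is 0x000000FF, so bits 8-31 become known zero and only known ones
  // within the low byte of %x are kept.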
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
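  // Illustrative example (not part of the original source): an alignment of
  // 16 gives LogOfAlign = 4, so the low four bits become known zero (the
  // value is a multiple of 16), regardless of what was known before.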
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
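  // Illustrative example (not part of the original source): unmerging an s64
  // into two s32 pieces maps result 0 to SrcOpKnown.extractBits(32, 0) and
  // result 1 to SrcOpKnown.extractBits(32, 32); if only the low 32 bits of
  // the source are known, only the first result inherits them.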
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
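  // Illustrative example (not part of the original source): if at most 5 bits
  // of the G_CTPOP source can be set, the count is at most 5, which fits in
  // bit_width(5) = 3 bits, so result bits 3 and upward are known zero.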
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
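  // Illustrative example (not part of the original source): for G_SBFX on s32
  // with Width known to be exactly 8, ShiftKnown is exactly 24, so the
  // extracted field is shifted up to the top byte and arithmetic-shifted back
  // down, replicating the field's bit 7 into bits 8-31.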
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

    LLT VecVT = MRI.getType(InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element
    // referenced by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.abs();
    Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
                           1);
    break;
  }
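  // Illustrative example (not part of the original source): if the G_ABS
  // source is an s32 created by G_SEXT from s8, computeNumSignBits returns
  // 25, so |x| <= 128 fits in 8 bits and the top 24 bits of the result are
  // known zero.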
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                             FPClassTest InterestedClasses,
                                             unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  auto ClearClassesFromFlags =
      make_scope_exit([=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
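  // Illustrative example (not part of the original source): for
  //   %r = G_FMA %x, %x, %c
  // the product x * x is non-negative or NaN, so the sum can never be -0.0;
  // and when %c cannot be ordered less than zero, no negative non-NaN result
  // is possible either.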
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // reduce min/max will choose an element from one of the vector elements,
    // so we can infer any class information that is common to all elements.

    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);
    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands

    // Negative round ups to 0 produce -0
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    // pow(-x, exp) --> negative if exp is odd and x is negative.
    // pow(-0, exp) --> -inf if exp is negative odd.
    // pow(-0, exp) --> -0 if exp is positive odd.
    // pow(-inf, exp) --> -0 if exp is negative odd.
    // pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
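  // Illustrative example (not part of the original source): if bit 0 of the
  // G_FPOWI exponent is known zero, the exponent is even, so the result is a
  // square and cannot be in any negative class, e.g. powi(-3.0, 2) == 9.0.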
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

    // Sign is preserved, but underflows may produce zeroes.
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    else if (KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

    if (KnownSrc.isKnownNever(fcPositive))
      Known.knownNot(fcPositive);
    else if (KnownSrc.cannotBeOrderedGreaterThanZero())
      Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

    // Can refine inf/zero handling based on the exponent operand.
    const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
    if ((InterestedClasses & ExpInfoMask) == fcNone)
      break;
    if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
      break;

    // TODO: Handle constant range of Exp

    break;
  }
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      } else {
        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_STRICT_FMUL: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // X * X is always non-negative or a NaN.
    if (LHS == RHS)
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // 0 * +/-inf produces NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(
             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
      Known.knownNot(fcNan);

    break;
  }
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();

    if (LHS == RHS) {
      // TODO: Could filter out snan if we inspect the operand
      if (Opcode == TargetOpcode::G_FDIV) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
                              (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
                        KnownRHS, Depth + 1);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
                          KnownLHS, Depth + 1);
    }

    if (Opcode == TargetOpcode::G_FDIV) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))) ||
           (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) &&
          KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() &&
          KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
              getFltSemanticForLLT(DstTy.getScalarType())))) {
        Known.knownNot(fcNan);
      }

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case TargetOpcode::G_FPEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);

    LLT DstTy = MRI.getType(Dst).getScalarType();
    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
    LLT SrcTy = MRI.getType(Src).getScalarType();
    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Opcode == TargetOpcode::G_UITOFP)
      Known.signBitMustBeZero();

    Register Val = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Val);

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Ty.getScalarSizeInBits();
      if (Opcode == TargetOpcode::G_SITOFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
      LLT FPTy = DstTy.getScalarType();
      const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
      if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
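  // Illustrative example (not part of the original source): G_SITOFP from s32
  // has IntSize = 31 after dropping the sign bit, and for an IEEE single
  // result ilogb(max float) = 127 >= 31, so the conversion is known finite;
  // a very wide integer source such as s1024 would not satisfy the check.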
1597 // case TargetOpcode::G_MERGE_VALUES:
1598 case TargetOpcode::G_BUILD_VECTOR:
1599 case TargetOpcode::G_CONCAT_VECTORS: {
1600 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);
1601
1602 if (!DstTy.isFixedVector())
1603 break;
1604
1605 bool First = true;
1606 for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
1607 // We know the index we are inserting to, so clear it from Vec check.
1608 bool NeedsElt = DemandedElts[Idx];
1609
1610 // Do we demand the inserted element?
1611 if (NeedsElt) {
1612 Register Src = Merge.getSourceReg(Idx);
1613 if (First) {
1614 computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
1615 First = false;
1616 } else {
1617 KnownFPClass Known2;
1618 computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
1619 Known |= Known2;
1620 }
1621
1622 // If we don't know any bits, early out.
1623 if (Known.isUnknown())
1624 break;
1625 }
1626 }
1627
1628 break;
1629 }
1630 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1631 // Look through extract element. If the index is non-constant or
1632 // out-of-range demand all elements, otherwise just the extracted
1633 // element.
1634 GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
1635 Register Vec = Extract.getVectorReg();
1636 Register Idx = Extract.getIndexReg();
1637
1638 auto CIdx = getIConstantVRegVal(Idx, MRI);
1639
1640 LLT VecTy = MRI.getType(Vec);
1641
1642 if (VecTy.isFixedVector()) {
1643 unsigned NumElts = VecTy.getNumElements();
1644 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1645 if (CIdx && CIdx->ult(NumElts))
1646 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1647 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
1648 Depth + 1);
1649 }
1650
1651 break;
1652 }
1653 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1654 GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
1655 Register Vec = Insert.getVectorReg();
1656 Register Elt = Insert.getElementReg();
1657 Register Idx = Insert.getIndexReg();
1658
1659 LLT VecTy = MRI.getType(Vec);
1660
1661 if (VecTy.isScalableVector())
1662 return;
1663
1664 auto CIdx = getIConstantVRegVal(Idx, MRI);
1665
1666 unsigned NumElts = DemandedElts.getBitWidth();
1667 APInt DemandedVecElts = DemandedElts;
1668 bool NeedsElt = true;
1669 // If we know the index we are inserting to, clear it from Vec check.
1670 if (CIdx && CIdx->ult(NumElts)) {
1671 DemandedVecElts.clearBit(CIdx->getZExtValue());
1672 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1673 }
1674
1675 // Do we demand the inserted element?
1676 if (NeedsElt) {
1677 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
1678 // If we don't know any bits, early out.
1679 if (Known.isUnknown())
1680 break;
1681 } else {
1682 Known.KnownFPClasses = fcNone;
1683 }
1684
1685 // Do we need anymore elements from Vec?
1686 if (!DemandedVecElts.isZero()) {
1687 KnownFPClass Known2;
1688 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
1689 Depth + 1);
1690 Known |= Known2;
1691 }
1692
1693 break;
1694 }
1695 case TargetOpcode::G_SHUFFLE_VECTOR: {
1696 // For undef elements, we don't know anything about the common state of
1697 // the shuffle result.
1698 GShuffleVector &Shuf = cast<GShuffleVector>(MI);
1699 APInt DemandedLHS, DemandedRHS;
1700 if (DstTy.isScalableVector()) {
1701 assert(DemandedElts == APInt(1, 1));
1702 DemandedLHS = DemandedRHS = DemandedElts;
1703 } else {
1705 DemandedElts, DemandedLHS,
1706 DemandedRHS)) {
1707 Known.resetAll();
1708 return;
1709 }
1710 }
1711
1712 if (!!DemandedLHS) {
1713 Register LHS = Shuf.getSrc1Reg();
1714 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
1715 Depth + 1);
1716
1717 // If we don't know anything, early out.
1718 if (Known.isUnknown())
1719 break;
1720 } else {
1721 Known.KnownFPClasses = fcNone;
1722 }
1723
1724 if (!!DemandedRHS) {
1725 KnownFPClass Known2;
1726 Register RHS = Shuf.getSrc2Reg();
1727 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
1728 Depth + 1);
1729 Known |= Known2;
1730 }
1731 break;
1732 }
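// For instance, a <4 x s32> shuffle with mask <0, 1, 4, 5> and all result
// lanes demanded pulls lanes {0, 1} from the first source and lanes {0, 1}
// from the second; the result classes are the union of both queries.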
1733 case TargetOpcode::COPY: {
1734 Register Src = MI.getOperand(1).getReg();
1735
1736 if (!Src.isVirtual())
1737 return;
1738
1739 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
1740 break;
1741 }
1742 }
1743}
1744
1745KnownFPClass
1746GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1747 FPClassTest InterestedClasses,
1748 unsigned Depth) {
1749 KnownFPClass KnownClasses;
1750 computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
1751 return KnownClasses;
1752}
1753
1754KnownFPClass GISelValueTracking::computeKnownFPClass(
1755 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1756 KnownFPClass Known;
1757 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1758 return Known;
1759}
1760
1761KnownFPClass GISelValueTracking::computeKnownFPClass(
1762 Register R, const APInt &DemandedElts, uint32_t Flags,
1763 FPClassTest InterestedClasses, unsigned Depth) {
1764 if (Flags & MachineInstr::MIFlag::FmNoNans)
1765 InterestedClasses &= ~fcNan;
1766 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1767 InterestedClasses &= ~fcInf;
1768
1769 KnownFPClass Result =
1770 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1771
1772 if (Flags & MachineInstr::MIFlag::FmNoNans)
1773 Result.KnownFPClasses &= ~fcNan;
1774 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1775 Result.KnownFPClasses &= ~fcInf;
1776 return Result;
1777}
1778
1779KnownFPClass GISelValueTracking::computeKnownFPClass(
1780 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1781 LLT Ty = MRI.getType(R);
1782 APInt DemandedElts =
1783 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
1784 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1785}
1786
1787/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
1788unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1789 const APInt &DemandedElts,
1790 unsigned Depth) {
1791 // Test src1 first, since we canonicalize simpler expressions to the RHS.
1792 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
1793 if (Src1SignBits == 1)
1794 return 1;
1795 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
1796}
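// For instance, if Src1 is known to have 5 sign bits and Src0 only 3, a
// value that may be either operand (such as a G_SELECT result) is
// guaranteed only min(3, 5) = 3 sign bits.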
1797
1798 /// Compute the known number of sign bits from the range metadata attached to
1799 /// the memory operand. For an extending load, account for the behavior of
1800 /// the high bits.
1801static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1802 unsigned TyBits) {
1803 const MDNode *Ranges = Ld->getRanges();
1804 if (!Ranges)
1805 return 1;
1806
1807 ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
1808 if (TyBits > CR.getBitWidth()) {
1809 switch (Ld->getOpcode()) {
1810 case TargetOpcode::G_SEXTLOAD:
1811 CR = CR.signExtend(TyBits);
1812 break;
1813 case TargetOpcode::G_ZEXTLOAD:
1814 CR = CR.zeroExtend(TyBits);
1815 break;
1816 default:
1817 break;
1818 }
1819 }
1820
1821 return std::min(CR.getSignedMin().getNumSignBits(),
1822 CR.getSignedMax().getNumSignBits());
1823}
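// Worked example: a !range of [-64, 64) on an s32 load gives
// getSignedMin() == -64 (0xFFFFFFC0, 26 sign bits) and getSignedMax() == 63
// (0x0000003F, 26 sign bits), so the load is known to have 26 sign bits.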
1824
1826 const APInt &DemandedElts,
1827 unsigned Depth) {
1828 MachineInstr &MI = *MRI.getVRegDef(R);
1829 unsigned Opcode = MI.getOpcode();
1830
1831 if (Opcode == TargetOpcode::G_CONSTANT)
1832 return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
1833
1834 if (Depth == getMaxDepth())
1835 return 1;
1836
1837 if (!DemandedElts)
1838 return 1; // No demanded elts, better to assume we don't know anything.
1839
1840 LLT DstTy = MRI.getType(R);
1841 const unsigned TyBits = DstTy.getScalarSizeInBits();
1842
1843 // Handle the case where this is called on a register that does not have a
1844 // type constraint. This is unlikely to occur except by looking through
1845 // copies, but it is possible for the initial register being queried to be
1846 // in this state.
1847 if (!DstTy.isValid())
1848 return 1;
1849
1850 unsigned FirstAnswer = 1;
1851 switch (Opcode) {
1852 case TargetOpcode::COPY: {
1853 MachineOperand &Src = MI.getOperand(1);
1854 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
1855 MRI.getType(Src.getReg()).isValid()) {
1856 // Don't increment Depth for this one since we didn't do any work.
1857 return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
1858 }
1859
1860 return 1;
1861 }
1862 case TargetOpcode::G_SEXT: {
1863 Register Src = MI.getOperand(1).getReg();
1864 LLT SrcTy = MRI.getType(Src);
1865 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
1866 return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
1867 }
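// For instance, G_SEXT from s8 to s32 appends 24 copies of the sign bit,
// so the result has computeNumSignBits(Src) + 24 sign bits.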
1868 case TargetOpcode::G_ASSERT_SEXT:
1869 case TargetOpcode::G_SEXT_INREG: {
1870 // Max of the input and what this extends.
1871 Register Src = MI.getOperand(1).getReg();
1872 unsigned SrcBits = MI.getOperand(2).getImm();
1873 unsigned InRegBits = TyBits - SrcBits + 1;
1874 return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
1875 InRegBits);
1876 }
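// For instance, G_SEXT_INREG of an s32 from 8 bits guarantees at least
// 32 - 8 + 1 = 25 sign bits, or more if the operand already carries them.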
1877 case TargetOpcode::G_LOAD: {
1878 GLoad *Ld = cast<GLoad>(&MI);
1879 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
1880 break;
1881
1882 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1883 }
1884 case TargetOpcode::G_SEXTLOAD: {
1885 GSExtLoad *Ld = cast<GSExtLoad>(&MI);
1886
1887 // FIXME: We need an in-memory type representation.
1888 if (DstTy.isVector())
1889 return 1;
1890
1891 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1892 if (NumBits != 1)
1893 return NumBits;
1894
1895 // e.g. i16->i32 = '17' bits known.
1896 const MachineMemOperand *MMO = *MI.memoperands_begin();
1897 return TyBits - MMO->getSizeInBits().getValue() + 1;
1898 }
1899 case TargetOpcode::G_ZEXTLOAD: {
1900 GZExtLoad *Ld = cast<GZExtLoad>(&MI);
1901
1902 // FIXME: We need an in-memory type representation.
1903 if (DstTy.isVector())
1904 return 1;
1905
1906 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1907 if (NumBits != 1)
1908 return NumBits;
1909
1910 // e.g. i16->i32 = '16' bits known.
1911 const MachineMemOperand *MMO = *MI.memoperands_begin();
1912 return TyBits - MMO->getSizeInBits().getValue();
1913 }
1914 case TargetOpcode::G_AND:
1915 case TargetOpcode::G_OR:
1916 case TargetOpcode::G_XOR: {
1917 Register Src1 = MI.getOperand(1).getReg();
1918 unsigned Src1NumSignBits =
1919 computeNumSignBits(Src1, DemandedElts, Depth + 1);
1920 if (Src1NumSignBits != 1) {
1921 Register Src2 = MI.getOperand(2).getReg();
1922 unsigned Src2NumSignBits =
1923 computeNumSignBits(Src2, DemandedElts, Depth + 1);
1924 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
1925 }
1926 break;
1927 }
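// For instance, if both operands of a G_AND are known to have at least 7
// sign bits, each of the top 7 result bits is computed from bits that are
// equal within each operand, so at least 7 sign bits survive.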
1928 case TargetOpcode::G_ASHR: {
1929 Register Src1 = MI.getOperand(1).getReg();
1930 Register Src2 = MI.getOperand(2).getReg();
1931 FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
1932 if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
1933 FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
1934 break;
1935 }
1936 case TargetOpcode::G_SHL: {
1937 Register Src1 = MI.getOperand(1).getReg();
1938 Register Src2 = MI.getOperand(2).getReg();
1939 if (std::optional<ConstantRange> ShAmtRange =
1940 getValidShiftAmountRange(Src2, DemandedElts, Depth + 1)) {
1941 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
1942 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
1943
1944 MachineInstr &ExtMI = *MRI.getVRegDef(Src1);
1945 unsigned ExtOpc = ExtMI.getOpcode();
1946
1947 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
1948 // shifted out, then we can compute the number of sign bits for the
1949 // operand being extended. A future improvement could be to pass along the
1950 // "shifted left by" information in the recursive calls to
1951 // computeNumSignBits, allowing us to handle this more generically.
1952 if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
1953 ExtOpc == TargetOpcode::G_ANYEXT) {
1954 LLT ExtTy = MRI.getType(Src1);
1955 Register Extendee = ExtMI.getOperand(1).getReg();
1956 LLT ExtendeeTy = MRI.getType(Extendee);
1957 uint64_t SizeDiff =
1958 ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();
1959
1960 if (SizeDiff <= MinShAmt) {
1961 unsigned Tmp =
1962 SizeDiff + computeNumSignBits(Extendee, DemandedElts, Depth + 1);
1963 if (MaxShAmt < Tmp)
1964 return Tmp - MaxShAmt;
1965 }
1966 }
1967 // shl destroys sign bits; ensure it doesn't shift out all of them.
1968 unsigned Tmp = computeNumSignBits(Src1, DemandedElts, Depth + 1);
1969 if (MaxShAmt < Tmp)
1970 return Tmp - MaxShAmt;
1971 }
1972 break;
1973 }
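// Worked example of the look-through: for (G_SHL (G_ZEXT s8 -> s32), 24),
// SizeDiff == MinShAmt == MaxShAmt == 24, so every extended bit is shifted
// out and the result keeps (24 + N) - 24 == N sign bits, where N is the
// number of sign bits of the original s8 value.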
1974 case TargetOpcode::G_TRUNC: {
1975 Register Src = MI.getOperand(1).getReg();
1976 LLT SrcTy = MRI.getType(Src);
1977
1978 // Check whether the sign bits of the source reach down into the truncated value.
1979 unsigned DstTyBits = DstTy.getScalarSizeInBits();
1980 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
1981 unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
1982 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
1983 return NumSrcSignBits - (NumSrcBits - DstTyBits);
1984 break;
1985 }
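// For instance, truncating an s64 value with 40 known sign bits to s32
// drops 32 high bits, leaving 40 - 32 = 8 known sign bits in the result.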
1986 case TargetOpcode::G_SELECT: {
1987 return computeNumSignBitsMin(MI.getOperand(2).getReg(),
1988 MI.getOperand(3).getReg(), DemandedElts,
1989 Depth + 1);
1990 }
1991 case TargetOpcode::G_SMIN:
1992 case TargetOpcode::G_SMAX:
1993 case TargetOpcode::G_UMIN:
1994 case TargetOpcode::G_UMAX:
1995 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
1996 return computeNumSignBitsMin(MI.getOperand(1).getReg(),
1997 MI.getOperand(2).getReg(), DemandedElts,
1998 Depth + 1);
1999 case TargetOpcode::G_SADDO:
2000 case TargetOpcode::G_SADDE:
2001 case TargetOpcode::G_UADDO:
2002 case TargetOpcode::G_UADDE:
2003 case TargetOpcode::G_SSUBO:
2004 case TargetOpcode::G_SSUBE:
2005 case TargetOpcode::G_USUBO:
2006 case TargetOpcode::G_USUBE:
2007 case TargetOpcode::G_SMULO:
2008 case TargetOpcode::G_UMULO: {
2009 // If the boolean result is 0/-1, all bits are sign bits.
2010 // We know that we have an integer-based boolean since these operations
2011 // are only available for integers.
2012 if (MI.getOperand(1).getReg() == R) {
2013 if (TL.getBooleanContents(DstTy.isVector(), false) ==
2014 TargetLowering::ZeroOrNegativeOneBooleanContent)
2015 return TyBits;
2016 }
2017
2018 break;
2019 }
2020 case TargetOpcode::G_SUB: {
2021 Register Src2 = MI.getOperand(2).getReg();
2022 unsigned Src2NumSignBits =
2023 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2024 if (Src2NumSignBits == 1)
2025 return 1; // Early out.
2026
2027 // Handle NEG.
2028 Register Src1 = MI.getOperand(1).getReg();
2029 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2030 if (Known1.isZero()) {
2031 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2032 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2033 // sign bits set.
2034 if ((Known2.Zero | 1).isAllOnes())
2035 return TyBits;
2036
2037 // If the input is known to be positive (the sign bit is known clear),
2038 // the output of the NEG has, at worst, the same number of sign bits as
2039 // the input.
2040 if (Known2.isNonNegative()) {
2041 FirstAnswer = Src2NumSignBits;
2042 break;
2043 }
2044
2045 // Otherwise, we treat this like a SUB.
2046 }
2047
2048 unsigned Src1NumSignBits =
2049 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2050 if (Src1NumSignBits == 1)
2051 return 1; // Early Out.
2052
2053 // Sub can have at most one carry bit. Thus we know that the output
2054 // is, at worst, one more bit than the inputs.
2055 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2056 break;
2057 }
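// For instance, subtracting s32 values with 12 and 10 known sign bits can
// lose at most one bit to the borrow, so the difference still has
// min(12, 10) - 1 = 9 known sign bits.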
2058 case TargetOpcode::G_ADD: {
2059 Register Src2 = MI.getOperand(2).getReg();
2060 unsigned Src2NumSignBits =
2061 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2062 if (Src2NumSignBits <= 2)
2063 return 1; // Early out.
2064
2065 Register Src1 = MI.getOperand(1).getReg();
2066 unsigned Src1NumSignBits =
2067 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2068 if (Src1NumSignBits == 1)
2069 return 1; // Early Out.
2070
2071 // Special case decrementing a value (ADD X, -1):
2072 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2073 if (Known2.isAllOnes()) {
2074 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2075 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2076 // sign bits set.
2077 if ((Known1.Zero | 1).isAllOnes())
2078 return TyBits;
2079
2080 // If we are subtracting one from a positive number, there is no carry
2081 // out of the result.
2082 if (Known1.isNonNegative()) {
2083 FirstAnswer = Src1NumSignBits;
2084 break;
2085 }
2086
2087 // Otherwise, we treat this like an ADD.
2088 }
2089
2090 // Add can have at most one carry bit. Thus we know that the output
2091 // is, at worst, one more bit than the inputs.
2092 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2093 break;
2094 }
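// For instance, for (G_ADD X, -1) with X known non-negative, the decrement
// cannot carry into the run of sign bits, so the sum keeps all of X's sign
// bits; and if X is known to be 0 or 1, the sum is 0 or -1 and all TyBits
// bits are sign bits.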
2095 case TargetOpcode::G_FCMP:
2096 case TargetOpcode::G_ICMP: {
2097 bool IsFP = Opcode == TargetOpcode::G_FCMP;
2098 if (TyBits == 1)
2099 break;
2100 auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
2101 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
2102 return TyBits; // All bits are sign bits.
2103 if (BC == TargetLowering::ZeroOrOneBooleanContent)
2104 return TyBits - 1; // Every always-zero bit is a sign bit.
2105 break;
2106 }
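// For instance, on a target whose booleans are zero-or-negative-one, an
// s32 G_ICMP result is 0 or -1 and every bit is a sign bit; with
// zero-or-one booleans, the top 31 bits are known zero, giving 31 sign
// bits.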
2107 case TargetOpcode::G_BUILD_VECTOR: {
2108 // Collect the minimum number of sign bits shared by every demanded element.
2109 FirstAnswer = TyBits;
2110 APInt SingleDemandedElt(1, 1);
2111 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2112 if (!DemandedElts[I])
2113 continue;
2114
2115 unsigned Tmp2 =
2116 computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
2117 FirstAnswer = std::min(FirstAnswer, Tmp2);
2118
2119 // If we don't know anything, early out.
2120 if (FirstAnswer == 1)
2121 break;
2122 }
2123 break;
2124 }
2125 case TargetOpcode::G_CONCAT_VECTORS: {
2126 if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
2127 break;
2128 FirstAnswer = TyBits;
2129 // Determine the minimum number of sign bits across all demanded
2130 // elts of the input vectors. Early out if the result is already 1.
2131 unsigned NumSubVectorElts =
2132 MRI.getType(MI.getOperand(1).getReg()).getNumElements();
2133 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2134 APInt DemandedSub =
2135 DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
2136 if (!DemandedSub)
2137 continue;
2138 unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
2139
2140 FirstAnswer = std::min(FirstAnswer, Tmp2);
2141
2142 // If we don't know anything, early out.
2143 if (FirstAnswer == 1)
2144 break;
2145 }
2146 break;
2147 }
2148 case TargetOpcode::G_SHUFFLE_VECTOR: {
2149 // Collect the minimum number of sign bits that are shared by every vector
2150 // element referenced by the shuffle.
2151 APInt DemandedLHS, DemandedRHS;
2152 Register Src1 = MI.getOperand(1).getReg();
2153 unsigned NumElts = MRI.getType(Src1).getNumElements();
2154 if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
2155 DemandedElts, DemandedLHS, DemandedRHS))
2156 return 1;
2157
2158 if (!!DemandedLHS)
2159 FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
2160 // If we don't know anything, early out and try computeKnownBits fall-back.
2161 if (FirstAnswer == 1)
2162 break;
2163 if (!!DemandedRHS) {
2164 unsigned Tmp2 =
2165 computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
2166 FirstAnswer = std::min(FirstAnswer, Tmp2);
2167 }
2168 break;
2169 }
2170 case TargetOpcode::G_SPLAT_VECTOR: {
2171 // Check whether the source's sign bits survive the implicit truncation
2172 // to the element type.
2172 Register Src = MI.getOperand(1).getReg();
2173 unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
2174 unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
2175 if (NumSrcSignBits > (NumSrcBits - TyBits))
2176 return NumSrcSignBits - (NumSrcBits - TyBits);
2177 break;
2178 }
2179 case TargetOpcode::G_INTRINSIC:
2180 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2181 case TargetOpcode::G_INTRINSIC_CONVERGENT:
2182 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2183 default: {
2184 unsigned NumBits =
2185 TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
2186 if (NumBits > 1)
2187 FirstAnswer = std::max(FirstAnswer, NumBits);
2188 break;
2189 }
2190 }
2191
2192 // Finally, if we can prove that the top bits of the result are 0's or 1's,
2193 // use this information.
2194 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2195 APInt Mask;
2196 if (Known.isNonNegative()) { // sign bit is 0
2197 Mask = Known.Zero;
2198 } else if (Known.isNegative()) { // sign bit is 1
2199 Mask = Known.One;
2200 } else {
2201 // Nothing known.
2202 return FirstAnswer;
2203 }
2204
2205 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
2206 // the number of identical bits in the top of the input value.
2207 Mask <<= Mask.getBitWidth() - TyBits;
2208 return std::max(FirstAnswer, Mask.countl_one());
2209}
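// Usage sketch (hypothetical combine helper; the names below are assumed,
// not part of this API): a pass holding a GISelValueTracking instance could
// use the result to prove a G_SEXT_INREG redundant, mirroring the InRegBits
// bound used above:
//
//   static bool isRedundantSextInReg(GISelValueTracking &VT, Register Src,
//                                    unsigned SrcBits, unsigned TyBits) {
//     // A no-op when the top TyBits - SrcBits + 1 bits already agree.
//     return VT.computeNumSignBits(Src) >= TyBits - SrcBits + 1;
//   }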
2210
2211unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2212 LLT Ty = MRI.getType(R);
2213 APInt DemandedElts =
2214 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
2215 return computeNumSignBits(R, DemandedElts, Depth);
2216}
2217
2218std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
2219 Register R, const APInt &DemandedElts, unsigned Depth) {
2220 // Shifting more than the bitwidth is not valid.
2221 MachineInstr &MI = *MRI.getVRegDef(R);
2222 unsigned Opcode = MI.getOpcode();
2223
2224 LLT Ty = MRI.getType(R);
2225 unsigned BitWidth = Ty.getScalarSizeInBits();
2226
2227 if (Opcode == TargetOpcode::G_CONSTANT) {
2228 const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
2229 if (ShAmt.uge(BitWidth))
2230 return std::nullopt;
2231 return ConstantRange(ShAmt);
2232 }
2233
2234 if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
2235 const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
2236 for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
2237 if (!DemandedElts[I])
2238 continue;
2239 MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
2240 if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
2241 MinAmt = MaxAmt = nullptr;
2242 break;
2243 }
2244
2245 const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
2246 if (ShAmt.uge(BitWidth))
2247 return std::nullopt;
2248 if (!MinAmt || MinAmt->ugt(ShAmt))
2249 MinAmt = &ShAmt;
2250 if (!MaxAmt || MaxAmt->ult(ShAmt))
2251 MaxAmt = &ShAmt;
2252 }
2253 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
2254 "Failed to find matching min/max shift amounts");
2255 if (MinAmt && MaxAmt)
2256 return ConstantRange(*MinAmt, *MaxAmt + 1);
2257 }
2258
2259 // Use computeKnownBits to find a hidden constant/knownbits (usually type
2260 // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
2261 KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
2262 if (KnownAmt.getMaxValue().ult(BitWidth))
2263 return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
2264
2265 return std::nullopt;
2266}
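// For instance, a G_BUILD_VECTOR of constant shift amounts <1, 3, 2> with
// all lanes demanded yields the range [1, 4); if a demanded amount is not
// constant and known bits cannot bound it below the bit width, std::nullopt
// is returned.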
2267
2268std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2269 Register R, const APInt &DemandedElts, unsigned Depth) {
2270 if (std::optional<ConstantRange> AmtRange =
2271 getValidShiftAmountRange(R, DemandedElts, Depth))
2272 return AmtRange->getUnsignedMin().getZExtValue();
2273 return std::nullopt;
2274}
2275
2276void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2277 AnalysisUsage &AU) const {
2278 AU.setPreservesAll();
2279 MachineFunctionPass::getAnalysisUsage(AU);
2280}
2281
2282bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2283 MachineFunction &MF) {
2284 return false;
2285}
2286
2287GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2288 if (!Info) {
2289 unsigned MaxDepth =
2290 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2291 Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
2292 }
2293 return *Info;
2294}
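// Usage sketch (hypothetical legacy pass): a MachineFunctionPass that
// declared this analysis in getAnalysisUsage() could query it as
//   GISelValueTracking &VT =
//       getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
// and then call VT.getKnownBits(Reg) or VT.computeNumSignBits(Reg).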
2295
2296AnalysisKey GISelValueTrackingAnalysis::Key;
2297
2298GISelValueTracking
2299GISelValueTrackingAnalysis::run(MachineFunction &MF,
2300 MachineFunctionAnalysisManager &MFAM) {
2301 return Result(MF);
2302}
2303
2304PreservedAnalyses
2305GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2306 MachineFunctionAnalysisManager &MFAM) {
2307 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
2308 const auto &MRI = MF.getRegInfo();
2309 OS << "name: ";
2310 MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
2311 OS << '\n';
2312
2313 for (MachineBasicBlock &BB : MF) {
2314 for (MachineInstr &MI : BB) {
2315 for (MachineOperand &MO : MI.defs()) {
2316 if (!MO.isReg() || MO.getReg().isPhysical())
2317 continue;
2318 Register Reg = MO.getReg();
2319 if (!MRI.getType(Reg).isValid())
2320 continue;
2321 KnownBits Known = VTA.getKnownBits(Reg);
2322 unsigned SignedBits = VTA.computeNumSignBits(Reg);
2323 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2324 << '\n';
2325 }
2326 }
2327 }
2328 return PreservedAnalyses::all();
2329}