TargetLowering.cpp (LLVM 17.0.0git)
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
24#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCExpr.h"
35#include <cctype>
36using namespace llvm;
37
38/// NOTE: The TargetMachine owns TLOF.
39TargetLowering::TargetLowering(const TargetMachine &tm)
40 : TargetLoweringBase(tm) {}
41
42const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
43 return nullptr;
44}
45
46bool TargetLowering::isPositionIndependent() const {
47 return getTargetMachine().isPositionIndependent();
48}
49
50/// Check whether a given call node is in tail position within its function. If
51/// so, it sets Chain to the input chain of the tail call.
52bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
53 SDValue &Chain) const {
54 const Function &F = DAG.getMachineFunction().getFunction();
55
56 // First, check if tail calls have been disabled in this function.
57 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
58 return false;
59
60 // Conservatively require the attributes of the call to match those of
61 // the return. Ignore following attributes because they don't affect the
62 // call sequence.
63 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
64 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
65 Attribute::DereferenceableOrNull, Attribute::NoAlias,
66 Attribute::NonNull, Attribute::NoUndef})
67 CallerAttrs.removeAttribute(Attr);
68
69 if (CallerAttrs.hasAttributes())
70 return false;
71
72 // It's not safe to eliminate the sign / zero extension of the return value.
73 if (CallerAttrs.contains(Attribute::ZExt) ||
74 CallerAttrs.contains(Attribute::SExt))
75 return false;
76
77 // Check if the only use is a function return node.
78 return isUsedByReturnOnly(Node, Chain);
79}
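// --- Illustrative sketch (added for exposition; not part of this file) -----
// Rough shape of how a caller such as the libcall expansion in
// SelectionDAGLegalize can use isInTailCallPosition(): if the node is in tail
// position, the call reuses the incoming chain and may be emitted as a tail
// call. The names TLI, Node, InChain and RetTy are assumed to come from the
// surrounding expansion code.
//
//   SDValue TCChain = InChain;
//   const Function &F = DAG.getMachineFunction().getFunction();
//   bool IsTailCall =
//       TLI.isInTailCallPosition(DAG, Node, TCChain) &&
//       (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
//   if (IsTailCall)
//     InChain = TCChain;
// ----------------------------------------------------------------------------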
80
81bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
82 const uint32_t *CallerPreservedMask,
83 const SmallVectorImpl<CCValAssign> &ArgLocs,
84 const SmallVectorImpl<SDValue> &OutVals) const {
85 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
86 const CCValAssign &ArgLoc = ArgLocs[I];
87 if (!ArgLoc.isRegLoc())
88 continue;
89 MCRegister Reg = ArgLoc.getLocReg();
90 // Only look at callee saved registers.
91 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
92 continue;
93 // Check that we pass the value used for the caller.
94 // (We look for a CopyFromReg reading a virtual register that is used
95 // for the function live-in value of register Reg)
96 SDValue Value = OutVals[I];
97 if (Value->getOpcode() == ISD::AssertZext)
98 Value = Value.getOperand(0);
99 if (Value->getOpcode() != ISD::CopyFromReg)
100 return false;
101 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
102 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
103 return false;
104 }
105 return true;
106}
107
108/// Set CallLoweringInfo attribute flags based on a call instruction
109/// and called function attributes.
110void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
111 unsigned ArgIdx) {
112 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
113 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
114 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
115 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
116 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
117 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
118 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
119 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
120 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
121 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
122 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
123 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
124 Alignment = Call->getParamStackAlign(ArgIdx);
125 IndirectType = nullptr;
127 "multiple ABI attributes?");
128 if (IsByVal) {
129 IndirectType = Call->getParamByValType(ArgIdx);
130 if (!Alignment)
131 Alignment = Call->getParamAlign(ArgIdx);
132 }
133 if (IsPreallocated)
134 IndirectType = Call->getParamPreallocatedType(ArgIdx);
135 if (IsInAlloca)
136 IndirectType = Call->getParamInAllocaType(ArgIdx);
137 if (IsSRet)
138 IndirectType = Call->getParamStructRetType(ArgIdx);
139}
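// --- Illustrative sketch (added for exposition; not part of this file) -----
// setAttributes() is typically invoked once per call argument while building
// an ArgListTy from a call instruction CB; the names CB and getValue are
// assumptions standing in for the caller's context.
//
//   TargetLowering::ArgListTy Args;
//   for (unsigned I = 0, E = CB.arg_size(); I != E; ++I) {
//     TargetLowering::ArgListEntry Entry;
//     Entry.Node = getValue(CB.getArgOperand(I)); // SDValue for the argument
//     Entry.Ty = CB.getArgOperand(I)->getType();
//     Entry.setAttributes(&CB, I);                // fills the flags above
//     Args.push_back(Entry);
//   }
// ----------------------------------------------------------------------------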
140
141/// Generate a libcall taking the given operands as arguments and returning a
142/// result of type RetVT.
143std::pair<SDValue, SDValue>
144TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
145 ArrayRef<SDValue> Ops,
146 MakeLibCallOptions CallOptions,
147 const SDLoc &dl,
148 SDValue InChain) const {
149 if (!InChain)
150 InChain = DAG.getEntryNode();
151
152 TargetLowering::ArgListTy Args;
153 Args.reserve(Ops.size());
154
155 TargetLowering::ArgListEntry Entry;
156 for (unsigned i = 0; i < Ops.size(); ++i) {
157 SDValue NewOp = Ops[i];
158 Entry.Node = NewOp;
159 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
160 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
161 CallOptions.IsSExt);
162 Entry.IsZExt = !Entry.IsSExt;
163
164 if (CallOptions.IsSoften &&
165 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
166 Entry.IsSExt = Entry.IsZExt = false;
167 }
168 Args.push_back(Entry);
169 }
170
171 if (LC == RTLIB::UNKNOWN_LIBCALL)
172 report_fatal_error("Unsupported library call operation!");
173 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
174 getPointerTy(DAG.getDataLayout()));
175
176 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
177 TargetLowering::CallLoweringInfo CLI(DAG);
178 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
179 bool zeroExtend = !signExtend;
180
181 if (CallOptions.IsSoften &&
182 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
183 signExtend = zeroExtend = false;
184 }
185
186 CLI.setDebugLoc(dl)
187 .setChain(InChain)
188 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
189 .setNoReturn(CallOptions.DoesNotReturn)
190 .setDiscardResult(!CallOptions.IsReturnValueUsed)
191 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
192 .setSExtResult(signExtend)
193 .setZExtResult(zeroExtend);
194 return LowerCallTo(CLI);
195}
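// --- Illustrative sketch (added for exposition; not part of this file) -----
// A hypothetical target could use makeLibCall() to expand an unsupported
// floating-point operation into a runtime call, e.g. lowering an f128 FSIN
// node Op during custom lowering (DAG and Op assumed from that context):
//
//   TargetLowering::MakeLibCallOptions CallOptions;
//   SDValue Ops[] = {Op.getOperand(0)};
//   std::pair<SDValue, SDValue> Result =
//       makeLibCall(DAG, RTLIB::SIN_F128, MVT::f128, Ops, CallOptions,
//                   SDLoc(Op));
//   return Result.first; // Result.second is the output chain
// ----------------------------------------------------------------------------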
196
197bool TargetLowering::findOptimalMemOpLowering(
198 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
199 unsigned SrcAS, const AttributeList &FuncAttributes) const {
200 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
201 Op.getSrcAlign() < Op.getDstAlign())
202 return false;
203
204 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
205
206 if (VT == MVT::Other) {
207 // Use the largest integer type whose alignment constraints are satisfied.
208 // We only need to check DstAlign here as SrcAlign is always greater or
209 // equal to DstAlign (or zero).
210 VT = MVT::i64;
211 if (Op.isFixedDstAlign())
212 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
213 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
214 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
215 assert(VT.isInteger());
216
217 // Find the largest legal integer type.
218 MVT LVT = MVT::i64;
219 while (!isTypeLegal(LVT))
220 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
221 assert(LVT.isInteger());
222
223 // If the type we've chosen is larger than the largest legal integer type
224 // then use that instead.
225 if (VT.bitsGT(LVT))
226 VT = LVT;
227 }
228
229 unsigned NumMemOps = 0;
230 uint64_t Size = Op.size();
231 while (Size) {
232 unsigned VTSize = VT.getSizeInBits() / 8;
233 while (VTSize > Size) {
234 // For now, only use non-vector loads / stores for the left-over pieces.
235 EVT NewVT = VT;
236 unsigned NewVTSize;
237
238 bool Found = false;
239 if (VT.isVector() || VT.isFloatingPoint()) {
240 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
241 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
242 isSafeMemOpType(NewVT.getSimpleVT()))
243 Found = true;
244 else if (NewVT == MVT::i64 &&
245 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
246 isSafeMemOpType(MVT::f64)) {
247 // i64 is usually not legal on 32-bit targets, but f64 may be.
248 NewVT = MVT::f64;
249 Found = true;
250 }
251 }
252
253 if (!Found) {
254 do {
255 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
256 if (NewVT == MVT::i8)
257 break;
258 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
259 }
260 NewVTSize = NewVT.getSizeInBits() / 8;
261
262 // If the new VT cannot cover all of the remaining bits, then consider
263 // issuing a (or a pair of) unaligned and overlapping load / store.
264 unsigned Fast;
265 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
266 allowsMisalignedMemoryAccesses(
267 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
268 MachineMemOperand::MONone, &Fast) &&
269 Fast)
270 VTSize = Size;
271 else {
272 VT = NewVT;
273 VTSize = NewVTSize;
274 }
275 }
276
277 if (++NumMemOps > Limit)
278 return false;
279
280 MemOps.push_back(VT);
281 Size -= VTSize;
282 }
283
284 return true;
285}
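// --- Illustrative sketch (added for exposition; not part of this file) -----
// findOptimalMemOpLowering() is queried by the generic memcpy/memset
// expansion roughly as below; MemOps then drives the sequence of load/store
// value types that get emitted. Size, Alignment, SrcAlign, isVol and the
// pointer infos are assumptions from the caller's context.
//
//   std::vector<EVT> MemOps;
//   unsigned Limit = TLI.getMaxStoresPerMemcpy(/*OptSize=*/false);
//   if (!TLI.findOptimalMemOpLowering(
//           MemOps, Limit,
//           MemOp::Copy(Size, /*DstAlignCanChange=*/false, Alignment,
//                       SrcAlign, isVol),
//           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
//           MF.getFunction().getAttributes()))
//     ; // fall back to a libcall
// ----------------------------------------------------------------------------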
286
287/// Soften the operands of a comparison. This code is shared among BR_CC,
288/// SELECT_CC, and SETCC handlers.
289void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
290 SDValue &NewLHS, SDValue &NewRHS,
291 ISD::CondCode &CCCode,
292 const SDLoc &dl, const SDValue OldLHS,
293 const SDValue OldRHS) const {
294 SDValue Chain;
295 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
296 OldRHS, Chain);
297}
298
299void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
300 SDValue &NewLHS, SDValue &NewRHS,
301 ISD::CondCode &CCCode,
302 const SDLoc &dl, const SDValue OldLHS,
303 const SDValue OldRHS,
304 SDValue &Chain,
305 bool IsSignaling) const {
306 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
307 // not supporting it. We can update this code when libgcc provides such
308 // functions.
309
310 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
311 && "Unsupported setcc type!");
312
313 // Expand into one or more soft-fp libcall(s).
314 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
315 bool ShouldInvertCC = false;
316 switch (CCCode) {
317 case ISD::SETEQ:
318 case ISD::SETOEQ:
319 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
320 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
321 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
322 break;
323 case ISD::SETNE:
324 case ISD::SETUNE:
325 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
326 (VT == MVT::f64) ? RTLIB::UNE_F64 :
327 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
328 break;
329 case ISD::SETGE:
330 case ISD::SETOGE:
331 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
332 (VT == MVT::f64) ? RTLIB::OGE_F64 :
333 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
334 break;
335 case ISD::SETLT:
336 case ISD::SETOLT:
337 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
338 (VT == MVT::f64) ? RTLIB::OLT_F64 :
339 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
340 break;
341 case ISD::SETLE:
342 case ISD::SETOLE:
343 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
344 (VT == MVT::f64) ? RTLIB::OLE_F64 :
345 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
346 break;
347 case ISD::SETGT:
348 case ISD::SETOGT:
349 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
350 (VT == MVT::f64) ? RTLIB::OGT_F64 :
351 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
352 break;
353 case ISD::SETO:
354 ShouldInvertCC = true;
355 [[fallthrough]];
356 case ISD::SETUO:
357 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
358 (VT == MVT::f64) ? RTLIB::UO_F64 :
359 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
360 break;
361 case ISD::SETONE:
362 // SETONE = O && UNE
363 ShouldInvertCC = true;
364 [[fallthrough]];
365 case ISD::SETUEQ:
366 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
367 (VT == MVT::f64) ? RTLIB::UO_F64 :
368 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
369 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
370 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
371 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
372 break;
373 default:
374 // Invert CC for unordered comparisons
375 ShouldInvertCC = true;
376 switch (CCCode) {
377 case ISD::SETULT:
378 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
379 (VT == MVT::f64) ? RTLIB::OGE_F64 :
380 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
381 break;
382 case ISD::SETULE:
383 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
384 (VT == MVT::f64) ? RTLIB::OGT_F64 :
385 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
386 break;
387 case ISD::SETUGT:
388 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
389 (VT == MVT::f64) ? RTLIB::OLE_F64 :
390 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
391 break;
392 case ISD::SETUGE:
393 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
394 (VT == MVT::f64) ? RTLIB::OLT_F64 :
395 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
396 break;
397 default: llvm_unreachable("Do not know how to soften this setcc!");
398 }
399 }
400
401 // Use the target specific return value for comparison lib calls.
402 EVT RetVT = getCmpLibcallReturnType();
403 SDValue Ops[2] = {NewLHS, NewRHS};
404 TargetLowering::MakeLibCallOptions CallOptions;
405 EVT OpsVT[2] = { OldLHS.getValueType(),
406 OldRHS.getValueType() };
407 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
408 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
409 NewLHS = Call.first;
410 NewRHS = DAG.getConstant(0, dl, RetVT);
411
412 CCCode = getCmpLibcallCC(LC1);
413 if (ShouldInvertCC) {
414 assert(RetVT.isInteger());
415 CCCode = getSetCCInverse(CCCode, RetVT);
416 }
417
418 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
419 // Update Chain.
420 Chain = Call.second;
421 } else {
422 EVT SetCCVT =
423 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
424 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
425 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
426 CCCode = getCmpLibcallCC(LC2);
427 if (ShouldInvertCC)
428 CCCode = getSetCCInverse(CCCode, RetVT);
429 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
430 if (Chain)
431 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
432 Call2.second);
433 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
434 Tmp.getValueType(), Tmp, NewLHS);
435 NewRHS = SDValue();
436 }
437}
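// --- Illustrative worked example (added for exposition; not part of file) --
// Softening a single-libcall predicate, e.g. (setcc f32 %a, %b, olt):
//   LC1 = RTLIB::OLT_F32, so NewLHS becomes the soft-float call __ltsf2(%a, %b),
//   NewRHS becomes the integer constant 0, and CCCode becomes the integer
//   predicate getCmpLibcallCC(OLT_F32) (conventionally SETLT), i.e. the
//   original compare is rewritten as "__ltsf2(%a, %b) < 0".
// Two-libcall predicates such as SETUEQ additionally emit the UO_* call and
// combine the two integer setccs with OR (or AND when ShouldInvertCC).
// ----------------------------------------------------------------------------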
438
439/// Return the entry encoding for a jump table in the current function. The
440/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
441unsigned TargetLowering::getJumpTableEncoding() const {
442 // In non-pic modes, just use the address of a block.
443 if (!isPositionIndependent())
444 return MachineJumpTableInfo::EK_BlockAddress;
445
446 // In PIC mode, if the target supports a GPRel32 directive, use it.
447 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
448 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
449
450 // Otherwise, use a label difference.
451 return MachineJumpTableInfo::EK_LabelDifference32;
452}
453
454SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
455 SelectionDAG &DAG) const {
456 // If our PIC model is GP relative, use the global offset table as the base.
457 unsigned JTEncoding = getJumpTableEncoding();
458
459 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
460 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
461 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
462
463 return Table;
464}
465
466/// This returns the relocation base for the given PIC jumptable, the same as
467/// getPICJumpTableRelocBase, but as an MCExpr.
468const MCExpr *
469TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
470 unsigned JTI, MCContext &Ctx) const {
471 // The normal PIC reloc base is the label at the start of the jump table.
472 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
473}
474
475bool
476TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
477 const TargetMachine &TM = getTargetMachine();
478 const GlobalValue *GV = GA->getGlobal();
479
480 // If the address is not even local to this DSO we will have to load it from
481 // a got and then add the offset.
482 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
483 return false;
484
485 // If the code is position independent we will have to add a base register.
486 if (isPositionIndependent())
487 return false;
488
489 // Otherwise we can do it.
490 return true;
491}
492
493//===----------------------------------------------------------------------===//
494// Optimization Methods
495//===----------------------------------------------------------------------===//
496
497/// If the specified instruction has a constant integer operand and there are
498/// bits set in that constant that are not demanded, then clear those bits and
499/// return true.
500bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
501 const APInt &DemandedBits,
502 const APInt &DemandedElts,
503 TargetLoweringOpt &TLO) const {
504 SDLoc DL(Op);
505 unsigned Opcode = Op.getOpcode();
506
507 // Do target-specific constant optimization.
508 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
509 return TLO.New.getNode();
510
511 // FIXME: ISD::SELECT, ISD::SELECT_CC
512 switch (Opcode) {
513 default:
514 break;
515 case ISD::XOR:
516 case ISD::AND:
517 case ISD::OR: {
518 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
519 if (!Op1C || Op1C->isOpaque())
520 return false;
521
522 // If this is a 'not' op, don't touch it because that's a canonical form.
523 const APInt &C = Op1C->getAPIntValue();
524 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
525 return false;
526
527 if (!C.isSubsetOf(DemandedBits)) {
528 EVT VT = Op.getValueType();
529 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
530 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
531 return TLO.CombineTo(Op, NewOp);
532 }
533
534 break;
535 }
536 }
537
538 return false;
539}
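// --- Illustrative, self-contained check (added for exposition) --------------
// Demonstrates why clearing non-demanded constant bits is sound for AND/OR/
// XOR: with DemandedBits = 0xFF, replacing the mask 0x00FF00FF by 0xFF cannot
// change any demanded bit of the result. Not part of the original file.
static bool shrinkDemandedConstantSketch(unsigned X) {
  const unsigned DemandedBits = 0xFFu;        // caller only reads the low byte
  const unsigned C = 0x00FF00FFu;             // original constant operand
  const unsigned ShrunkC = C & DemandedBits;  // what ShrinkDemandedConstant forms
  return ((X & C) & DemandedBits) == ((X & ShrunkC) & DemandedBits);
}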
540
541bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
542 const APInt &DemandedBits,
543 TargetLoweringOpt &TLO) const {
544 EVT VT = Op.getValueType();
545 APInt DemandedElts = VT.isVector()
546 ? APInt::getAllOnes(VT.getVectorNumElements())
547 : APInt(1, 1);
548 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
549}
550
551/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
552/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
553/// generalized for targets with other types of implicit widening casts.
554bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
555 const APInt &DemandedBits,
556 TargetLoweringOpt &TLO) const {
557 assert(Op.getNumOperands() == 2 &&
558 "ShrinkDemandedOp only supports binary operators!");
559 assert(Op.getNode()->getNumValues() == 1 &&
560 "ShrinkDemandedOp only supports nodes with one result!");
561
562 EVT VT = Op.getValueType();
563 SelectionDAG &DAG = TLO.DAG;
564 SDLoc dl(Op);
565
566 // Early return, as this function cannot handle vector types.
567 if (VT.isVector())
568 return false;
569
570 // Don't do this if the node has another user, which may require the
571 // full value.
572 if (!Op.getNode()->hasOneUse())
573 return false;
574
575 // Search for the smallest integer type with free casts to and from
576 // Op's type. For expedience, just check power-of-2 integer types.
577 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
578 unsigned DemandedSize = DemandedBits.getActiveBits();
579 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
580 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
581 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
582 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
583 // We found a type with free casts.
584 SDValue X = DAG.getNode(
585 Op.getOpcode(), dl, SmallVT,
586 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
587 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
588 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
589 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
590 return TLO.CombineTo(Op, Z);
591 }
592 }
593 return false;
594}
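// --- Illustrative, self-contained check (added for exposition) --------------
// The narrowing performed by ShrinkDemandedOp is sound because, when only the
// low bits are demanded, doing the arithmetic in a smaller type and
// zero-extending reproduces exactly those bits (shown here for a 64-bit add
// narrowed to 32 bits; assumes the usual 32-bit "unsigned"). Not part of the
// original file.
static bool shrinkDemandedOpSketch(unsigned long long X, unsigned long long Y) {
  unsigned long long DemandedLow32 = (X + Y) & 0xFFFFFFFFULL;       // wide op
  unsigned Narrow = static_cast<unsigned>(X) + static_cast<unsigned>(Y);
  return DemandedLow32 == Narrow; // zero-extended narrow op matches
}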
595
596bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
597 DAGCombinerInfo &DCI) const {
598 SelectionDAG &DAG = DCI.DAG;
599 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
600 !DCI.isBeforeLegalizeOps());
601 KnownBits Known;
602
603 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
604 if (Simplified) {
605 DCI.AddToWorklist(Op.getNode());
606 DCI.CommitTargetLoweringOpt(TLO);
607 }
608 return Simplified;
609}
610
611bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
612 const APInt &DemandedElts,
613 DAGCombinerInfo &DCI) const {
614 SelectionDAG &DAG = DCI.DAG;
615 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
616 !DCI.isBeforeLegalizeOps());
617 KnownBits Known;
618
619 bool Simplified =
620 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
621 if (Simplified) {
622 DCI.AddToWorklist(Op.getNode());
623 DCI.CommitTargetLoweringOpt(TLO);
624 }
625 return Simplified;
626}
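// --- Illustrative sketch (added for exposition; not part of this file) -----
// Typical use from a hypothetical target's PerformDAGCombine when only the
// low 16 bits of node N are observed (TLI, N and DCI assumed from that
// context); on success the node has been updated in place:
//
//   APInt DemandedMask = APInt::getLowBitsSet(N->getValueSizeInBits(0), 16);
//   if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
//     return SDValue(N, 0);
// ----------------------------------------------------------------------------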
627
628bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
629 KnownBits &Known,
630 TargetLoweringOpt &TLO,
631 unsigned Depth,
632 bool AssumeSingleUse) const {
633 EVT VT = Op.getValueType();
634
635 // Since the number of lanes in a scalable vector is unknown at compile time,
636 // we track one bit which is implicitly broadcast to all lanes. This means
637 // that all lanes in a scalable vector are considered demanded.
638 APInt DemandedElts = VT.isFixedLengthVector()
639 ? APInt::getAllOnes(VT.getVectorNumElements())
640 : APInt(1, 1);
641 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
642 AssumeSingleUse);
643}
644
645// TODO: Under what circumstances can we create nodes? Constant folding?
646SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
647 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
648 SelectionDAG &DAG, unsigned Depth) const {
649 EVT VT = Op.getValueType();
650
651 // Limit search depth.
652 if (Depth >= SelectionDAG::MaxRecursionDepth)
653 return SDValue();
654
655 // Ignore UNDEFs.
656 if (Op.isUndef())
657 return SDValue();
658
659 // Not demanding any bits/elts from Op.
660 if (DemandedBits == 0 || DemandedElts == 0)
661 return DAG.getUNDEF(VT);
662
663 bool IsLE = DAG.getDataLayout().isLittleEndian();
664 unsigned NumElts = DemandedElts.getBitWidth();
665 unsigned BitWidth = DemandedBits.getBitWidth();
666 KnownBits LHSKnown, RHSKnown;
667 switch (Op.getOpcode()) {
668 case ISD::BITCAST: {
669 if (VT.isScalableVector())
670 return SDValue();
671
672 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
673 EVT SrcVT = Src.getValueType();
674 EVT DstVT = Op.getValueType();
675 if (SrcVT == DstVT)
676 return Src;
677
678 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
679 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
680 if (NumSrcEltBits == NumDstEltBits)
681 if (SDValue V = SimplifyMultipleUseDemandedBits(
682 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
683 return DAG.getBitcast(DstVT, V);
684
685 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
686 unsigned Scale = NumDstEltBits / NumSrcEltBits;
687 unsigned NumSrcElts = SrcVT.getVectorNumElements();
688 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
689 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
690 for (unsigned i = 0; i != Scale; ++i) {
691 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
692 unsigned BitOffset = EltOffset * NumSrcEltBits;
693 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
694 if (!Sub.isZero()) {
695 DemandedSrcBits |= Sub;
696 for (unsigned j = 0; j != NumElts; ++j)
697 if (DemandedElts[j])
698 DemandedSrcElts.setBit((j * Scale) + i);
699 }
700 }
701
702 if (SDValue V = SimplifyMultipleUseDemandedBits(
703 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
704 return DAG.getBitcast(DstVT, V);
705 }
706
707 // TODO - bigendian once we have test coverage.
708 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
709 unsigned Scale = NumSrcEltBits / NumDstEltBits;
710 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
711 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
712 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
713 for (unsigned i = 0; i != NumElts; ++i)
714 if (DemandedElts[i]) {
715 unsigned Offset = (i % Scale) * NumDstEltBits;
716 DemandedSrcBits.insertBits(DemandedBits, Offset);
717 DemandedSrcElts.setBit(i / Scale);
718 }
719
720 if (SDValue V = SimplifyMultipleUseDemandedBits(
721 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722 return DAG.getBitcast(DstVT, V);
723 }
724
725 break;
726 }
727 case ISD::AND: {
728 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
729 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
730
731 // If all of the demanded bits are known 1 on one side, return the other.
732 // These bits cannot contribute to the result of the 'and' in this
733 // context.
734 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
735 return Op.getOperand(0);
736 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
737 return Op.getOperand(1);
738 break;
739 }
740 case ISD::OR: {
741 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
742 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
743
744 // If all of the demanded bits are known zero on one side, return the
745 // other. These bits cannot contribute to the result of the 'or' in this
746 // context.
747 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
748 return Op.getOperand(0);
749 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
750 return Op.getOperand(1);
751 break;
752 }
753 case ISD::XOR: {
754 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
755 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
756
757 // If all of the demanded bits are known zero on one side, return the
758 // other.
759 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
760 return Op.getOperand(0);
761 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
762 return Op.getOperand(1);
763 break;
764 }
765 case ISD::SHL: {
766 // If we are only demanding sign bits then we can use the shift source
767 // directly.
768 if (const APInt *MaxSA =
769 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
770 SDValue Op0 = Op.getOperand(0);
771 unsigned ShAmt = MaxSA->getZExtValue();
772 unsigned NumSignBits =
773 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
774 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
775 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
776 return Op0;
777 }
778 break;
779 }
780 case ISD::SETCC: {
781 SDValue Op0 = Op.getOperand(0);
782 SDValue Op1 = Op.getOperand(1);
783 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
784 // If (1) we only need the sign-bit, (2) the setcc operands are the same
785 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
786 // -1, we may be able to bypass the setcc.
787 if (DemandedBits.isSignMask() &&
788 Op0.getScalarValueSizeInBits() == BitWidth &&
789 getBooleanContents(Op0.getValueType()) ==
790 BooleanContent::ZeroOrNegativeOneBooleanContent) {
791 // If we're testing X < 0, then this compare isn't needed - just use X!
792 // FIXME: We're limiting to integer types here, but this should also work
793 // if we don't care about FP signed-zero. The use of SETLT with FP means
794 // that we don't care about NaNs.
795 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
796 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
797 return Op0;
798 }
799 break;
800 }
801 case ISD::SIGN_EXTEND_INREG: {
802 // If none of the extended bits are demanded, eliminate the sextinreg.
803 SDValue Op0 = Op.getOperand(0);
804 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
805 unsigned ExBits = ExVT.getScalarSizeInBits();
806 if (DemandedBits.getActiveBits() <= ExBits &&
807 shouldRemoveRedundantExtend(Op))
808 return Op0;
809 // If the input is already sign extended, just drop the extension.
810 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
811 if (NumSignBits >= (BitWidth - ExBits + 1))
812 return Op0;
813 break;
814 }
815 case ISD::ANY_EXTEND_VECTOR_INREG:
816 case ISD::SIGN_EXTEND_VECTOR_INREG:
817 case ISD::ZERO_EXTEND_VECTOR_INREG: {
818 if (VT.isScalableVector())
819 return SDValue();
820
821 // If we only want the lowest element and none of extended bits, then we can
822 // return the bitcasted source vector.
823 SDValue Src = Op.getOperand(0);
824 EVT SrcVT = Src.getValueType();
825 EVT DstVT = Op.getValueType();
826 if (IsLE && DemandedElts == 1 &&
827 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
828 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
829 return DAG.getBitcast(DstVT, Src);
830 }
831 break;
832 }
833 case ISD::INSERT_VECTOR_ELT: {
834 if (VT.isScalableVector())
835 return SDValue();
836
837 // If we don't demand the inserted element, return the base vector.
838 SDValue Vec = Op.getOperand(0);
839 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
840 EVT VecVT = Vec.getValueType();
841 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
842 !DemandedElts[CIdx->getZExtValue()])
843 return Vec;
844 break;
845 }
846 case ISD::INSERT_SUBVECTOR: {
847 if (VT.isScalableVector())
848 return SDValue();
849
850 SDValue Vec = Op.getOperand(0);
851 SDValue Sub = Op.getOperand(1);
852 uint64_t Idx = Op.getConstantOperandVal(2);
853 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
854 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
855 // If we don't demand the inserted subvector, return the base vector.
856 if (DemandedSubElts == 0)
857 return Vec;
858 break;
859 }
860 case ISD::VECTOR_SHUFFLE: {
861 assert(!VT.isScalableVector());
862 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
863
864 // If all the demanded elts are from one operand and are inline,
865 // then we can use the operand directly.
866 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
867 for (unsigned i = 0; i != NumElts; ++i) {
868 int M = ShuffleMask[i];
869 if (M < 0 || !DemandedElts[i])
870 continue;
871 AllUndef = false;
872 IdentityLHS &= (M == (int)i);
873 IdentityRHS &= ((M - NumElts) == i);
874 }
875
876 if (AllUndef)
877 return DAG.getUNDEF(Op.getValueType());
878 if (IdentityLHS)
879 return Op.getOperand(0);
880 if (IdentityRHS)
881 return Op.getOperand(1);
882 break;
883 }
884 default:
885 // TODO: Probably okay to remove after audit; here to reduce change size
886 // in initial enablement patch for scalable vectors
887 if (VT.isScalableVector())
888 return SDValue();
889
890 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
891 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
892 Op, DemandedBits, DemandedElts, DAG, Depth))
893 return V;
894 break;
895 }
896 return SDValue();
897}
898
899SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
900 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
901 unsigned Depth) const {
902 EVT VT = Op.getValueType();
903 // Since the number of lanes in a scalable vector is unknown at compile time,
904 // we track one bit which is implicitly broadcast to all lanes. This means
905 // that all lanes in a scalable vector are considered demanded.
906 APInt DemandedElts = VT.isFixedLengthVector()
907 ? APInt::getAllOnes(VT.getVectorNumElements())
908 : APInt(1, 1);
909 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
910 Depth);
911}
912
913SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
914 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
915 unsigned Depth) const {
916 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
917 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
918 Depth);
919}
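// --- Illustrative sketch (added for exposition; not part of this file) -----
// Unlike SimplifyDemandedBits, these helpers never rewrite the (possibly
// multi-use) operand; they only return an existing value that already
// supplies the demanded bits/elements, so a caller can bypass the operand
// locally, e.g.:
//
//   if (SDValue Simplified = SimplifyMultipleUseDemandedBits(
//           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1))
//     Op0 = Simplified; // use the simpler value just for this one user
// ----------------------------------------------------------------------------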
920
921// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
922// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
923static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
924 const TargetLowering &TLI,
925 const APInt &DemandedBits,
926 const APInt &DemandedElts,
927 unsigned Depth) {
928 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
929 "SRL or SRA node is required here!");
930 // Is the right shift using an immediate value of 1?
931 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
932 if (!N1C || !N1C->isOne())
933 return SDValue();
934
935 // We are looking for an avgfloor
936 // add(ext, ext)
937 // or one of these as a avgceil
938 // add(add(ext, ext), 1)
939 // add(add(ext, 1), ext)
940 // add(ext, add(ext, 1))
941 SDValue Add = Op.getOperand(0);
942 if (Add.getOpcode() != ISD::ADD)
943 return SDValue();
944
945 SDValue ExtOpA = Add.getOperand(0);
946 SDValue ExtOpB = Add.getOperand(1);
947 SDValue Add2;
948 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
949 ConstantSDNode *ConstOp;
950 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
951 ConstOp->isOne()) {
952 ExtOpA = Op1;
953 ExtOpB = Op3;
954 Add2 = A;
955 return true;
956 }
957 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
958 ConstOp->isOne()) {
959 ExtOpA = Op1;
960 ExtOpB = Op2;
961 Add2 = A;
962 return true;
963 }
964 return false;
965 };
966 bool IsCeil =
967 (ExtOpA.getOpcode() == ISD::ADD &&
968 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
969 (ExtOpB.getOpcode() == ISD::ADD &&
970 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
971
972 // If the shift is signed (sra):
973 // - Needs >= 2 sign bit for both operands.
974 // - Needs >= 2 zero bits.
975 // If the shift is unsigned (srl):
976 // - Needs >= 1 zero bit for both operands.
977 // - Needs 1 demanded bit zero and >= 2 sign bits.
978 unsigned ShiftOpc = Op.getOpcode();
979 bool IsSigned = false;
980 unsigned KnownBits;
981 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
982 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
983 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
984 unsigned NumZeroA =
985 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
986 unsigned NumZeroB =
987 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
988 unsigned NumZero = std::min(NumZeroA, NumZeroB);
989
990 switch (ShiftOpc) {
991 default:
992 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
993 case ISD::SRA: {
994 if (NumZero >= 2 && NumSigned < NumZero) {
995 IsSigned = false;
996 KnownBits = NumZero;
997 break;
998 }
999 if (NumSigned >= 1) {
1000 IsSigned = true;
1001 KnownBits = NumSigned;
1002 break;
1003 }
1004 return SDValue();
1005 }
1006 case ISD::SRL: {
1007 if (NumZero >= 1 && NumSigned < NumZero) {
1008 IsSigned = false;
1009 KnownBits = NumZero;
1010 break;
1011 }
1012 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1013 IsSigned = true;
1014 KnownBits = NumSigned;
1015 break;
1016 }
1017 return SDValue();
1018 }
1019 }
1020
1021 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1022 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1023
1024 // Find the smallest power-2 type that is legal for this vector size and
1025 // operation, given the original type size and the number of known sign/zero
1026 // bits.
1027 EVT VT = Op.getValueType();
1028 unsigned MinWidth =
1029 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1030 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1031 if (VT.isVector())
1032 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1033 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1034 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1035 // larger type size to do the transform.
1036 if (((!IsSigned && Add->getFlags().hasNoUnsignedWrap() &&
1037 (!Add2 || Add2->getFlags().hasNoUnsignedWrap())) ||
1038 (IsSigned && Add->getFlags().hasNoSignedWrap() &&
1039 (!Add2 || Add2->getFlags().hasNoSignedWrap()))) &&
1040 TLI.isOperationLegalOrCustom(AVGOpc, VT)) {
1041 NVT = VT;
1042 } else
1043 return SDValue();
1044 }
1045
1046 SDLoc DL(Op);
1047 SDValue ResultAVG =
1048 DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1049 DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1050 return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1051 ResultAVG);
1052}
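// --- Illustrative, self-contained check (added for exposition) --------------
// The rewrite above relies on these identities for values that fit the narrow
// type (shown for zero-extended 8-bit operands): the widened shr(add(...), 1)
// patterns equal AVGFLOORU / AVGCEILU computed without widening. Not part of
// the original file.
static bool avgRewriteSketch(unsigned char A, unsigned char B) {
  unsigned Floor = (unsigned(A) + unsigned(B)) >> 1;    // shr(add(ext,ext),1)
  unsigned Ceil = (unsigned(A) + unsigned(B) + 1) >> 1; // shr(add(add,1),1)
  unsigned AvgFloorU = (A & B) + ((A ^ B) >> 1);        // ISD::AVGFLOORU
  unsigned AvgCeilU = (A | B) - ((A ^ B) >> 1);         // ISD::AVGCEILU
  return Floor == AvgFloorU && Ceil == AvgCeilU;
}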
1053
1054/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1055/// result of Op are ever used downstream. If we can use this information to
1056/// simplify Op, create a new simplified DAG node and return true, returning the
1057/// original and new nodes in Old and New. Otherwise, analyze the expression and
1058/// return a mask of Known bits for the expression (used to simplify the
1059/// caller). The Known bits may only be accurate for those bits in the
1060/// OriginalDemandedBits and OriginalDemandedElts.
1061bool TargetLowering::SimplifyDemandedBits(
1062 SDValue Op, const APInt &OriginalDemandedBits,
1063 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1064 unsigned Depth, bool AssumeSingleUse) const {
1065 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1066 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1067 "Mask size mismatches value type size!");
1068
1069 // Don't know anything.
1070 Known = KnownBits(BitWidth);
1071
1072 EVT VT = Op.getValueType();
1073 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1074 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1075 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1076 "Unexpected vector size");
1077
1078 APInt DemandedBits = OriginalDemandedBits;
1079 APInt DemandedElts = OriginalDemandedElts;
1080 SDLoc dl(Op);
1081 auto &DL = TLO.DAG.getDataLayout();
1082
1083 // Undef operand.
1084 if (Op.isUndef())
1085 return false;
1086
1087 // We can't simplify target constants.
1088 if (Op.getOpcode() == ISD::TargetConstant)
1089 return false;
1090
1091 if (Op.getOpcode() == ISD::Constant) {
1092 // We know all of the bits for a constant!
1093 Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1094 return false;
1095 }
1096
1097 if (Op.getOpcode() == ISD::ConstantFP) {
1098 // We know all of the bits for a floating point constant!
1099 Known = KnownBits::makeConstant(
1100 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1101 return false;
1102 }
1103
1104 // Other users may use these bits.
1105 bool HasMultiUse = false;
1106 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1107 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1108 // Limit search depth.
1109 return false;
1110 }
1111 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1112 DemandedBits = APInt::getAllOnes(BitWidth);
1113 DemandedElts = APInt::getAllOnes(NumElts);
1114 HasMultiUse = true;
1115 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1116 // Not demanding any bits/elts from Op.
1117 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1118 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1119 // Limit search depth.
1120 return false;
1121 }
1122
1123 KnownBits Known2;
1124 switch (Op.getOpcode()) {
1125 case ISD::SCALAR_TO_VECTOR: {
1126 if (VT.isScalableVector())
1127 return false;
1128 if (!DemandedElts[0])
1129 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1130
1131 KnownBits SrcKnown;
1132 SDValue Src = Op.getOperand(0);
1133 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1134 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1135 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1136 return true;
1137
1138 // Upper elements are undef, so only get the knownbits if we just demand
1139 // the bottom element.
1140 if (DemandedElts == 1)
1141 Known = SrcKnown.anyextOrTrunc(BitWidth);
1142 break;
1143 }
1144 case ISD::BUILD_VECTOR:
1145 // Collect the known bits that are shared by every demanded element.
1146 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1147 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1148 return false; // Don't fall through, will infinitely loop.
1149 case ISD::LOAD: {
1150 auto *LD = cast<LoadSDNode>(Op);
1151 if (getTargetConstantFromLoad(LD)) {
1152 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1153 return false; // Don't fall through, will infinitely loop.
1154 }
1155 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1156 // If this is a ZEXTLoad and we are looking at the loaded value.
1157 EVT MemVT = LD->getMemoryVT();
1158 unsigned MemBits = MemVT.getScalarSizeInBits();
1159 Known.Zero.setBitsFrom(MemBits);
1160 return false; // Don't fall through, will infinitely loop.
1161 }
1162 break;
1163 }
1164 case ISD::INSERT_VECTOR_ELT: {
1165 if (VT.isScalableVector())
1166 return false;
1167 SDValue Vec = Op.getOperand(0);
1168 SDValue Scl = Op.getOperand(1);
1169 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1170 EVT VecVT = Vec.getValueType();
1171
1172 // If index isn't constant, assume we need all vector elements AND the
1173 // inserted element.
1174 APInt DemandedVecElts(DemandedElts);
1175 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1176 unsigned Idx = CIdx->getZExtValue();
1177 DemandedVecElts.clearBit(Idx);
1178
1179 // Inserted element is not required.
1180 if (!DemandedElts[Idx])
1181 return TLO.CombineTo(Op, Vec);
1182 }
1183
1184 KnownBits KnownScl;
1185 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1186 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1187 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1188 return true;
1189
1190 Known = KnownScl.anyextOrTrunc(BitWidth);
1191
1192 KnownBits KnownVec;
1193 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1194 Depth + 1))
1195 return true;
1196
1197 if (!!DemandedVecElts)
1198 Known = KnownBits::commonBits(Known, KnownVec);
1199
1200 return false;
1201 }
1202 case ISD::INSERT_SUBVECTOR: {
1203 if (VT.isScalableVector())
1204 return false;
1205 // Demand any elements from the subvector and the remainder from the src its
1206 // inserted into.
1207 SDValue Src = Op.getOperand(0);
1208 SDValue Sub = Op.getOperand(1);
1209 uint64_t Idx = Op.getConstantOperandVal(2);
1210 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1211 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1212 APInt DemandedSrcElts = DemandedElts;
1213 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1214
1215 KnownBits KnownSub, KnownSrc;
1216 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1217 Depth + 1))
1218 return true;
1219 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1220 Depth + 1))
1221 return true;
1222
1223 Known.Zero.setAllBits();
1224 Known.One.setAllBits();
1225 if (!!DemandedSubElts)
1226 Known = KnownBits::commonBits(Known, KnownSub);
1227 if (!!DemandedSrcElts)
1228 Known = KnownBits::commonBits(Known, KnownSrc);
1229
1230 // Attempt to avoid multi-use src if we don't need anything from it.
1231 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1232 !DemandedSrcElts.isAllOnes()) {
1233 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1234 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1235 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1236 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1237 if (NewSub || NewSrc) {
1238 NewSub = NewSub ? NewSub : Sub;
1239 NewSrc = NewSrc ? NewSrc : Src;
1240 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1241 Op.getOperand(2));
1242 return TLO.CombineTo(Op, NewOp);
1243 }
1244 }
1245 break;
1246 }
1247 case ISD::EXTRACT_SUBVECTOR: {
1248 if (VT.isScalableVector())
1249 return false;
1250 // Offset the demanded elts by the subvector index.
1251 SDValue Src = Op.getOperand(0);
1252 if (Src.getValueType().isScalableVector())
1253 break;
1254 uint64_t Idx = Op.getConstantOperandVal(1);
1255 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1256 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1257
1258 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1259 Depth + 1))
1260 return true;
1261
1262 // Attempt to avoid multi-use src if we don't need anything from it.
1263 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1264 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1265 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1266 if (DemandedSrc) {
1267 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1268 Op.getOperand(1));
1269 return TLO.CombineTo(Op, NewOp);
1270 }
1271 }
1272 break;
1273 }
1274 case ISD::CONCAT_VECTORS: {
1275 if (VT.isScalableVector())
1276 return false;
1277 Known.Zero.setAllBits();
1278 Known.One.setAllBits();
1279 EVT SubVT = Op.getOperand(0).getValueType();
1280 unsigned NumSubVecs = Op.getNumOperands();
1281 unsigned NumSubElts = SubVT.getVectorNumElements();
1282 for (unsigned i = 0; i != NumSubVecs; ++i) {
1283 APInt DemandedSubElts =
1284 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1285 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1286 Known2, TLO, Depth + 1))
1287 return true;
1288 // Known bits are shared by every demanded subvector element.
1289 if (!!DemandedSubElts)
1290 Known = KnownBits::commonBits(Known, Known2);
1291 }
1292 break;
1293 }
1294 case ISD::VECTOR_SHUFFLE: {
1295 assert(!VT.isScalableVector());
1296 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1297
1298 // Collect demanded elements from shuffle operands..
1299 APInt DemandedLHS, DemandedRHS;
1300 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1301 DemandedRHS))
1302 break;
1303
1304 if (!!DemandedLHS || !!DemandedRHS) {
1305 SDValue Op0 = Op.getOperand(0);
1306 SDValue Op1 = Op.getOperand(1);
1307
1308 Known.Zero.setAllBits();
1309 Known.One.setAllBits();
1310 if (!!DemandedLHS) {
1311 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1312 Depth + 1))
1313 return true;
1314 Known = KnownBits::commonBits(Known, Known2);
1315 }
1316 if (!!DemandedRHS) {
1317 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1318 Depth + 1))
1319 return true;
1320 Known = KnownBits::commonBits(Known, Known2);
1321 }
1322
1323 // Attempt to avoid multi-use ops if we don't need anything from them.
1324 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1325 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1326 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1327 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1328 if (DemandedOp0 || DemandedOp1) {
1329 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1330 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1331 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1332 return TLO.CombineTo(Op, NewOp);
1333 }
1334 }
1335 break;
1336 }
1337 case ISD::AND: {
1338 SDValue Op0 = Op.getOperand(0);
1339 SDValue Op1 = Op.getOperand(1);
1340
1341 // If the RHS is a constant, check to see if the LHS would be zero without
1342 // using the bits from the RHS. Below, we use knowledge about the RHS to
1343 // simplify the LHS, here we're using information from the LHS to simplify
1344 // the RHS.
1345 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1346 // Do not increment Depth here; that can cause an infinite loop.
1347 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1348 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1349 if ((LHSKnown.Zero & DemandedBits) ==
1350 (~RHSC->getAPIntValue() & DemandedBits))
1351 return TLO.CombineTo(Op, Op0);
1352
1353 // If any of the set bits in the RHS are known zero on the LHS, shrink
1354 // the constant.
1355 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1356 DemandedElts, TLO))
1357 return true;
1358
1359 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1360 // constant, but if this 'and' is only clearing bits that were just set by
1361 // the xor, then this 'and' can be eliminated by shrinking the mask of
1362 // the xor. For example, for a 32-bit X:
1363 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1364 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1365 LHSKnown.One == ~RHSC->getAPIntValue()) {
1366 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1367 return TLO.CombineTo(Op, Xor);
1368 }
1369 }
1370
1371 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1372 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1373 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1374 (Op0.getOperand(0).isUndef() ||
1375 ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1376 Op0->hasOneUse()) {
1377 unsigned NumSubElts =
1378 Op0.getOperand(1).getValueType().getVectorNumElements();
1379 unsigned SubIdx = Op0.getConstantOperandVal(2);
1380 APInt DemandedSub =
1381 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1382 KnownBits KnownSubMask =
1383 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1384 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1385 SDValue NewAnd =
1386 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1387 SDValue NewInsert =
1388 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1389 Op0.getOperand(1), Op0.getOperand(2));
1390 return TLO.CombineTo(Op, NewInsert);
1391 }
1392 }
1393
1394 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1395 Depth + 1))
1396 return true;
1397 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1398 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1399 Known2, TLO, Depth + 1))
1400 return true;
1401 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1402
1403 // If all of the demanded bits are known one on one side, return the other.
1404 // These bits cannot contribute to the result of the 'and'.
1405 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1406 return TLO.CombineTo(Op, Op0);
1407 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1408 return TLO.CombineTo(Op, Op1);
1409 // If all of the demanded bits in the inputs are known zeros, return zero.
1410 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1411 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1412 // If the RHS is a constant, see if we can simplify it.
1413 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1414 TLO))
1415 return true;
1416 // If the operation can be done in a smaller type, do so.
1417 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1418 return true;
1419
1420 // Attempt to avoid multi-use ops if we don't need anything from them.
1421 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1422 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1423 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1424 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1425 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1426 if (DemandedOp0 || DemandedOp1) {
1427 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1428 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1429 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1430 return TLO.CombineTo(Op, NewOp);
1431 }
1432 }
1433
1434 Known &= Known2;
1435 break;
1436 }
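// Illustrative worked example (added for exposition; not part of this file):
// with DemandedBits = 0xFF and t = and(x, 0x0000FF00), every demanded result
// bit is already known zero, so the node folds to the constant 0; with
// t = and(x, 0x00FF00FF) the constant is shrunk to 0xFF by the
// ShrinkDemandedConstant call above.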
1437 case ISD::OR: {
1438 SDValue Op0 = Op.getOperand(0);
1439 SDValue Op1 = Op.getOperand(1);
1440
1441 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1442 Depth + 1))
1443 return true;
1444 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1445 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1446 Known2, TLO, Depth + 1))
1447 return true;
1448 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1449
1450 // If all of the demanded bits are known zero on one side, return the other.
1451 // These bits cannot contribute to the result of the 'or'.
1452 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1453 return TLO.CombineTo(Op, Op0);
1454 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1455 return TLO.CombineTo(Op, Op1);
1456 // If the RHS is a constant, see if we can simplify it.
1457 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1458 return true;
1459 // If the operation can be done in a smaller type, do so.
1460 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1461 return true;
1462
1463 // Attempt to avoid multi-use ops if we don't need anything from them.
1464 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1465 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1466 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1467 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1468 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1469 if (DemandedOp0 || DemandedOp1) {
1470 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1471 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1472 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1473 return TLO.CombineTo(Op, NewOp);
1474 }
1475 }
1476
1477 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1478 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1479 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1480 Op0->hasOneUse() && Op1->hasOneUse()) {
1481 // Attempt to match all commutations - m_c_Or would've been useful!
1482 for (int I = 0; I != 2; ++I) {
1483 SDValue X = Op.getOperand(I).getOperand(0);
1484 SDValue C1 = Op.getOperand(I).getOperand(1);
1485 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1486 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1487 if (Alt.getOpcode() == ISD::OR) {
1488 for (int J = 0; J != 2; ++J) {
1489 if (X == Alt.getOperand(J)) {
1490 SDValue Y = Alt.getOperand(1 - J);
1491 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1492 {C1, C2})) {
1493 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1494 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1495 return TLO.CombineTo(
1496 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1497 }
1498 }
1499 }
1500 }
1501 }
1502 }
1503
1504 Known |= Known2;
1505 break;
1506 }
1507 case ISD::XOR: {
1508 SDValue Op0 = Op.getOperand(0);
1509 SDValue Op1 = Op.getOperand(1);
1510
1511 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1512 Depth + 1))
1513 return true;
1514 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1515 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1516 Depth + 1))
1517 return true;
1518 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1519
1520 // If all of the demanded bits are known zero on one side, return the other.
1521 // These bits cannot contribute to the result of the 'xor'.
1522 if (DemandedBits.isSubsetOf(Known.Zero))
1523 return TLO.CombineTo(Op, Op0);
1524 if (DemandedBits.isSubsetOf(Known2.Zero))
1525 return TLO.CombineTo(Op, Op1);
1526 // If the operation can be done in a smaller type, do so.
1527 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1528 return true;
1529
1530 // If all of the unknown bits are known to be zero on one side or the other
1531 // turn this into an *inclusive* or.
1532 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1533 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1534 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1535
1536 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1537 if (C) {
1538 // If one side is a constant, and all of the set bits in the constant are
1539 // also known set on the other side, turn this into an AND, as we know
1540 // the bits will be cleared.
1541 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1542 // NB: it is okay if more bits are known than are requested
1543 if (C->getAPIntValue() == Known2.One) {
1544 SDValue ANDC =
1545 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1546 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1547 }
1548
1549 // If the RHS is a constant, see if we can change it. Don't alter a -1
1550 // constant because that's a 'not' op, and that is better for combining
1551 // and codegen.
1552 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1553 // We're flipping all demanded bits. Flip the undemanded bits too.
1554 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1555 return TLO.CombineTo(Op, New);
1556 }
1557
1558 unsigned Op0Opcode = Op0.getOpcode();
1559 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1560 if (ConstantSDNode *ShiftC =
1561 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1562 // Don't crash on an oversized shift. We can not guarantee that a
1563 // bogus shift has been simplified to undef.
1564 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1565 uint64_t ShiftAmt = ShiftC->getZExtValue();
1566 APInt Ones = APInt::getAllOnes(BitWidth);
1567 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1568 : Ones.lshr(ShiftAmt);
1569 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1570 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1571 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1572 // If the xor constant is a demanded mask, do a 'not' before the
1573 // shift:
1574 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1575 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1576 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1577 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1578 Op0.getOperand(1)));
1579 }
1580 }
1581 }
1582 }
1583 }
1584
1585 // If we can't turn this into a 'not', try to shrink the constant.
1586 if (!C || !C->isAllOnes())
1587 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1588 return true;
1589
1590 // Attempt to avoid multi-use ops if we don't need anything from them.
1591 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1592 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1593 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1594 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1595 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1596 if (DemandedOp0 || DemandedOp1) {
1597 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1598 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1599 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1600 return TLO.CombineTo(Op, NewOp);
1601 }
1602 }
1603
1604 Known ^= Known2;
1605 break;
1606 }
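// Illustrative worked example (added for exposition; not part of this file):
// with DemandedBits = 0xFF (and a wider-than-8-bit type), t = xor(x, 0xFF)
// has a constant covering every demanded bit, so t is rewritten as not(x);
// conversely xor(x, 0x100) sets no demanded bit, so the node folds to x via
// the Known.Zero subset checks above.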
1607 case ISD::SELECT:
1608 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1609 Depth + 1))
1610 return true;
1611 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1612 Depth + 1))
1613 return true;
1614 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1615 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1616
1617 // If the operands are constants, see if we can simplify them.
1618 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1619 return true;
1620
1621 // Only known if known in both the LHS and RHS.
1622 Known = KnownBits::commonBits(Known, Known2);
1623 break;
1624 case ISD::VSELECT:
1625 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1626 Known, TLO, Depth + 1))
1627 return true;
1628 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1629 Known2, TLO, Depth + 1))
1630 return true;
1631 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1632 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1633
1634 // Only known if known in both the LHS and RHS.
1635 Known = KnownBits::commonBits(Known, Known2);
1636 break;
1637 case ISD::SELECT_CC:
1638 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1639 Depth + 1))
1640 return true;
1641 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1642 Depth + 1))
1643 return true;
1644 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1645 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1646
1647 // If the operands are constants, see if we can simplify them.
1648 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1649 return true;
1650
1651 // Only known if known in both the LHS and RHS.
1652 Known = KnownBits::commonBits(Known, Known2);
1653 break;
1654 case ISD::SETCC: {
1655 SDValue Op0 = Op.getOperand(0);
1656 SDValue Op1 = Op.getOperand(1);
1657 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1658 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1659 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1660 // -1, we may be able to bypass the setcc.
1661 if (DemandedBits.isSignMask() &&
1662 Op0.getScalarValueSizeInBits() == BitWidth &&
1663 getBooleanContents(Op0.getValueType()) ==
1664 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1665 // If we're testing X < 0, then this compare isn't needed - just use X!
1666 // FIXME: We're limiting to integer types here, but this should also work
1667 // if we don't care about FP signed-zero. The use of SETLT with FP means
1668 // that we don't care about NaNs.
1669 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1670 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1671 return TLO.CombineTo(Op, Op0);
1672
1673 // TODO: Should we check for other forms of sign-bit comparisons?
1674 // Examples: X <= -1, X >= 0
1675 }
1676 if (getBooleanContents(Op0.getValueType()) ==
1677 TargetLowering::ZeroOrOneBooleanContent &&
1678 BitWidth > 1)
1679 Known.Zero.setBitsFrom(1);
1680 break;
1681 }
1682 case ISD::SHL: {
1683 SDValue Op0 = Op.getOperand(0);
1684 SDValue Op1 = Op.getOperand(1);
1685 EVT ShiftVT = Op1.getValueType();
1686
1687 if (const APInt *SA =
1688 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1689 unsigned ShAmt = SA->getZExtValue();
1690 if (ShAmt == 0)
1691 return TLO.CombineTo(Op, Op0);
1692
1693 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1694 // single shift. We can do this if the bottom bits (which are shifted
1695 // out) are never demanded.
1696 // TODO - support non-uniform vector amounts.
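// For example, when the low five result bits are not demanded:
//   (X >>u 3) << 5 --> X << 2
// (and if C1 exceeded ShAmt, the combined form would be a right shift instead).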
1697 if (Op0.getOpcode() == ISD::SRL) {
1698 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1699 if (const APInt *SA2 =
1700 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1701 unsigned C1 = SA2->getZExtValue();
1702 unsigned Opc = ISD::SHL;
1703 int Diff = ShAmt - C1;
1704 if (Diff < 0) {
1705 Diff = -Diff;
1706 Opc = ISD::SRL;
1707 }
1708 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1709 return TLO.CombineTo(
1710 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1711 }
1712 }
1713 }
1714
1715 // Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
1716 // are not demanded. This will likely allow the anyext to be folded away.
1717 // TODO - support non-uniform vector amounts.
1718 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1719 SDValue InnerOp = Op0.getOperand(0);
1720 EVT InnerVT = InnerOp.getValueType();
1721 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1722 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1723 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1724 SDValue NarrowShl = TLO.DAG.getNode(
1725 ISD::SHL, dl, InnerVT, InnerOp,
1726 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1727 return TLO.CombineTo(
1728 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1729 }
1730
1731 // Repeat the SHL optimization above in cases where an extension
1732 // intervenes: (shl (anyext (shr x, c1)), c2) to
1733 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1734 // aren't demanded (as above) and that the shifted upper c1 bits of
1735 // x aren't demanded.
1736 // TODO - support non-uniform vector amounts.
1737 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1738 InnerOp.hasOneUse()) {
1739 if (const APInt *SA2 =
1740 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1741 unsigned InnerShAmt = SA2->getZExtValue();
1742 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1743 DemandedBits.getActiveBits() <=
1744 (InnerBits - InnerShAmt + ShAmt) &&
1745 DemandedBits.countr_zero() >= ShAmt) {
1746 SDValue NewSA =
1747 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1748 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1749 InnerOp.getOperand(0));
1750 return TLO.CombineTo(
1751 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1752 }
1753 }
1754 }
1755 }
1756
1757 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1758 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1759 Depth + 1))
1760 return true;
1761 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1762 Known.Zero <<= ShAmt;
1763 Known.One <<= ShAmt;
1764 // low bits known zero.
1765 Known.Zero.setLowBits(ShAmt);
1766
1767 // Attempt to avoid multi-use ops if we don't need anything from them.
1768 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1769 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1770 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1771 if (DemandedOp0) {
1772 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1773 return TLO.CombineTo(Op, NewOp);
1774 }
1775 }
1776
1777 // Try shrinking the operation as long as the shift amount will still be
1778 // in range.
1779 if ((ShAmt < DemandedBits.getActiveBits()) &&
1780 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1781 return true;
1782 } else {
1783 // This is a variable shift, so we can't shift the demand mask by a known
1784 // amount. But if we are not demanding high bits, then we are not
1785 // demanding those bits from the pre-shifted operand either.
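// For example, for an i32 shl where only the low eight result bits are
// demanded, those bits can only come from bits [7:0] of the pre-shifted
// operand, no matter what the (unknown) shift amount turns out to be.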
1786 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1787 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1788 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1789 Depth + 1)) {
1790 SDNodeFlags Flags = Op.getNode()->getFlags();
1791 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1792 // Disable the nsw and nuw flags. We can no longer guarantee that we
1793 // won't wrap after simplification.
1794 Flags.setNoSignedWrap(false);
1795 Flags.setNoUnsignedWrap(false);
1796 Op->setFlags(Flags);
1797 }
1798 return true;
1799 }
1800 Known.resetAll();
1801 }
1802 }
1803
1804 // If we are only demanding sign bits then we can use the shift source
1805 // directly.
1806 if (const APInt *MaxSA =
1807 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1808 unsigned ShAmt = MaxSA->getZExtValue();
1809 unsigned NumSignBits =
1810 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1811 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1812 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1813 return TLO.CombineTo(Op, Op0);
1814 }
1815 break;
1816 }
1817 case ISD::SRL: {
1818 SDValue Op0 = Op.getOperand(0);
1819 SDValue Op1 = Op.getOperand(1);
1820 EVT ShiftVT = Op1.getValueType();
1821
1822 // Try to match AVG patterns.
1823 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1824 DemandedElts, Depth + 1))
1825 return TLO.CombineTo(Op, AVG);
1826
1827 if (const APInt *SA =
1828 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1829 unsigned ShAmt = SA->getZExtValue();
1830 if (ShAmt == 0)
1831 return TLO.CombineTo(Op, Op0);
1832
1833 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1834 // single shift. We can do this if the top bits (which are shifted out)
1835 // are never demanded.
1836 // TODO - support non-uniform vector amounts.
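// For example, when the top five result bits are not demanded:
//   (X << 3) >>u 5 --> X >>u 2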
1837 if (Op0.getOpcode() == ISD::SHL) {
1838 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1839 if (const APInt *SA2 =
1840 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1841 unsigned C1 = SA2->getZExtValue();
1842 unsigned Opc = ISD::SRL;
1843 int Diff = ShAmt - C1;
1844 if (Diff < 0) {
1845 Diff = -Diff;
1846 Opc = ISD::SHL;
1847 }
1848 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1849 return TLO.CombineTo(
1850 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1851 }
1852 }
1853 }
1854
1855 APInt InDemandedMask = (DemandedBits << ShAmt);
1856
1857 // If the shift is exact, then it does demand the low bits (and knows that
1858 // they are zero).
1859 if (Op->getFlags().hasExact())
1860 InDemandedMask.setLowBits(ShAmt);
1861
1862 // Compute the new bits that are at the top now.
1863 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1864 Depth + 1))
1865 return true;
1866 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1867 Known.Zero.lshrInPlace(ShAmt);
1868 Known.One.lshrInPlace(ShAmt);
1869 // High bits known zero.
1870 Known.Zero.setHighBits(ShAmt);
1871
1872 // Attempt to avoid multi-use ops if we don't need anything from them.
1873 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1874 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1875 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1876 if (DemandedOp0) {
1877 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1878 return TLO.CombineTo(Op, NewOp);
1879 }
1880 }
1881 }
1882 break;
1883 }
1884 case ISD::SRA: {
1885 SDValue Op0 = Op.getOperand(0);
1886 SDValue Op1 = Op.getOperand(1);
1887 EVT ShiftVT = Op1.getValueType();
1888
1889 // If we only want bits that already match the signbit then we don't need
1890 // to shift.
1891 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1892 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1893 NumHiDemandedBits)
1894 return TLO.CombineTo(Op, Op0);
1895
1896 // If this is an arithmetic shift right and only the low-bit is set, we can
1897 // always convert this into a logical shr, even if the shift amount is
1898 // variable. The low bit of the shift cannot be an input sign bit unless
1899 // the shift amount is >= the size of the datatype, which is undefined.
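// For example, with only bit 0 demanded, (X sra Y) and (X srl Y) both place
// bit Y of X into bit 0 for any in-range shift amount.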
1900 if (DemandedBits.isOne())
1901 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1902
1903 // Try to match AVG patterns.
1904 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1905 DemandedElts, Depth + 1))
1906 return TLO.CombineTo(Op, AVG);
1907
1908 if (const APInt *SA =
1909 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1910 unsigned ShAmt = SA->getZExtValue();
1911 if (ShAmt == 0)
1912 return TLO.CombineTo(Op, Op0);
1913
1914 APInt InDemandedMask = (DemandedBits << ShAmt);
1915
1916 // If the shift is exact, then it does demand the low bits (and knows that
1917 // they are zero).
1918 if (Op->getFlags().hasExact())
1919 InDemandedMask.setLowBits(ShAmt);
1920
1921 // If any of the demanded bits are produced by the sign extension, we also
1922 // demand the input sign bit.
1923 if (DemandedBits.countl_zero() < ShAmt)
1924 InDemandedMask.setSignBit();
1925
1926 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1927 Depth + 1))
1928 return true;
1929 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1930 Known.Zero.lshrInPlace(ShAmt);
1931 Known.One.lshrInPlace(ShAmt);
1932
1933 // If the input sign bit is known to be zero, or if none of the top bits
1934 // are demanded, turn this into an unsigned shift right.
1935 if (Known.Zero[BitWidth - ShAmt - 1] ||
1936 DemandedBits.countl_zero() >= ShAmt) {
1937 SDNodeFlags Flags;
1938 Flags.setExact(Op->getFlags().hasExact());
1939 return TLO.CombineTo(
1940 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1941 }
1942
1943 int Log2 = DemandedBits.exactLogBase2();
1944 if (Log2 >= 0) {
1945 // The bit must come from the sign.
1946 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1947 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1948 }
1949
1950 if (Known.One[BitWidth - ShAmt - 1])
1951 // New bits are known one.
1952 Known.One.setHighBits(ShAmt);
1953
1954 // Attempt to avoid multi-use ops if we don't need anything from them.
1955 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1956 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1957 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1958 if (DemandedOp0) {
1959 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1960 return TLO.CombineTo(Op, NewOp);
1961 }
1962 }
1963 }
1964 break;
1965 }
1966 case ISD::FSHL:
1967 case ISD::FSHR: {
1968 SDValue Op0 = Op.getOperand(0);
1969 SDValue Op1 = Op.getOperand(1);
1970 SDValue Op2 = Op.getOperand(2);
1971 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1972
1973 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1974 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1975
1976 // For fshl, 0-shift returns the 1st arg.
1977 // For fshr, 0-shift returns the 2nd arg.
1978 if (Amt == 0) {
1979 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1980 Known, TLO, Depth + 1))
1981 return true;
1982 break;
1983 }
1984
1985 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1986 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
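// For example, an i8 fshl by 3 computes (Op0 << 3) | (Op1 >> 5), so a demanded
// result bit i maps to Op0 bit (i - 3) when i >= 3 and to Op1 bit (i + 5)
// otherwise; Demanded0 and Demanded1 below encode exactly that mapping.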
1987 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1988 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1989 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1990 Depth + 1))
1991 return true;
1992 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1993 Depth + 1))
1994 return true;
1995
1996 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1997 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1998 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1999 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2000 Known.One |= Known2.One;
2001 Known.Zero |= Known2.Zero;
2002
2003 // Attempt to avoid multi-use ops if we don't need anything from them.
2004 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2005 !DemandedElts.isAllOnes()) {
2006 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2007 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2008 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2009 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2010 if (DemandedOp0 || DemandedOp1) {
2011 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2012 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2013 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2014 DemandedOp1, Op2);
2015 return TLO.CombineTo(Op, NewOp);
2016 }
2017 }
2018 }
2019
2020 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2021 if (isPowerOf2_32(BitWidth)) {
2022 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2023 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2024 Known2, TLO, Depth + 1))
2025 return true;
2026 }
2027 break;
2028 }
2029 case ISD::ROTL:
2030 case ISD::ROTR: {
2031 SDValue Op0 = Op.getOperand(0);
2032 SDValue Op1 = Op.getOperand(1);
2033 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2034
2035 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2036 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2037 return TLO.CombineTo(Op, Op0);
2038
2039 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2040 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2041 unsigned RevAmt = BitWidth - Amt;
2042
2043 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2044 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
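// For example, rotl(X, 3) on i8 takes result bit i from X bit ((i - 3) mod 8),
// so rotating the demanded mask right by 3 gives the bits needed from X.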
2045 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2046 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2047 Depth + 1))
2048 return true;
2049
2050 // rot*(x, 0) --> x
2051 if (Amt == 0)
2052 return TLO.CombineTo(Op, Op0);
2053
2054 // See if we don't demand either half of the rotated bits.
2055 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2056 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2057 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2058 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2059 }
2060 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2061 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2062 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2063 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2064 }
2065 }
2066
2067 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2068 if (isPowerOf2_32(BitWidth)) {
2069 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2070 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2071 Depth + 1))
2072 return true;
2073 }
2074 break;
2075 }
2076 case ISD::UMIN: {
2077 // Check if one arg is always less than (or equal) to the other arg.
2078 SDValue Op0 = Op.getOperand(0);
2079 SDValue Op1 = Op.getOperand(1);
2080 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2081 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2082 Known = KnownBits::umin(Known0, Known1);
2083 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2084 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2085 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2086 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2087 break;
2088 }
2089 case ISD::UMAX: {
2090 // Check if one arg is always greater than (or equal) to the other arg.
2091 SDValue Op0 = Op.getOperand(0);
2092 SDValue Op1 = Op.getOperand(1);
2093 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2094 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2095 Known = KnownBits::umax(Known0, Known1);
2096 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2097 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2098 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2099 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2100 break;
2101 }
2102 case ISD::BITREVERSE: {
2103 SDValue Src = Op.getOperand(0);
2104 APInt DemandedSrcBits = DemandedBits.reverseBits();
2105 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2106 Depth + 1))
2107 return true;
2108 Known.One = Known2.One.reverseBits();
2109 Known.Zero = Known2.Zero.reverseBits();
2110 break;
2111 }
2112 case ISD::BSWAP: {
2113 SDValue Src = Op.getOperand(0);
2114
2115 // If the only bits demanded come from one byte of the bswap result,
2116 // just shift the input byte into position to eliminate the bswap.
2117 unsigned NLZ = DemandedBits.countl_zero();
2118 unsigned NTZ = DemandedBits.countr_zero();
2119
2120 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2121 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2122 // have 14 leading zeros, round to 8.
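// For example, an i32 bswap where only bits [23:16] are demanded gives NLZ = 8
// and NTZ = 16; the demanded byte comes from bits [15:8] of the source, so a
// single 'shl Src, 8' can replace the bswap.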
2123 NLZ = alignDown(NLZ, 8);
2124 NTZ = alignDown(NTZ, 8);
2125 // If we need exactly one byte, we can do this transformation.
2126 if (BitWidth - NLZ - NTZ == 8) {
2127 // Replace this with either a left or right shift to get the byte into
2128 // the right place.
2129 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2130 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2131 EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2132 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2133 SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2134 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2135 return TLO.CombineTo(Op, NewOp);
2136 }
2137 }
2138
2139 APInt DemandedSrcBits = DemandedBits.byteSwap();
2140 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2141 Depth + 1))
2142 return true;
2143 Known.One = Known2.One.byteSwap();
2144 Known.Zero = Known2.Zero.byteSwap();
2145 break;
2146 }
2147 case ISD::CTPOP: {
2148 // If only 1 bit is demanded, replace with PARITY as long as we're before
2149 // op legalization.
2150 // FIXME: Limit to scalars for now.
2151 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2152 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2153 Op.getOperand(0)));
2154
2155 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2156 break;
2157 }
2158 case ISD::SIGN_EXTEND_INREG: {
2159 SDValue Op0 = Op.getOperand(0);
2160 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2161 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2162
2163 // If we only care about the highest bit, don't bother shifting right.
2164 if (DemandedBits.isSignMask()) {
2165 unsigned MinSignedBits =
2166 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2167 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2168 // However, if the input is already sign extended, we expect the sign
2169 // extension to be dropped altogether later and do not simplify.
2170 if (!AlreadySignExtended) {
2171 // Compute the correct shift amount type, which must be getShiftAmountTy
2172 // for scalar types after legalization.
2173 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2174 getShiftAmountTy(VT, DL));
2175 return TLO.CombineTo(Op,
2176 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2177 }
2178 }
2179
2180 // If none of the extended bits are demanded, eliminate the sextinreg.
2181 if (DemandedBits.getActiveBits() <= ExVTBits)
2182 return TLO.CombineTo(Op, Op0);
2183
2184 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2185
2186 // Since the sign extended bits are demanded, we know that the sign
2187 // bit is demanded.
2188 InputDemandedBits.setBit(ExVTBits - 1);
2189
2190 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2191 Depth + 1))
2192 return true;
2193 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2194
2195 // If the sign bit of the input is known set or clear, then we know the
2196 // top bits of the result.
2197
2198 // If the input sign bit is known zero, convert this into a zero extension.
2199 if (Known.Zero[ExVTBits - 1])
2200 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2201
2202 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2203 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2204 Known.One.setBitsFrom(ExVTBits);
2205 Known.Zero &= Mask;
2206 } else { // Input sign bit unknown
2207 Known.Zero &= Mask;
2208 Known.One &= Mask;
2209 }
2210 break;
2211 }
2212 case ISD::BUILD_PAIR: {
2213 EVT HalfVT = Op.getOperand(0).getValueType();
2214 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2215
2216 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2217 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2218
2219 KnownBits KnownLo, KnownHi;
2220
2221 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2222 return true;
2223
2224 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2225 return true;
2226
2227 Known = KnownHi.concat(KnownLo);
2228 break;
2229 }
2230 case ISD::ZERO_EXTEND_VECTOR_INREG:
2231 if (VT.isScalableVector())
2232 return false;
2233 [[fallthrough]];
2234 case ISD::ZERO_EXTEND: {
2235 SDValue Src = Op.getOperand(0);
2236 EVT SrcVT = Src.getValueType();
2237 unsigned InBits = SrcVT.getScalarSizeInBits();
2238 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2239 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2240
2241 // If none of the top bits are demanded, convert this into an any_extend.
2242 if (DemandedBits.getActiveBits() <= InBits) {
2243 // If we only need the non-extended bits of the bottom element
2244 // then we can just bitcast to the result.
2245 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2246 VT.getSizeInBits() == SrcVT.getSizeInBits())
2247 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2248
2249 unsigned Opc =
2250 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2251 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2252 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2253 }
2254
2255 APInt InDemandedBits = DemandedBits.trunc(InBits);
2256 APInt InDemandedElts = DemandedElts.zext(InElts);
2257 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2258 Depth + 1))
2259 return true;
2260 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2261 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2262 Known = Known.zext(BitWidth);
2263
2264 // Attempt to avoid multi-use ops if we don't need anything from them.
2265 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2266 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2267 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2268 break;
2269 }
2270 case ISD::SIGN_EXTEND_VECTOR_INREG:
2271 if (VT.isScalableVector())
2272 return false;
2273 [[fallthrough]];
2274 case ISD::SIGN_EXTEND: {
2275 SDValue Src = Op.getOperand(0);
2276 EVT SrcVT = Src.getValueType();
2277 unsigned InBits = SrcVT.getScalarSizeInBits();
2278 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2279 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2280
2281 // If none of the top bits are demanded, convert this into an any_extend.
2282 if (DemandedBits.getActiveBits() <= InBits) {
2283 // If we only need the non-extended bits of the bottom element
2284 // then we can just bitcast to the result.
2285 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2286 VT.getSizeInBits() == SrcVT.getSizeInBits())
2287 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2288
2289 unsigned Opc =
2290 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2291 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2292 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2293 }
2294
2295 APInt InDemandedBits = DemandedBits.trunc(InBits);
2296 APInt InDemandedElts = DemandedElts.zext(InElts);
2297
2298 // Since some of the sign extended bits are demanded, we know that the sign
2299 // bit is demanded.
2300 InDemandedBits.setBit(InBits - 1);
2301
2302 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2303 Depth + 1))
2304 return true;
2305 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2306 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2307
2308 // If the sign bit is known one, the top bits match.
2309 Known = Known.sext(BitWidth);
2310
2311 // If the sign bit is known zero, convert this to a zero extend.
2312 if (Known.isNonNegative()) {
2313 unsigned Opc =
2314 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2315 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2316 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2317 }
2318
2319 // Attempt to avoid multi-use ops if we don't need anything from them.
2320 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2321 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2322 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2323 break;
2324 }
2325 case ISD::ANY_EXTEND_VECTOR_INREG:
2326 if (VT.isScalableVector())
2327 return false;
2328 [[fallthrough]];
2329 case ISD::ANY_EXTEND: {
2330 SDValue Src = Op.getOperand(0);
2331 EVT SrcVT = Src.getValueType();
2332 unsigned InBits = SrcVT.getScalarSizeInBits();
2333 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2334 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2335
2336 // If we only need the bottom element then we can just bitcast.
2337 // TODO: Handle ANY_EXTEND?
2338 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2339 VT.getSizeInBits() == SrcVT.getSizeInBits())
2340 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2341
2342 APInt InDemandedBits = DemandedBits.trunc(InBits);
2343 APInt InDemandedElts = DemandedElts.zext(InElts);
2344 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2345 Depth + 1))
2346 return true;
2347 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2348 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2349 Known = Known.anyext(BitWidth);
2350
2351 // Attempt to avoid multi-use ops if we don't need anything from them.
2352 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2353 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2354 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2355 break;
2356 }
2357 case ISD::TRUNCATE: {
2358 SDValue Src = Op.getOperand(0);
2359
2360 // Simplify the input, using demanded bit information, and compute the known
2361 // zero/one bits live out.
2362 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2363 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2364 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2365 Depth + 1))
2366 return true;
2367 Known = Known.trunc(BitWidth);
2368
2369 // Attempt to avoid multi-use ops if we don't need anything from them.
2370 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2371 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2372 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2373
2374 // If the input is only used by this truncate, see if we can shrink it based
2375 // on the known demanded bits.
2376 switch (Src.getOpcode()) {
2377 default:
2378 break;
2379 case ISD::SRL:
2380 // Shrink SRL by a constant if none of the high bits shifted in are
2381 // demanded.
2382 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2383 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2384 // undesirable.
2385 break;
2386
2387 if (Src.getNode()->hasOneUse()) {
2388 const APInt *ShAmtC =
2389 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2390 if (!ShAmtC || ShAmtC->uge(BitWidth))
2391 break;
2392 uint64_t ShVal = ShAmtC->getZExtValue();
2393
2394 APInt HighBits =
2395 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2396 HighBits.lshrInPlace(ShVal);
2397 HighBits = HighBits.trunc(BitWidth);
2398
2399 if (!(HighBits & DemandedBits)) {
2400 // None of the shifted in bits are needed. Add a truncate of the
2401 // shift input, then shift it.
2402 SDValue NewShAmt = TLO.DAG.getConstant(
2403 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2404 SDValue NewTrunc =
2405 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2406 return TLO.CombineTo(
2407 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2408 }
2409 }
2410 break;
2411 }
2412
2413 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2414 break;
2415 }
2416 case ISD::AssertZext: {
2417 // AssertZext demands all of the high bits, plus any of the low bits
2418 // demanded by its users.
2419 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2420 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2421 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2422 TLO, Depth + 1))
2423 return true;
2424 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2425
2426 Known.Zero |= ~InMask;
2427 Known.One &= (~Known.Zero);
2428 break;
2429 }
2430 case ISD::EXTRACT_VECTOR_ELT: {
2431 SDValue Src = Op.getOperand(0);
2432 SDValue Idx = Op.getOperand(1);
2433 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2434 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2435
2436 if (SrcEltCnt.isScalable())
2437 return false;
2438
2439 // Demand the bits from every vector element without a constant index.
2440 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2441 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2442 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2443 if (CIdx->getAPIntValue().ult(NumSrcElts))
2444 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2445
2446 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2447 // anything about the extended bits.
2448 APInt DemandedSrcBits = DemandedBits;
2449 if (BitWidth > EltBitWidth)
2450 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2451
2452 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2453 Depth + 1))
2454 return true;
2455
2456 // Attempt to avoid multi-use ops if we don't need anything from them.
2457 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2458 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2459 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2460 SDValue NewOp =
2461 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2462 return TLO.CombineTo(Op, NewOp);
2463 }
2464 }
2465
2466 Known = Known2;
2467 if (BitWidth > EltBitWidth)
2468 Known = Known.anyext(BitWidth);
2469 break;
2470 }
2471 case ISD::BITCAST: {
2472 if (VT.isScalableVector())
2473 return false;
2474 SDValue Src = Op.getOperand(0);
2475 EVT SrcVT = Src.getValueType();
2476 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2477
2478 // If this is an FP->Int bitcast and if the sign bit is the only
2479 // thing demanded, turn this into a FGETSIGN.
2480 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2481 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2482 SrcVT.isFloatingPoint()) {
2483 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2484 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2485 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2486 SrcVT != MVT::f128) {
2487 // Cannot eliminate/lower SHL for f128 yet.
2488 EVT Ty = OpVTLegal ? VT : MVT::i32;
2489 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2490 // place. We expect the SHL to be eliminated by other optimizations.
2491 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2492 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2493 if (!OpVTLegal && OpVTSizeInBits > 32)
2494 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2495 unsigned ShVal = Op.getValueSizeInBits() - 1;
2496 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2497 return TLO.CombineTo(Op,
2498 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2499 }
2500 }
2501
2502 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2503 // Demand the elt/bit if any of the original elts/bits are demanded.
2504 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2505 unsigned Scale = BitWidth / NumSrcEltBits;
2506 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2507 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2508 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2509 for (unsigned i = 0; i != Scale; ++i) {
2510 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2511 unsigned BitOffset = EltOffset * NumSrcEltBits;
2512 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2513 if (!Sub.isZero()) {
2514 DemandedSrcBits |= Sub;
2515 for (unsigned j = 0; j != NumElts; ++j)
2516 if (DemandedElts[j])
2517 DemandedSrcElts.setBit((j * Scale) + i);
2518 }
2519 }
2520
2521 APInt KnownSrcUndef, KnownSrcZero;
2522 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2523 KnownSrcZero, TLO, Depth + 1))
2524 return true;
2525
2526 KnownBits KnownSrcBits;
2527 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2528 KnownSrcBits, TLO, Depth + 1))
2529 return true;
2530 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2531 // TODO - bigendian once we have test coverage.
2532 unsigned Scale = NumSrcEltBits / BitWidth;
2533 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2534 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2535 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2536 for (unsigned i = 0; i != NumElts; ++i)
2537 if (DemandedElts[i]) {
2538 unsigned Offset = (i % Scale) * BitWidth;
2539 DemandedSrcBits.insertBits(DemandedBits, Offset);
2540 DemandedSrcElts.setBit(i / Scale);
2541 }
2542
2543 if (SrcVT.isVector()) {
2544 APInt KnownSrcUndef, KnownSrcZero;
2545 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2546 KnownSrcZero, TLO, Depth + 1))
2547 return true;
2548 }
2549
2550 KnownBits KnownSrcBits;
2551 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2552 KnownSrcBits, TLO, Depth + 1))
2553 return true;
2554 }
2555
2556 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2557 // recursive call where Known may be useful to the caller.
2558 if (Depth > 0) {
2559 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2560 return false;
2561 }
2562 break;
2563 }
2564 case ISD::MUL:
2565 if (DemandedBits.isPowerOf2()) {
2566 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2567 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2568 // odd (has LSB set), then the left-shifted low bit of X is the answer.
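// For example, demanding only bit 3 of (X * 24): 24 has exactly three trailing
// zeros, so that bit equals bit 0 of X, which is what (X << 3) produces there.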
2569 unsigned CTZ = DemandedBits.countr_zero();
2570 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2571 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2572 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2573 SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2574 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2575 return TLO.CombineTo(Op, Shl);
2576 }
2577 }
2578 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2579 // X * X is odd iff X is odd.
2580 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
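// For example, X * X modulo 4 is always 0 (X even) or 1 (X odd), so bit 1 is
// known zero and bit 0 equals X & 1; (X & 1) is therefore an exact replacement
// when only the low two bits are demanded.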
2581 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2582 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2583 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2584 return TLO.CombineTo(Op, And1);
2585 }
2586 [[fallthrough]];
2587 case ISD::ADD:
2588 case ISD::SUB: {
2589 // Add, Sub, and Mul don't demand any bits in positions beyond that
2590 // of the highest bit demanded of them.
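// For example, if only the low 16 bits of an i32 add are demanded, carries out
// of bit 15 can never matter, so only the low 16 bits of each operand are
// demanded (this is what LoMask below captures).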
2591 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2592 SDNodeFlags Flags = Op.getNode()->getFlags();
2593 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2594 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2595 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2596 Depth + 1) ||
2597 SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2598 Depth + 1) ||
2599 // See if the operation should be performed at a smaller bit width.
2600 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2601 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2602 // Disable the nsw and nuw flags. We can no longer guarantee that we
2603 // won't wrap after simplification.
2604 Flags.setNoSignedWrap(false);
2605 Flags.setNoUnsignedWrap(false);
2606 Op->setFlags(Flags);
2607 }
2608 return true;
2609 }
2610
2611 // neg x with only low bit demanded is simply x.
2612 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2613 isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
2614 return TLO.CombineTo(Op, Op1);
2615
2616 // Attempt to avoid multi-use ops if we don't need anything from them.
2617 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2618 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2619 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2620 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2621 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2622 if (DemandedOp0 || DemandedOp1) {
2623 Flags.setNoSignedWrap(false);
2624 Flags.setNoUnsignedWrap(false);
2625 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2626 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2627 SDValue NewOp =
2628 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2629 return TLO.CombineTo(Op, NewOp);
2630 }
2631 }
2632
2633 // If we have a constant operand, we may be able to turn it into -1 if we
2634 // do not demand the high bits. This can make the constant smaller to
2635 // encode, allow more general folding, or match specialized instruction
2636 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2637 // is probably not useful (and could be detrimental).
2638 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2639 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2640 if (C && !C->isAllOnes() && !C->isOne() &&
2641 (C->getAPIntValue() | HighMask).isAllOnes()) {
2642 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2643 // Disable the nsw and nuw flags. We can no longer guarantee that we
2644 // won't wrap after simplification.
2645 Flags.setNoSignedWrap(false);
2646 Flags.setNoUnsignedWrap(false);
2647 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2648 return TLO.CombineTo(Op, NewOp);
2649 }
2650
2651 // Match a multiply with a disguised negated-power-of-2 and convert to
2652 // an equivalent shift-left amount.
2653 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
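// A concrete instance: (X * -8) + Op1 --> Op1 - (X << 3); a constant that only
// becomes -8 once the undemanded high bits are ORed in is treated the same way.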
2654 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2655 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2656 return 0;
2657
2658 // Don't touch opaque constants. Also, ignore zero and power-of-2
2659 // multiplies. Those will get folded later.
2660 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2661 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2662 !MulC->getAPIntValue().isPowerOf2()) {
2663 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2664 if (UnmaskedC.isNegatedPowerOf2())
2665 return (-UnmaskedC).logBase2();
2666 }
2667 return 0;
2668 };
2669
2670 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2671 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2672 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2673 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2674 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2675 return TLO.CombineTo(Op, Res);
2676 };
2677
2678 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2679 if (Op.getOpcode() == ISD::ADD) {
2680 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2681 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2682 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2683 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2684 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2685 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2686 }
2687 if (Op.getOpcode() == ISD::SUB) {
2688 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2689 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2690 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2691 }
2692 }
2693
2694 [[fallthrough]];
2695 }
2696 default:
2697 // We also ask the target about intrinsics (which could be specific to it).
2698 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2699 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2700 // TODO: Probably okay to remove after audit; here to reduce change size
2701 // in initial enablement patch for scalable vectors
2702 if (Op.getValueType().isScalableVector())
2703 break;
2704 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2705 Known, TLO, Depth))
2706 return true;
2707 break;
2708 }
2709
2710 // Just use computeKnownBits to compute output bits.
2711 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2712 break;
2713 }
2714
2715 // If we know the value of all of the demanded bits, return this as a
2716 // constant.
2717 if (!isTargetCanonicalConstantNode(Op) &&
2718 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2719 // Avoid folding to a constant if any OpaqueConstant is involved.
2720 const SDNode *N = Op.getNode();
2721 for (SDNode *Op :
2722 llvm::make_range(N->op_begin(), N->op_end())) {
2723 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2724 if (C->isOpaque())
2725 return false;
2726 }
2727 if (VT.isInteger())
2728 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2729 if (VT.isFloatingPoint())
2730 return TLO.CombineTo(
2731 Op,
2732 TLO.DAG.getConstantFP(
2733 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2734 }
2735
2736 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2737 // Try again just for the original demanded elts.
2738 // Ensure we do this AFTER constant folding above.
2739 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2740 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2741
2742 return false;
2743}
2744
2745 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2746 const APInt &DemandedElts,
2747 DAGCombinerInfo &DCI) const {
2748 SelectionDAG &DAG = DCI.DAG;
2749 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2750 !DCI.isBeforeLegalizeOps());
2751
2752 APInt KnownUndef, KnownZero;
2753 bool Simplified =
2754 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2755 if (Simplified) {
2756 DCI.AddToWorklist(Op.getNode());
2757 DCI.CommitTargetLoweringOpt(TLO);
2758 }
2759
2760 return Simplified;
2761}
2762
2763/// Given a vector binary operation and known undefined elements for each input
2764/// operand, compute whether each element of the output is undefined.
2765 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2766 const APInt &UndefOp0,
2767 const APInt &UndefOp1) {
2768 EVT VT = BO.getValueType();
2769 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2770 "Vector binop only");
2771
2772 EVT EltVT = VT.getVectorElementType();
2773 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2774 assert(UndefOp0.getBitWidth() == NumElts &&
2775 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2776
2777 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2778 const APInt &UndefVals) {
2779 if (UndefVals[Index])
2780 return DAG.getUNDEF(EltVT);
2781
2782 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2783 // Try hard to make sure that the getNode() call is not creating temporary
2784 // nodes. Ignore opaque integers because they do not constant fold.
2785 SDValue Elt = BV->getOperand(Index);
2786 auto *C = dyn_cast<ConstantSDNode>(Elt);
2787 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2788 return Elt;
2789 }
2790
2791 return SDValue();
2792 };
2793
2794 APInt KnownUndef = APInt::getZero(NumElts);
2795 for (unsigned i = 0; i != NumElts; ++i) {
2796 // If both inputs for this element are either constant or undef and match
2797 // the element type, compute the constant/undef result for this element of
2798 // the vector.
2799 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2800 // not handle FP constants. The code within getNode() should be refactored
2801 // to avoid the danger of creating a bogus temporary node here.
2802 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2803 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2804 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2805 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2806 KnownUndef.setBit(i);
2807 }
2808 return KnownUndef;
2809}
2810
2811 bool TargetLowering::SimplifyDemandedVectorElts(
2812 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2813 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2814 bool AssumeSingleUse) const {
2815 EVT VT = Op.getValueType();
2816 unsigned Opcode = Op.getOpcode();
2817 APInt DemandedElts = OriginalDemandedElts;
2818 unsigned NumElts = DemandedElts.getBitWidth();
2819 assert(VT.isVector() && "Expected vector op");
2820
2821 KnownUndef = KnownZero = APInt::getZero(NumElts);
2822
2823 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2824 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2825 return false;
2826
2827 // TODO: For now we assume we know nothing about scalable vectors.
2828 if (VT.isScalableVector())
2829 return false;
2830
2831 assert(VT.getVectorNumElements() == NumElts &&
2832 "Mask size mismatches value type element count!");
2833
2834 // Undef operand.
2835 if (Op.isUndef()) {
2836 KnownUndef.setAllBits();
2837 return false;
2838 }
2839
2840 // If Op has other users, assume that all elements are needed.
2841 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2842 DemandedElts.setAllBits();
2843
2844 // Not demanding any elements from Op.
2845 if (DemandedElts == 0) {
2846 KnownUndef.setAllBits();
2847 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2848 }
2849
2850 // Limit search depth.
2851 if (Depth >= SelectionDAG::MaxRecursionDepth)
2852 return false;
2853
2854 SDLoc DL(Op);
2855 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2856 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2857
2858 // Helper for demanding the specified elements and all the bits of both binary
2859 // operands.
2860 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2861 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2862 TLO.DAG, Depth + 1);
2863 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2864 TLO.DAG, Depth + 1);
2865 if (NewOp0 || NewOp1) {
2866 SDValue NewOp = TLO.DAG.getNode(
2867 Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2868 return TLO.CombineTo(Op, NewOp);
2869 }
2870 return false;
2871 };
2872
2873 switch (Opcode) {
2874 case ISD::SCALAR_TO_VECTOR: {
2875 if (!DemandedElts[0]) {
2876 KnownUndef.setAllBits();
2877 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2878 }
2879 SDValue ScalarSrc = Op.getOperand(0);
2880 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2881 SDValue Src = ScalarSrc.getOperand(0);
2882 SDValue Idx = ScalarSrc.getOperand(1);
2883 EVT SrcVT = Src.getValueType();
2884
2885 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2886
2887 if (SrcEltCnt.isScalable())
2888 return false;
2889
2890 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2891 if (isNullConstant(Idx)) {
2892 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2893 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2894 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2895 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2896 TLO, Depth + 1))
2897 return true;
2898 }
2899 }
2900 KnownUndef.setHighBits(NumElts - 1);
2901 break;
2902 }
2903 case ISD::BITCAST: {
2904 SDValue Src = Op.getOperand(0);
2905 EVT SrcVT = Src.getValueType();
2906
2907 // We only handle vectors here.
2908 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2909 if (!SrcVT.isVector())
2910 break;
2911
2912 // Fast handling of 'identity' bitcasts.
2913 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2914 if (NumSrcElts == NumElts)
2915 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2916 KnownZero, TLO, Depth + 1);
2917
2918 APInt SrcDemandedElts, SrcZero, SrcUndef;
2919
2920 // Bitcast from 'large element' src vector to 'small element' vector, we
2921 // must demand a source element if any DemandedElt maps to it.
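// For example, bitcasting v2i64 to v4i32: demanding either of the i32 result
// elements 2 or 3 demands i64 source element 1.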
2922 if ((NumElts % NumSrcElts) == 0) {
2923 unsigned Scale = NumElts / NumSrcElts;
2924 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2925 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2926 TLO, Depth + 1))
2927 return true;
2928
2929 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2930 // of the large element.
2931 // TODO - bigendian once we have test coverage.
2932 if (IsLE) {
2933 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2934 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2935 for (unsigned i = 0; i != NumElts; ++i)
2936 if (DemandedElts[i]) {
2937 unsigned Ofs = (i % Scale) * EltSizeInBits;
2938 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2939 }
2940
2941 KnownBits Known;
2942 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2943 TLO, Depth + 1))
2944 return true;
2945
2946 // The bitcast has split each wide element into a number of
2947 // narrow subelements. We have just computed the Known bits
2948 // for wide elements. See if element splitting results in
2949 // some subelements being zero. Only for demanded elements!
2950 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2951 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2952 .isAllOnes())
2953 continue;
2954 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2955 unsigned Elt = Scale * SrcElt + SubElt;
2956 if (DemandedElts[Elt])
2957 KnownZero.setBit(Elt);
2958 }
2959 }
2960 }
2961
2962 // If the src element is zero/undef then the corresponding output elements
2963 // will be too - only demanded elements are guaranteed to be correct.
2964 for (unsigned i = 0; i != NumSrcElts; ++i) {
2965 if (SrcDemandedElts[i]) {
2966 if (SrcZero[i])
2967 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2968 if (SrcUndef[i])
2969 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2970 }
2971 }
2972 }
2973
2974 // Bitcast from 'small element' src vector to 'large element' vector, we
2975 // demand all smaller source elements covered by the larger demanded element
2976 // of this vector.
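// For example, bitcasting v4i32 to v2i64: demanding i64 result element 1
// demands i32 source elements 2 and 3.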
2977 if ((NumSrcElts % NumElts) == 0) {
2978 unsigned Scale = NumSrcElts / NumElts;
2979 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2980 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2981 TLO, Depth + 1))
2982 return true;
2983
2984 // If all the src elements covering an output element are zero/undef, then
2985 // the output element will be as well, assuming it was demanded.
2986 for (unsigned i = 0; i != NumElts; ++i) {
2987 if (DemandedElts[i]) {
2988 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2989 KnownZero.setBit(i);
2990 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2991 KnownUndef.setBit(i);
2992 }
2993 }
2994 }
2995 break;
2996 }
2997 case ISD::BUILD_VECTOR: {
2998 // Check all elements and simplify any unused elements with UNDEF.
2999 if (!DemandedElts.isAllOnes()) {
3000 // Don't simplify BROADCASTS.
3001 if (llvm::any_of(Op->op_values(),
3002 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3003 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3004 bool Updated = false;
3005 for (unsigned i = 0; i != NumElts; ++i) {
3006 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3007 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3008 KnownUndef.setBit(i);
3009 Updated = true;
3010 }
3011 }
3012 if (Updated)
3013 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3014 }
3015 }
3016 for (unsigned i = 0; i != NumElts; ++i) {
3017 SDValue SrcOp = Op.getOperand(i);
3018 if (SrcOp.isUndef()) {
3019 KnownUndef.setBit(i);
3020 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3021 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3022 KnownZero.setBit(i);
3023 }
3024 }
3025 break;
3026 }
3027 case ISD::CONCAT_VECTORS: {
3028 EVT SubVT = Op.getOperand(0).getValueType();
3029 unsigned NumSubVecs = Op.getNumOperands();
3030 unsigned NumSubElts = SubVT.getVectorNumElements();
3031 for (unsigned i = 0; i != NumSubVecs; ++i) {
3032 SDValue SubOp = Op.getOperand(i);
3033 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3034 APInt SubUndef, SubZero;
3035 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3036 Depth + 1))
3037 return true;
3038 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3039 KnownZero.insertBits(SubZero, i * NumSubElts);
3040 }
3041
3042 // Attempt to avoid multi-use ops if we don't need anything from them.
3043 if (!DemandedElts.isAllOnes()) {
3044 bool FoundNewSub = false;
3045 SmallVector<SDValue, 2> DemandedSubOps;
3046 for (unsigned i = 0; i != NumSubVecs; ++i) {
3047 SDValue SubOp = Op.getOperand(i);
3048 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3049 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3050 SubOp, SubElts, TLO.DAG, Depth + 1);
3051 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3052 FoundNewSub = NewSubOp ? true : FoundNewSub;
3053 }
3054 if (FoundNewSub) {
3055 SDValue NewOp =
3056 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3057 return TLO.CombineTo(Op, NewOp);
3058 }
3059 }
3060 break;
3061 }
3062 case ISD::INSERT_SUBVECTOR: {
3063 // Demand any elements from the subvector and the remainder from the src it's
3064 // inserted into.
3065 SDValue Src = Op.getOperand(0);
3066 SDValue Sub = Op.getOperand(1);
3067 uint64_t Idx = Op.getConstantOperandVal(2);
3068 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3069 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3070 APInt DemandedSrcElts = DemandedElts;
3071 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3072
3073 APInt SubUndef, SubZero;
3074 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3075 Depth + 1))
3076 return true;
3077
3078 // If none of the src operand elements are demanded, replace it with undef.
3079 if (!DemandedSrcElts && !Src.isUndef())
3080 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3081 TLO.DAG.getUNDEF(VT), Sub,
3082 Op.getOperand(2)));
3083
3084 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3085 TLO, Depth + 1))
3086 return true;
3087 KnownUndef.insertBits(SubUndef, Idx);
3088 KnownZero.insertBits(SubZero, Idx);
3089
3090 // Attempt to avoid multi-use ops if we don't need anything from them.
3091 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3092 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3093 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3094 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3095 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3096 if (NewSrc || NewSub) {
3097 NewSrc = NewSrc ? NewSrc : Src;
3098 NewSub = NewSub ? NewSub : Sub;
3099 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3100 NewSub, Op.getOperand(2));
3101 return TLO.CombineTo(Op, NewOp);
3102 }
3103 }
3104 break;
3105 }
3106 case ISD::EXTRACT_SUBVECTOR: {
3107 // Offset the demanded elts by the subvector index.
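// Illustrative example: extracting a v2i32 subvector at index 4 from a v8i32
// source with DemandedElts = 0b01 yields DemandedSrcElts = 0b00010000 below.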
3108 SDValue Src = Op.getOperand(0);
3109 if (Src.getValueType().isScalableVector())
3110 break;
3111 uint64_t Idx = Op.getConstantOperandVal(1);
3112 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3113 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3114
3115 APInt SrcUndef, SrcZero;
3116 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3117 Depth + 1))
3118 return true;
3119 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3120 KnownZero = SrcZero.extractBits(NumElts, Idx);
3121
3122 // Attempt to avoid multi-use ops if we don't need anything from them.
3123 if (!DemandedElts.isAllOnes()) {
3124 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3125 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3126 if (NewSrc) {
3127 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3128 Op.getOperand(1));
3129 return TLO.CombineTo(Op, NewOp);
3130 }
3131 }
3132 break;
3133 }
3134 case ISD::INSERT_VECTOR_ELT: {
3135 SDValue Vec = Op.getOperand(0);
3136 SDValue Scl = Op.getOperand(1);
3137 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3138
3139 // For a legal, constant insertion index, if we don't need this insertion
3140 // then strip it, else remove it from the demanded elts.
3141 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3142 unsigned Idx = CIdx->getZExtValue();
3143 if (!DemandedElts[Idx])
3144 return TLO.CombineTo(Op, Vec);
3145
3146 APInt DemandedVecElts(DemandedElts);
3147 DemandedVecElts.clearBit(Idx);
3148 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3149 KnownZero, TLO, Depth + 1))
3150 return true;
3151
3152 KnownUndef.setBitVal(Idx, Scl.isUndef());
3153
3154 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3155 break;
3156 }
3157
3158 APInt VecUndef, VecZero;
3159 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3160 Depth + 1))
3161 return true;
3162 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3163 break;
3164 }
3165 case ISD::VSELECT: {
3166 SDValue Sel = Op.getOperand(0);
3167 SDValue LHS = Op.getOperand(1);
3168 SDValue RHS = Op.getOperand(2);
3169
3170 // Try to transform the select condition based on the current demanded
3171 // elements.
3172 APInt UndefSel, UndefZero;
3173 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3174 Depth + 1))
3175 return true;
3176
3177 // See if we can simplify either vselect operand.
3178 APInt DemandedLHS(DemandedElts);
3179 APInt DemandedRHS(DemandedElts);
3180 APInt UndefLHS, ZeroLHS;
3181 APInt UndefRHS, ZeroRHS;
3182 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3183 Depth + 1))
3184 return true;
3185 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3186 Depth + 1))
3187 return true;
3188
3189 KnownUndef = UndefLHS & UndefRHS;
3190 KnownZero = ZeroLHS & ZeroRHS;
3191
3192 // If we know that the selected element is always zero, we don't need the
3193 // select value element.
3194 APInt DemandedSel = DemandedElts & ~KnownZero;
3195 if (DemandedSel != DemandedElts)
3196 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3197 Depth + 1))
3198 return true;
3199
3200 break;
3201 }
3202 case ISD::VECTOR_SHUFFLE: {
3203 SDValue LHS = Op.getOperand(0);
3204 SDValue RHS = Op.getOperand(1);
3205 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3206
3207 // Collect demanded elements from shuffle operands.
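// Illustrative example: a v4i32 shuffle mask <0, 5, 2, 7> with every result
// element demanded sets DemandedLHS = 0b0101 (elements 0 and 2 of the first
// operand) and DemandedRHS = 0b1010 (elements 1 and 3 of the second operand).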
3208 APInt DemandedLHS(NumElts, 0);
3209 APInt DemandedRHS(NumElts, 0);
3210 for (unsigned i = 0; i != NumElts; ++i) {
3211 int M = ShuffleMask[i];
3212 if (M < 0 || !DemandedElts[i])
3213 continue;
3214 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3215 if (M < (int)NumElts)
3216 DemandedLHS.setBit(M);
3217 else
3218 DemandedRHS.setBit(M - NumElts);
3219 }
3220
3221 // See if we can simplify either shuffle operand.
3222 APInt UndefLHS, ZeroLHS;
3223 APInt UndefRHS, ZeroRHS;
3224 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3225 Depth + 1))
3226 return true;
3227 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3228 Depth + 1))
3229 return true;
3230
3231 // Simplify mask using undef elements from LHS/RHS.
3232 bool Updated = false;
3233 bool IdentityLHS = true, IdentityRHS = true;
3234 SmallVector<int, 32> NewMask(ShuffleMask);
3235 for (unsigned i = 0; i != NumElts; ++i) {
3236 int &M = NewMask[i];
3237 if (M < 0)
3238 continue;
3239 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3240 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3241 Updated = true;
3242 M = -1;
3243 }
3244 IdentityLHS &= (M < 0) || (M == (int)i);
3245 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3246 }
3247
3248 // Update legal shuffle masks based on demanded elements if it won't reduce
3249 // to Identity, which can cause premature removal of the shuffle mask.
3250 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3251 SDValue LegalShuffle =
3252 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3253 if (LegalShuffle)
3254 return TLO.CombineTo(Op, LegalShuffle);
3255 }
3256
3257 // Propagate undef/zero elements from LHS/RHS.
3258 for (unsigned i = 0; i != NumElts; ++i) {
3259 int M = ShuffleMask[i];
3260 if (M < 0) {
3261 KnownUndef.setBit(i);
3262 } else if (M < (int)NumElts) {
3263 if (UndefLHS[M])
3264 KnownUndef.setBit(i);
3265 if (ZeroLHS[M])
3266 KnownZero.setBit(i);
3267 } else {
3268 if (UndefRHS[M - NumElts])
3269 KnownUndef.setBit(i);
3270 if (ZeroRHS[M - NumElts])
3271 KnownZero.setBit(i);
3272 }
3273 }
3274 break;
3275 }
3276 case ISD::ANY_EXTEND_VECTOR_INREG:
3277 case ISD::SIGN_EXTEND_VECTOR_INREG:
3278 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3279 APInt SrcUndef, SrcZero;
3280 SDValue Src = Op.getOperand(0);
3281 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3282 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3283 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3284 Depth + 1))
3285 return true;
3286 KnownZero = SrcZero.zextOrTrunc(NumElts);
3287 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3288
3289 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3290 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3291 DemandedSrcElts == 1) {
3292 // aext - if we just need the bottom element then we can bitcast.
3293 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3294 }
3295
3296 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3297 // zext(undef) upper bits are guaranteed to be zero.
3298 if (DemandedElts.isSubsetOf(KnownUndef))
3299 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3300 KnownUndef.clearAllBits();
3301
3302 // zext - if we just need the bottom element then we can mask:
3303 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3304 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3305 Op->isOnlyUserOf(Src.getNode()) &&
3306 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3307 SDLoc DL(Op);
3308 EVT SrcVT = Src.getValueType();
3309 EVT SrcSVT = SrcVT.getScalarType();
3310 SmallVector<SDValue> MaskElts;
3311 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3312 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3313 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3314 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3315 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3316 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3317 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3318 }
3319 }
3320 }
3321 break;
3322 }
3323
3324 // TODO: There are more binop opcodes that could be handled here - MIN,
3325 // MAX, saturated math, etc.
3326 case ISD::ADD: {
3327 SDValue Op0 = Op.getOperand(0);
3328 SDValue Op1 = Op.getOperand(1);
3329 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3330 APInt UndefLHS, ZeroLHS;
3331 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3332 Depth + 1, /*AssumeSingleUse*/ true))
3333 return true;
3334 }
3335 [[fallthrough]];
3336 }
3337 case ISD::OR:
3338 case ISD::XOR:
3339 case ISD::SUB:
3340 case ISD::FADD:
3341 case ISD::FSUB:
3342 case ISD::FMUL:
3343 case ISD::FDIV:
3344 case ISD::FREM: {
3345 SDValue Op0 = Op.getOperand(0);
3346 SDValue Op1 = Op.getOperand(1);
3347
3348 APInt UndefRHS, ZeroRHS;
3349 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3350 Depth + 1))
3351 return true;
3352 APInt UndefLHS, ZeroLHS;
3353 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3354 Depth + 1))
3355 return true;
3356
3357 KnownZero = ZeroLHS & ZeroRHS;
3358 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3359
3360 // Attempt to avoid multi-use ops if we don't need anything from them.
3361 // TODO - use KnownUndef to relax the demandedelts?
3362 if (!DemandedElts.isAllOnes())
3363 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3364 return true;
3365 break;
3366 }
3367 case ISD::SHL:
3368 case ISD::SRL:
3369 case ISD::SRA:
3370 case ISD::ROTL:
3371 case ISD::ROTR: {
3372 SDValue Op0 = Op.getOperand(0);
3373 SDValue Op1 = Op.getOperand(1);
3374
3375 APInt UndefRHS, ZeroRHS;
3376 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3377 Depth + 1))
3378 return true;
3379 APInt UndefLHS, ZeroLHS;
3380 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3381 Depth + 1))
3382 return true;
3383
3384 KnownZero = ZeroLHS;
3385 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3386
3387 // Attempt to avoid multi-use ops if we don't need anything from them.
3388 // TODO - use KnownUndef to relax the demandedelts?
3389 if (!DemandedElts.isAllOnes())
3390 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3391 return true;
3392 break;
3393 }
3394 case ISD::MUL:
3395 case ISD::MULHU:
3396 case ISD::MULHS:
3397 case ISD::AND: {
3398 SDValue Op0 = Op.getOperand(0);
3399 SDValue Op1 = Op.getOperand(1);
3400
3401 APInt SrcUndef, SrcZero;
3402 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3403 Depth + 1))
3404 return true;
3405 // If we know that a demanded element was zero in Op1 we don't need to
3406 // demand it in Op0 - it's guaranteed to be zero.
3407 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3408 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3409 TLO, Depth + 1))
3410 return true;
3411
3412 KnownUndef &= DemandedElts0;
3413 KnownZero &= DemandedElts0;
3414
3415 // If every element pair has a zero/undef then just fold to zero.
3416 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3417 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3418 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3419 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3420
3421 // If either side has a zero element, then the result element is zero, even
3422 // if the other is an UNDEF.
3423 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3424 // and then handle 'and' nodes with the rest of the binop opcodes.
3425 KnownZero |= SrcZero;
3426 KnownUndef &= SrcUndef;
3427 KnownUndef &= ~KnownZero;
3428
3429 // Attempt to avoid multi-use ops if we don't need anything from them.
3430 if (!DemandedElts.isAllOnes())
3431 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3432 return true;
3433 break;
3434 }
3435 case ISD::TRUNCATE:
3436 case ISD::SIGN_EXTEND:
3437 case ISD::ZERO_EXTEND:
3438 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3439 KnownZero, TLO, Depth + 1))
3440 return true;
3441
3442 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3443 // zext(undef) upper bits are guaranteed to be zero.
3444 if (DemandedElts.isSubsetOf(KnownUndef))
3445 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3446 KnownUndef.clearAllBits();
3447 }
3448 break;
3449 default: {
3450 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3451 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3452 KnownZero, TLO, Depth))
3453 return true;
3454 } else {
3455 KnownBits Known;
3456 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3457 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3458 TLO, Depth, AssumeSingleUse))
3459 return true;
3460 }
3461 break;
3462 }
3463 }
3464 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3465
3466 // Constant fold all undef cases.
3467 // TODO: Handle zero cases as well.
3468 if (DemandedElts.isSubsetOf(KnownUndef))
3469 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3470
3471 return false;
3472}
3473
3474/// Determine which of the bits specified in Mask are known to be either zero or
3475 /// one and return them in the Known.
3476 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3477 KnownBits &Known,
3478 const APInt &DemandedElts,
3479 const SelectionDAG &DAG,
3480 unsigned Depth) const {
3481 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3482 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3483 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3484 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3485 "Should use MaskedValueIsZero if you don't know whether Op"
3486 " is a target node!");
3487 Known.resetAll();
3488}
3489
3490 void TargetLowering::computeKnownBitsForTargetInstr(
3491 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3492 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3493 unsigned Depth) const {
3494 Known.resetAll();
3495}
3496
3497 void TargetLowering::computeKnownBitsForFrameIndex(
3498 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3499 // The low bits are known zero if the pointer is aligned.
3500 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3501}
3502
3503 Align TargetLowering::computeKnownAlignForTargetInstr(
3504 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3505 unsigned Depth) const {
3506 return Align(1);
3507}
3508
3509/// This method can be implemented by targets that want to expose additional
3510 /// information about sign bits to the DAG Combiner.
3511 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3512 const APInt &,
3513 const SelectionDAG &,
3514 unsigned Depth) const {
3515 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3516 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3517 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3518 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3519 "Should use ComputeNumSignBits if you don't know whether Op"
3520 " is a target node!");
3521 return 1;
3522}
3523
3524 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3525 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3526 const MachineRegisterInfo &MRI, unsigned Depth) const {
3527 return 1;
3528}
3529
3530 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3531 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3532 TargetLoweringOpt &TLO, unsigned Depth) const {
3533 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3534 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3535 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3536 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3537 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3538 " is a target node!");
3539 return false;
3540}
3541
3542 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3543 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3544 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3545 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3546 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3547 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3548 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3549 "Should use SimplifyDemandedBits if you don't know whether Op"
3550 " is a target node!");
3551 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3552 return false;
3553}
3554
3555 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3556 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3557 SelectionDAG &DAG, unsigned Depth) const {
3558 assert(
3559 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3560 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3561 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3562 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3563 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3564 " is a target node!");
3565 return SDValue();
3566}
3567
3568 SDValue
3569 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3570 SDValue N1, MutableArrayRef<int> Mask,
3571 SelectionDAG &DAG) const {
3572 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3573 if (!LegalMask) {
3574 std::swap(N0, N1);
3575 ShuffleVectorSDNode::commuteMask(Mask);
3576 LegalMask = isShuffleMaskLegal(Mask, VT);
3577 }
3578
3579 if (!LegalMask)
3580 return SDValue();
3581
3582 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3583}
3584
3585 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3586 return nullptr;
3587}
3588
3589 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3590 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3591 bool PoisonOnly, unsigned Depth) const {
3592 assert(
3593 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3594 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3595 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3596 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3597 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3598 " is a target node!");
3599 return false;
3600}
3601
3602 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3603 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3604 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3605 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3606 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3607 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3608 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3609 "Should use canCreateUndefOrPoison if you don't know whether Op"
3610 " is a target node!");
3611 // Be conservative and return true.
3612 return true;
3613}
3614
3615 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3616 const SelectionDAG &DAG,
3617 bool SNaN,
3618 unsigned Depth) const {
3619 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3620 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3621 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3622 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3623 "Should use isKnownNeverNaN if you don't know whether Op"
3624 " is a target node!");
3625 return false;
3626}
3627
3628 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3629 const APInt &DemandedElts,
3630 APInt &UndefElts,
3631 const SelectionDAG &DAG,
3632 unsigned Depth) const {
3633 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3634 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3635 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3636 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3637 "Should use isSplatValue if you don't know whether Op"
3638 " is a target node!");
3639 return false;
3640}
3641
3642// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3643// work with truncating build vectors and vectors with elements of less than
3644 // 8 bits.
3645 bool TargetLowering::isConstTrueVal(SDValue N) const {
3646 if (!N)
3647 return false;
3648
3649 unsigned EltWidth;
3650 APInt CVal;
3651 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3652 /*AllowTruncation=*/true)) {
3653 CVal = CN->getAPIntValue();
3654 EltWidth = N.getValueType().getScalarSizeInBits();
3655 } else
3656 return false;
3657
3658 // If this is a truncating splat, truncate the splat value.
3659 // Otherwise, we may fail to match the expected values below.
3660 if (EltWidth < CVal.getBitWidth())
3661 CVal = CVal.trunc(EltWidth);
3662
3663 switch (getBooleanContents(N.getValueType())) {
3664 case UndefinedBooleanContent:
3665 return CVal[0];
3666 case ZeroOrOneBooleanContent:
3667 return CVal.isOne();
3668 case ZeroOrNegativeOneBooleanContent:
3669 return CVal.isAllOnes();
3670 }
3671
3672 llvm_unreachable("Invalid boolean contents");
3673}
3674
3675 bool TargetLowering::isConstFalseVal(SDValue N) const {
3676 if (!N)
3677 return false;
3678
3679 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3680 if (!CN) {
3681 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3682 if (!BV)
3683 return false;
3684
3685 // Only interested in constant splats; we don't care about undef
3686 // elements in identifying boolean constants, and getConstantSplatNode
3687 // returns NULL if all ops are undef.
3688 CN = BV->getConstantSplatNode();
3689 if (!CN)
3690 return false;
3691 }
3692
3693 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3694 return !CN->getAPIntValue()[0];
3695
3696 return CN->isZero();
3697}
3698
3699 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3700 bool SExt) const {
3701 if (VT == MVT::i1)
3702 return N->isOne();
3703
3704 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3705 switch (Cnt) {
3706 case TargetLowering::ZeroOrOneBooleanContent:
3707 // An extended value of 1 is always true, unless its original type is i1,
3708 // in which case it will be sign extended to -1.
3709 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3710 case TargetLowering::UndefinedBooleanContent:
3711 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3712 return N->isAllOnes() && SExt;
3713 }
3714 llvm_unreachable("Unexpected enumeration.");
3715}
3716
3717/// This helper function of SimplifySetCC tries to optimize the comparison when
3718/// either operand of the SetCC node is a bitwise-and instruction.
3719SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3720 ISD::CondCode Cond, const SDLoc &DL,
3721 DAGCombinerInfo &DCI) const {
3722 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3723 std::swap(N0, N1);
3724
3725 SelectionDAG &DAG = DCI.DAG;
3726 EVT OpVT = N0.getValueType();
3727 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3728 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3729 return SDValue();
3730
3731 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3732 // iff everything but LSB is known zero:
3733 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3734 (getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent ||
3735 getBooleanContents(OpVT) == TargetLowering::ZeroOrNegativeOneBooleanContent)) {
3736 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3737 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3738 if (DAG.MaskedValueIsZero(N0, UpperBits))
3739 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3740 }
3741
3742 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3743 // test in a narrow type that we can truncate to with no cost. Examples:
3744 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3745 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3746 // TODO: This conservatively checks for type legality on the source and
3747 // destination types. That may inhibit optimizations, but it also
3748 // allows setcc->shift transforms that may be more beneficial.
3749 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3750 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3751 isTypeLegal(OpVT) && N0.hasOneUse()) {
3752 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3753 AndC->getAPIntValue().getActiveBits());
3754 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3755 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3756 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3757 return DAG.getSetCC(DL, VT, Trunc, Zero,
3758 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3759 }
3760 }
3761
3762 // Match these patterns in any of their permutations:
3763 // (X & Y) == Y
3764 // (X & Y) != Y
3765 SDValue X, Y;
3766 if (N0.getOperand(0) == N1) {
3767 X = N0.getOperand(1);
3768 Y = N0.getOperand(0);
3769 } else if (N0.getOperand(1) == N1) {
3770 X = N0.getOperand(0);
3771 Y = N0.getOperand(1);
3772 } else {
3773 return SDValue();
3774 }
3775
3776 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3777 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3778 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3779 // Note that where Y is variable and is known to have at most one bit set
3780 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3781 // equivalent when Y == 0.
3782 assert(OpVT.isInteger());
3783 Cond = ISD::getSetCCInverse(Cond, OpVT);
3784 if (DCI.isBeforeLegalizeOps() ||
3785 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3786 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3787 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3788 // If the target supports an 'and-not' or 'and-complement' logic operation,
3789 // try to use that to make a comparison operation more efficient.
3790 // But don't do this transform if the mask is a single bit because there are
3791 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3792 // 'rlwinm' on PPC).
3793
3794 // Bail out if the compare operand that we want to turn into a zero is
3795 // already a zero (otherwise, infinite loop).
3796 auto *YConst = dyn_cast<ConstantSDNode>(Y);
3797 if (YConst && YConst->isZero())
3798 return SDValue();
3799
3800 // Transform this into: ~X & Y == 0.
3801 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3802 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3803 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3804 }
3805
3806 return SDValue();
3807}
3808
3809/// There are multiple IR patterns that could be checking whether certain
3810 /// truncation of a signed number would be lossy or not. The pattern that is
3811 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3812/// We are looking for the following pattern: (KeptBits is a constant)
3813/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3814/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3815/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3816/// We will unfold it into the natural trunc+sext pattern:
3817/// ((%x << C) a>> C) dstcond %x
3818/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
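/// For example, with i16 %x and KeptBits == 8, (add %x, 128) ult 256 unfolds
/// into ((%x << 8) a>> 8) eq %x, i.e. a check that %x is already sign-extended
/// from 8 bits.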
3819SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3820 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3821 const SDLoc &DL) const {
3822 // We must be comparing with a constant.
3823 ConstantSDNode *C1;
3824 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3825 return SDValue();
3826
3827 // N0 should be: add %x, (1 << (KeptBits-1))
3828 if (N0->getOpcode() != ISD::ADD)
3829 return SDValue();
3830
3831 // And we must be 'add'ing a constant.
3832 ConstantSDNode *C01;
3833 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3834 return SDValue();
3835
3836 SDValue X = N0->getOperand(0);
3837 EVT XVT = X.getValueType();
3838
3839 // Validate constants ...
3840
3841 APInt I1 = C1->getAPIntValue();
3842
3843 ISD::CondCode NewCond;
3844 if (Cond == ISD::CondCode::SETULT) {
3845 NewCond = ISD::CondCode::SETEQ;
3846 } else if (Cond == ISD::CondCode::SETULE) {
3847 NewCond = ISD::CondCode::SETEQ;
3848 // But need to 'canonicalize' the constant.
3849 I1 += 1;
3850 } else if (Cond == ISD::CondCode::SETUGT) {
3851 NewCond = ISD::CondCode::SETNE;
3852 // But need to 'canonicalize' the constant.
3853 I1 += 1;
3854 } else if (Cond == ISD::CondCode::SETUGE) {
3855 NewCond = ISD::CondCode::SETNE;
3856 } else
3857 return SDValue();
3858
3859 APInt I01 = C01->getAPIntValue();
3860
3861 auto checkConstants = [&I1, &I01]() -> bool {
3862 // Both of them must be power-of-two, and the constant from setcc is bigger.
3863 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3864 };
3865
3866 if (checkConstants()) {
3867 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3868 } else {
3869 // What if we invert constants? (and the target predicate)
3870 I1.negate();
3871 I01.negate();
3872 assert(XVT.isInteger());
3873 NewCond = getSetCCInverse(NewCond, XVT);
3874 if (!checkConstants())
3875 return SDValue();
3876 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3877 }
3878
3879 // They are power-of-two, so which bit is set?
3880 const unsigned KeptBits = I1.logBase2();
3881 const unsigned KeptBitsMinusOne = I01.logBase2();
3882
3883 // Magic!
3884 if (KeptBits != (KeptBitsMinusOne + 1))
3885 return SDValue();
3886 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3887
3888 // We don't want to do this in every single case.
3889 SelectionDAG &DAG = DCI.DAG;
3890 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3891 XVT, KeptBits))
3892 return SDValue();
3893
3894 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3895 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3896
3897 // Unfold into: ((%x << C) a>> C) cond %x
3898 // Where 'cond' will be either 'eq' or 'ne'.
3899 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3900 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3901 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3902 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3903
3904 return T2;
3905}
3906
3907// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
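// Illustrative example: (X & (0x80 << Y)) != 0 becomes ((X l>> Y) & 0x80) != 0,
// hoisting the constant out of the shift so the 'and' keeps a constant mask.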
3908SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3909 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3910 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3911 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
3912 "Should be a comparison with 0.");
3913 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3914 "Valid only for [in]equality comparisons.");
3915
3916 unsigned NewShiftOpcode;
3917 SDValue X, C, Y;
3918
3919 SelectionDAG &DAG = DCI.DAG;
3920 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3921
3922 // Look for '(C l>>/<< Y)'.
3923 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3924 // The shift should be one-use.
3925 if (!V.hasOneUse())
3926 return false;
3927 unsigned OldShiftOpcode = V.getOpcode();
3928 switch (OldShiftOpcode) {
3929 case ISD::SHL:
3930 NewShiftOpcode = ISD::SRL;
3931 break;
3932 case ISD::SRL:
3933 NewShiftOpcode = ISD::SHL;
3934 break;
3935 default:
3936 return false; // must be a logical shift.
3937 }
3938 // We should be shifting a constant.
3939 // FIXME: best to use isConstantOrConstantVector().
3940 C = V.getOperand(0);
3941 ConstantSDNode *CC =
3942 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3943 if (!CC)
3944 return false;
3945 Y = V.getOperand(1);
3946
3947 ConstantSDNode *XC =
3948 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3949 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3950 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3951 };
3952
3953 // LHS of comparison should be a one-use 'and'.
3954 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3955 return SDValue();
3956
3957 X = N0.getOperand(0);
3958 SDValue Mask = N0.getOperand(1);
3959
3960 // 'and' is commutative!
3961 if (!Match(Mask)) {
3962 std::swap(X, Mask);
3963 if (!Match(Mask))
3964 return SDValue();
3965 }
3966
3967 EVT VT = X.getValueType();
3968
3969 // Produce:
3970 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3971 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3972 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3973 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3974 return T2;
3975}
3976
3977/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3978/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3979/// handle the commuted versions of these patterns.
3980SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3981 ISD::CondCode Cond, const SDLoc &DL,
3982 DAGCombinerInfo &DCI) const {
3983 unsigned BOpcode = N0.getOpcode();
3984 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3985 "Unexpected binop");
3986 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3987
3988 // (X + Y) == X --> Y == 0
3989 // (X - Y) == X --> Y == 0
3990 // (X ^ Y) == X --> Y == 0
3991 SelectionDAG &DAG = DCI.DAG;
3992 EVT OpVT = N0.getValueType();
3993 SDValue X = N0.getOperand(0);
3994 SDValue Y = N0.getOperand(1);
3995 if (X == N1)
3996 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3997
3998 if (Y != N1)
3999 return SDValue();
4000
4001 // (X + Y) == Y --> X == 0
4002 // (X ^ Y) == Y --> X == 0
4003 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4004 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4005
4006 // The shift would not be valid if the operands are boolean (i1).
4007 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4008 return SDValue();
4009
4010 // (X - Y) == Y --> X == Y << 1
4011 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4012 !DCI.isBeforeLegalize());
4013 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4014 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4015 if (!DCI.isCalledByLegalizer())
4016 DCI.AddToWorklist(YShl1.getNode());
4017 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4018}
4019
4020 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4021 SDValue N0, const APInt &C1,
4022 ISD::CondCode Cond, const SDLoc &dl,
4023 SelectionDAG &DAG) {
4024 // Look through truncs that don't change the value of a ctpop.
4025 // FIXME: Add vector support? Need to be careful with setcc result type below.
4026 SDValue CTPOP = N0;
4027 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4028 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4029 CTPOP = N0.getOperand(0);
4030
4031 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4032 return SDValue();
4033
4034 EVT CTVT = CTPOP.getValueType();
4035 SDValue CTOp = CTPOP.getOperand(0);
4036
4037 // Expand a power-of-2-or-zero comparison based on ctpop:
4038 // (ctpop x) u< 2 -> (x & x-1) == 0
4039 // (ctpop x) u> 1 -> (x & x-1) != 0
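// Illustrative example: the same trick generalizes, e.g. (ctpop x) u< 3 clears
// the lowest set bit twice via the loop below and compares the result with 0.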
4040 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4041 // Keep the CTPOP if it is a legal vector op.
4042 if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4043 return SDValue();
4044
4045 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4046 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4047 return SDValue();
4048 if (C1 == 0 && (Cond == ISD::SETULT))
4049 return SDValue(); // This is handled elsewhere.
4050
4051 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4052
4053 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4054 SDValue Result = CTOp;
4055 for (unsigned i = 0; i < Passes; i++) {
4056 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4057 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4058 }
4059 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4060 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4061 }
4062
4063 // Expand a power-of-2 comparison based on ctpop:
4064 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4065 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4066 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4067 // Keep the CTPOP if it is legal.
4068 if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4069 return SDValue();
4070
4071 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4072 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4073 assert(CTVT.isInteger());
4074 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4075 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4076 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4077 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4078 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4079 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4080 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4081 }
4082
4083 return SDValue();
4084}
4085
4086 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4087 ISD::CondCode Cond, const SDLoc &dl,
4088 SelectionDAG &DAG) {
4089 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4090 return SDValue();
4091
4092 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4093 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4094 return SDValue();
4095
4096 auto getRotateSource = [](SDValue X) {
4097 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4098 return X.getOperand(0);
4099 return SDValue();
4100 };
4101
4102 // Peek through a rotated value compared against 0 or -1:
4103 // (rot X, Y) == 0/-1 --> X == 0/-1
4104 // (rot X, Y) != 0/-1 --> X != 0/-1
4105 if (SDValue R = getRotateSource(N0))
4106 return DAG.getSetCC(dl, VT, R, N1, Cond);
4107
4108 // Peek through an 'or' of a rotated value compared against 0:
4109 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4110 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4111 //
4112 // TODO: Add the 'and' with -1 sibling.
4113 // TODO: Recurse through a series of 'or' ops to find the rotate.
4114 EVT OpVT = N0.getValueType();
4115 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4116 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4117 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4118 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4119 }
4120 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4121 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4122 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4123 }
4124 }
4125
4126 return SDValue();
4127}
4128
4129 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4130 ISD::CondCode Cond, const SDLoc &dl,
4131 SelectionDAG &DAG) {
4132 // If we are testing for all-bits-clear, we might be able to do that with
4133 // less shifting since bit-order does not matter.
4134 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4135 return SDValue();
4136
4137 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4138 if (!C1 || !C1->isZero())
4139 return SDValue();
4140
4141 if (!N0.hasOneUse() ||
4142 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4143 return SDValue();
4144
4145 unsigned BitWidth = N0.getScalarValueSizeInBits();
4146 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4147 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4148 return SDValue();
4149
4150 // Canonicalize fshr as fshl to reduce pattern-matching.
4151 unsigned ShAmt = ShAmtC->getZExtValue();
4152 if (N0.getOpcode() == ISD::FSHR)
4153 ShAmt = BitWidth - ShAmt;
4154
4155 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4156 SDValue X, Y;
4157 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4158 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4159 return false;
4160 if (Or.getOperand(0) == Other) {
4161 X = Or.getOperand(0);
4162 Y = Or.getOperand(1);
4163 return true;
4164 }
4165 if (Or.getOperand(1) == Other) {
4166 X = Or.getOperand(1);
4167 Y = Or.getOperand(0);
4168 return true;
4169 }
4170 return false;
4171 };
4172
4173 EVT OpVT = N0.getValueType();
4174 EVT ShAmtVT = N0.getOperand(2).getValueType();
4175 SDValue F0 = N0.getOperand(0);
4176 SDValue F1 = N0.getOperand(1);
4177 if (matchOr(F0, F1)) {
4178 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4179 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4180 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4181 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4182 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4183 }
4184 if (matchOr(F1, F0)) {
4185 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4186 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4187 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4188 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4189 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4190 }
4191
4192 return SDValue();
4193}
4194
4195/// Try to simplify a setcc built with the specified operands and cc. If it is
4196 /// unable to simplify it, return a null SDValue.
4197 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4198 ISD::CondCode Cond, bool foldBooleans,
4199 DAGCombinerInfo &DCI,
4200 const SDLoc &dl) const {
4201 SelectionDAG &DAG = DCI.DAG;
4202 const DataLayout &Layout = DAG.getDataLayout();
4203 EVT OpVT = N0.getValueType();
4204 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4205
4206 // Constant fold or commute setcc.
4207 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4208 return Fold;
4209
4210 bool N0ConstOrSplat =
4211 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4212 bool N1ConstOrSplat =
4213 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4214
4215 // Ensure that the constant occurs on the RHS and fold constant comparisons.
4216 // TODO: Handle non-splat vector constants. All undef causes trouble.
4217 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4218 // infinite loop here when we encounter one.
4219 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4220 if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4221 (DCI.isBeforeLegalizeOps() ||
4222 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4223 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4224
4225 // If we have a subtract with the same 2 non-constant operands as this setcc
4226 // -- but in reverse order -- then try to commute the operands of this setcc
4227 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4228 // instruction on some targets.
4229 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4230 (DCI.isBeforeLegalizeOps() ||
4231 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4232 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4233 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4234 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4235
4236 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4237 return V;
4238
4239 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4240 return V;
4241
4242 if (auto *N1C = isConstOrConstSplat(N1)) {
4243 const APInt &C1 = N1C->getAPIntValue();
4244
4245 // Optimize some CTPOP cases.
4246 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4247 return V;
4248
4249 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4250 // X * Y == 0 --> (X == 0) || (Y == 0)
4251 // X * Y != 0 --> (X != 0) && (Y != 0)
4252 // TODO: This bails out if minsize is set, but if the target doesn't have a
4253 // single instruction multiply for this type, it would likely be
4254 // smaller to decompose.
4255 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4256 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4257 (N0->getFlags().hasNoUnsignedWrap() ||
4258 N0->getFlags().hasNoSignedWrap()) &&
4259 !Attr.hasFnAttr(Attribute::MinSize)) {
4260 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4261 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4262 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4263 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4264 }
4265
4266 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4267 // equality comparison, then we're just comparing whether X itself is
4268 // zero.
4269 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4270 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4271 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4272 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4273 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4274 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4275 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4276 // (srl (ctlz x), 5) == 0 -> X != 0
4277 // (srl (ctlz x), 5) != 1 -> X != 0
4278 Cond = ISD::SETNE;
4279 } else {
4280 // (srl (ctlz x), 5) != 0 -> X == 0
4281 // (srl (ctlz x), 5) == 1 -> X == 0
4282 Cond = ISD::SETEQ;
4283 }
4284 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4285 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4286 Cond);
4287 }
4288 }
4289 }
4290 }
4291
4292 // FIXME: Support vectors.
4293 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4294 const APInt &C1 = N1C->getAPIntValue();
4295
4296 // (zext x) == C --> x == (trunc C)
4297 // (sext x) == C --> x == (trunc C)
4298 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4299 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4300 unsigned MinBits = N0.getValueSizeInBits();
4301 SDValue PreExt;
4302 bool Signed = false;
4303 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4304 // ZExt
4305 MinBits = N0->getOperand(0).getValueSizeInBits();
4306 PreExt = N0->getOperand(0);
4307 } else if (N0->getOpcode() == ISD::AND) {
4308 // DAGCombine turns costly ZExts into ANDs
4309 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4310 if ((C->getAPIntValue()+1).isPowerOf2()) {
4311 MinBits = C->getAPIntValue().countr_one();
4312 PreExt = N0->getOperand(0);
4313 }
4314 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4315 // SExt
4316 MinBits = N0->getOperand(0).getValueSizeInBits();
4317 PreExt = N0->getOperand(0);
4318 Signed = true;
4319 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4320 // ZEXTLOAD / SEXTLOAD
4321 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4322 MinBits = LN0->getMemoryVT().getSizeInBits();
4323 PreExt = N0;
4324 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4325 Signed = true;
4326 MinBits = LN0->getMemoryVT().getSizeInBits();
4327 PreExt = N0;
4328 }
4329 }
4330
4331 // Figure out how many bits we need to preserve this constant.
4332 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4333
4334 // Make sure we're not losing bits from the constant.
4335 if (MinBits > 0 &&
4336 MinBits < C1.getBitWidth() &&
4337 MinBits >= ReqdBits) {
4338 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4339 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4340 // Will get folded away.
4341 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4342 if (MinBits == 1 && C1 == 1)
4343 // Invert the condition.
4344 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4345 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4346 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4347 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4348 }
4349
4350 // If truncating the setcc operands is not desirable, we can still
4351 // simplify the expression in some cases:
4352 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4353 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4354 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4355 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4356 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4357 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4358 SDValue TopSetCC = N0->getOperand(0);
4359 unsigned N0Opc = N0->getOpcode();
4360 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4361 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4362 TopSetCC.getOpcode() == ISD::SETCC &&
4363 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4364 (isConstFalseVal(N1) ||
4365 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4366
4367 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4368 (!N1C->isZero() && Cond == ISD::SETNE);
4369
4370 if (!Inverse)
4371 return TopSetCC;
4372
4373 ISD::CondCode InvCond = ISD::getSetCCInverse(
4374 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4375 TopSetCC.getOperand(0).getValueType());
4376 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4377 TopSetCC.getOperand(1),
4378 InvCond);
4379 }
4380 }
4381 }
4382
4383 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4384 // equality or unsigned, and all 1 bits of the const are in the same
4385 // partial word, see if we can shorten the load.
4386 if (DCI.isBeforeLegalize() &&
4387 !ISD::isSignedIntSetCC(Cond) &&
4388 N0.getOpcode() == ISD::AND && C1 == 0 &&
4389 N0.getNode()->hasOneUse() &&
4390 isa<LoadSDNode>(N0.getOperand(0)) &&
4391 N0.getOperand(0).getNode()->hasOneUse() &&
4392 isa<ConstantSDNode>(N0.getOperand(1))) {
4393 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4394 APInt bestMask;
4395 unsigned bestWidth = 0, bestOffset = 0;
4396 if (Lod->isSimple() && Lod->isUnindexed()) {
4397 unsigned origWidth = N0.getValueSizeInBits();
4398 unsigned maskWidth = origWidth;
4399 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4400 // 8 bits, but have to be careful...
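// Illustrative example: on a little-endian target, (and (load i32 %p), 0xFF00)
// == 0 can be narrowed by the search below to an i8 load at byte offset 1
// that is then masked with 0xFF.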
4401 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4402 origWidth = Lod->getMemoryVT().getSizeInBits();
4403 const APInt &Mask = N0.getConstantOperandAPInt(1);
4404 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4405 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4406 for (unsigned offset=0; offset<origWidth/width; offset++) {
4407 if (Mask.isSubsetOf(newMask)) {
4408 if (Layout.isLittleEndian())
4409 bestOffset = (uint64_t)offset * (width/8);
4410 else
4411 bestOffset = (origWidth/width - offset - 1) * (width/8);
4412 bestMask = Mask.lshr(offset * (width/8) * 8);
4413 bestWidth = width;
4414 break;
4415 }
4416 newMask <<= width;
4417 }
4418 }
4419 }
4420 if (bestWidth) {
4421 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4422 if (newVT.isRound() &&
4423 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4424 SDValue Ptr = Lod->getBasePtr();
4425 if (bestOffset != 0)
4426 Ptr =
4427 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4428 SDValue NewLoad =
4429 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4430 Lod->getPointerInfo().getWithOffset(bestOffset),
4431 Lod->getOriginalAlign());
4432 return DAG.getSetCC(dl, VT,
4433 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4434 DAG.getConstant(bestMask.trunc(bestWidth),
4435