//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

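// Illustrative example (not part of the original source): the attribute
// check above corresponds to IR like the following, where the caller opts
// out of tail calls even though the call feeds directly into the return:
//
//   define i32 @caller(i32 %x) "disable-tail-calls"="true" {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// Here isInTailCallPosition returns false and an ordinary call is emitted.
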
bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

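// Illustrative sketch (not part of the original source): how a target might
// expand an f128 addition through makeLibCall. The helper name is
// hypothetical; the RTLIB entry and call pattern follow the API above.
//
//   static SDValue lowerF128Add(SDValue Op, SelectionDAG &DAG,
//                               const TargetLowering &TLI) {
//     TargetLowering::MakeLibCallOptions CallOptions;
//     SDValue Ops[2] = {Op.getOperand(0), Op.getOperand(1)};
//     // makeLibCall returns {result, chain}; an unchained call needs .first.
//     return TLI.makeLibCall(DAG, RTLIB::ADD_F128, MVT::f128, Ops,
//                            CallOptions, SDLoc(Op)).first;
//   }
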
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

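// Illustrative sketch (not part of the original source): querying the type
// sequence for a 15-byte memcpy. On a target where i64 is legal and
// misaligned accesses are cheap, MemOps typically comes back as a pair of
// (possibly overlapping) i64 ops; otherwise the loop above falls back to
// something like {i64, i32, i16, i8}.
//
//   std::vector<EVT> MemOps;
//   bool OK = TLI.findOptimalMemOpLowering(
//       MemOps, /*Limit=*/8,
//       MemOp::Copy(/*Size=*/15, /*DstAlignCanChange=*/false,
//                   /*DstAlign=*/Align(8), /*SrcAlign=*/Align(8),
//                   /*IsVolatile=*/false),
//       /*DstAS=*/0, /*SrcAS=*/0, Caller->getAttributes());
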
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

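// Illustrative example (not part of the original source): softening
//   setcc f32 %a, %b, setueq
// selects LC1 = UO_F32 (__unordsf2) and LC2 = OEQ_F32 (__eqsf2), producing
//   (__unordsf2(%a, %b) != 0) | (__eqsf2(%a, %b) == 0)
// For SETONE, ShouldInvertCC is set, both predicates are inverted, and the
// two results are combined with AND instead of OR.
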
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

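// Illustrative example (not part of the original source): with
//   %t = or i32 %x, 0x00FF00FF
// and DemandedBits = 0xFF, the constant has bits outside the demanded mask,
// so the transform above rewrites the node to
//   %t = or i32 %x, 0xFF
// which is cheaper to materialize and exposes further combines.
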
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

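// Illustrative example (not part of the original source): for
//   (and (add i64 %a, %b), 0xFFFF)
// only the low 16 bits of the add are demanded, so on a target where the
// truncate/zext pair is free the loop above rewrites the add as
//   (any_extend (add i16 (trunc %a), (trunc %b)))
// trying i16 first and then i32, the power-of-two widths below 64.
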
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Pretend we don't know anything about scalable vectors for now.
  // TODO: We can probably do more work on simplifying the operations for
  // scalable vectors, but for now we just bail out.
  if (VT.isScalableVector())
    return SDValue();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
    // general nodes like this.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

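// Illustrative example (not part of the original source): given
//   %m = and i32 %x, 0xFF00
// with multiple uses, a caller demanding only bits 0xFF00 from %m gets %x
// back from the ISD::AND case above (the mask's known-one bits cover every
// demanded bit), so that one user can be rewired to %x without disturbing
// the other users of %m.
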
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Pretend we don't know anything about scalable vectors for now.
  // TODO: We can probably do more work on simplifying the operations for
  // scalable vectors, but for now we just bail out.
  if (VT.isScalableVector())
    return SDValue();

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1)
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-of-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}

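// Illustrative example (not part of the original source): with v8i8 inputs
// zero-extended to v8i16,
//   srl (add (zext %a), (zext %b)), 1
// has at least one known-zero high bit on both operands, so it becomes
//   zext (avgflooru v8i8 %a, %b)
// when AVGFLOORU is legal or custom for v8i8; the avgceil forms are matched
// from the extra +1 in add(add(ext, ext), 1).
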
1055 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1056 /// result of Op are ever used downstream. If we can use this information to
1057 /// simplify Op, create a new simplified DAG node and return true, returning the
1058 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1059 /// return a mask of Known bits for the expression (used to simplify the
1060 /// caller). The Known bits may only be accurate for those bits in the
1061 /// OriginalDemandedBits and OriginalDemandedElts.
1063  SDValue Op, const APInt &OriginalDemandedBits,
1064  const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1065  unsigned Depth, bool AssumeSingleUse) const {
1066  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1067  assert(Op.getScalarValueSizeInBits() == BitWidth &&
1068  "Mask size mismatches value type size!");
1069 
1070  // Don't know anything.
1071  Known = KnownBits(BitWidth);
1072 
1073  // TODO: We can probably do more work on calculating the known bits and
1074  // simplifying the operations for scalable vectors, but for now we just
1075  // bail out.
1076  EVT VT = Op.getValueType();
1077  if (VT.isScalableVector())
1078  return false;
1079 
1080  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1081  unsigned NumElts = OriginalDemandedElts.getBitWidth();
1082  assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) &&
1083  "Unexpected vector size");
1084 
1085  APInt DemandedBits = OriginalDemandedBits;
1086  APInt DemandedElts = OriginalDemandedElts;
1087  SDLoc dl(Op);
1088  auto &DL = TLO.DAG.getDataLayout();
1089 
1090  // Undef operand.
1091  if (Op.isUndef())
1092  return false;
1093 
1094  // We can't simplify target constants.
1095  if (Op.getOpcode() == ISD::TargetConstant)
1096  return false;
1097 
1098  if (Op.getOpcode() == ISD::Constant) {
1099  // We know all of the bits for a constant!
1100  Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1101  return false;
1102  }
1103 
1104  if (Op.getOpcode() == ISD::ConstantFP) {
1105  // We know all of the bits for a floating point constant!
1106  Known = KnownBits::makeConstant(
1107  cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1108  return false;
1109  }
1110 
1111  // Other users may use these bits.
1112  bool HasMultiUse = false;
1113  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1115  // Limit search depth.
1116  return false;
1117  }
1118  // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1120  DemandedElts = APInt::getAllOnes(NumElts);
1121  HasMultiUse = true;
1122  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1123  // Not demanding any bits/elts from Op.
1124  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1125  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1126  // Limit search depth.
1127  return false;
1128  }
1129 
1130  KnownBits Known2;
1131  switch (Op.getOpcode()) {
1132  case ISD::SCALAR_TO_VECTOR: {
1133  if (!DemandedElts[0])
1134  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1135 
1136  KnownBits SrcKnown;
1137  SDValue Src = Op.getOperand(0);
1138  unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1139  APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1140  if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1141  return true;
1142 
1143  // Upper elements are undef, so only get the knownbits if we just demand
1144  // the bottom element.
1145  if (DemandedElts == 1)
1146  Known = SrcKnown.anyextOrTrunc(BitWidth);
1147  break;
1148  }
1149  case ISD::BUILD_VECTOR:
1150  // Collect the known bits that are shared by every demanded element.
1151  // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1152  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1153  return false; // Don't fall through, will infinitely loop.
1154  case ISD::LOAD: {
1155  auto *LD = cast<LoadSDNode>(Op);
1156  if (getTargetConstantFromLoad(LD)) {
1157  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1158  return false; // Don't fall through, will infinitely loop.
1159  }
1160  if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1161  // If this is a ZEXTLoad and we are looking at the loaded value.
1162  EVT MemVT = LD->getMemoryVT();
1163  unsigned MemBits = MemVT.getScalarSizeInBits();
1164  Known.Zero.setBitsFrom(MemBits);
1165  return false; // Don't fall through, will infinitely loop.
1166  }
1167  break;
1168  }
1169  case ISD::INSERT_VECTOR_ELT: {
1170  SDValue Vec = Op.getOperand(0);
1171  SDValue Scl = Op.getOperand(1);
1172  auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1173  EVT VecVT = Vec.getValueType();
1174 
1175  // If index isn't constant, assume we need all vector elements AND the
1176  // inserted element.
1177  APInt DemandedVecElts(DemandedElts);
1178  if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1179  unsigned Idx = CIdx->getZExtValue();
1180  DemandedVecElts.clearBit(Idx);
1181 
1182  // Inserted element is not required.
1183  if (!DemandedElts[Idx])
1184  return TLO.CombineTo(Op, Vec);
1185  }
1186 
1187  KnownBits KnownScl;
1188  unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1189  APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1190  if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1191  return true;
1192 
1193  Known = KnownScl.anyextOrTrunc(BitWidth);
1194 
1195  KnownBits KnownVec;
1196  if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1197  Depth + 1))
1198  return true;
1199 
1200  if (!!DemandedVecElts)
1201  Known = KnownBits::commonBits(Known, KnownVec);
1202 
1203  return false;
1204  }
1205  case ISD::INSERT_SUBVECTOR: {
1206  // Demand any elements from the subvector and the remainder from the src its
1207  // inserted into.
1208  SDValue Src = Op.getOperand(0);
1209  SDValue Sub = Op.getOperand(1);
1210  uint64_t Idx = Op.getConstantOperandVal(2);
1211  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1212  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1213  APInt DemandedSrcElts = DemandedElts;
1214  DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1215 
1216  KnownBits KnownSub, KnownSrc;
1217  if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1218  Depth + 1))
1219  return true;
1220  if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1221  Depth + 1))
1222  return true;
1223 
1224  Known.Zero.setAllBits();
1225  Known.One.setAllBits();
1226  if (!!DemandedSubElts)
1227  Known = KnownBits::commonBits(Known, KnownSub);
1228  if (!!DemandedSrcElts)
1229  Known = KnownBits::commonBits(Known, KnownSrc);
1230 
1231  // Attempt to avoid multi-use src if we don't need anything from it.
1232  if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1233  !DemandedSrcElts.isAllOnes()) {
1234  SDValue NewSub = SimplifyMultipleUseDemandedBits(
1235  Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1236  SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1237  Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1238  if (NewSub || NewSrc) {
1239  NewSub = NewSub ? NewSub : Sub;
1240  NewSrc = NewSrc ? NewSrc : Src;
1241  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1242  Op.getOperand(2));
1243  return TLO.CombineTo(Op, NewOp);
1244  }
1245  }
1246  break;
1247  }
1248  case ISD::EXTRACT_SUBVECTOR: {
1249  // Offset the demanded elts by the subvector index.
1250  SDValue Src = Op.getOperand(0);
1251  if (Src.getValueType().isScalableVector())
1252  break;
1253  uint64_t Idx = Op.getConstantOperandVal(1);
1254  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1255  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1256 
1257  if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1258  Depth + 1))
1259  return true;
1260 
1261  // Attempt to avoid multi-use src if we don't need anything from it.
1262  if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1263  SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1264  Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1265  if (DemandedSrc) {
1266  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1267  Op.getOperand(1));
1268  return TLO.CombineTo(Op, NewOp);
1269  }
1270  }
1271  break;
1272  }
1273  case ISD::CONCAT_VECTORS: {
1274  Known.Zero.setAllBits();
1275  Known.One.setAllBits();
1276  EVT SubVT = Op.getOperand(0).getValueType();
1277  unsigned NumSubVecs = Op.getNumOperands();
1278  unsigned NumSubElts = SubVT.getVectorNumElements();
1279  for (unsigned i = 0; i != NumSubVecs; ++i) {
1280  APInt DemandedSubElts =
1281  DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1282  if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1283  Known2, TLO, Depth + 1))
1284  return true;
1285  // Known bits are shared by every demanded subvector element.
1286  if (!!DemandedSubElts)
1287  Known = KnownBits::commonBits(Known, Known2);
1288  }
1289  break;
1290  }
1291  case ISD::VECTOR_SHUFFLE: {
1292  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1293 
1294  // Collect demanded elements from shuffle operands..
1295  APInt DemandedLHS, DemandedRHS;
1296  if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1297  DemandedRHS))
1298  break;
1299 
1300  if (!!DemandedLHS || !!DemandedRHS) {
1301  SDValue Op0 = Op.getOperand(0);
1302  SDValue Op1 = Op.getOperand(1);
1303 
1304  Known.Zero.setAllBits();
1305  Known.One.setAllBits();
1306  if (!!DemandedLHS) {
1307  if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1308  Depth + 1))
1309  return true;
1310  Known = KnownBits::commonBits(Known, Known2);
1311  }
1312  if (!!DemandedRHS) {
1313  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1314  Depth + 1))
1315  return true;
1316  Known = KnownBits::commonBits(Known, Known2);
1317  }
1318 
1319  // Attempt to avoid multi-use ops if we don't need anything from them.
1320  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1321  Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1322  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1323  Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1324  if (DemandedOp0 || DemandedOp1) {
1325  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1326  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1327  SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1328  return TLO.CombineTo(Op, NewOp);
1329  }
1330  }
1331  break;
1332  }
1333  case ISD::AND: {
1334  SDValue Op0 = Op.getOperand(0);
1335  SDValue Op1 = Op.getOperand(1);
1336 
1337  // If the RHS is a constant, check to see if the LHS would be zero without
1338  // using the bits from the RHS. Below, we use knowledge about the RHS to
1339  // simplify the LHS, here we're using information from the LHS to simplify
1340  // the RHS.
1341  if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1342  // Do not increment Depth here; that can cause an infinite loop.
1343  KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1344  // If the LHS already has zeros where RHSC does, this 'and' is dead.
1345  if ((LHSKnown.Zero & DemandedBits) ==
1346  (~RHSC->getAPIntValue() & DemandedBits))
1347  return TLO.CombineTo(Op, Op0);
1348 
1349  // If any of the set bits in the RHS are known zero on the LHS, shrink
1350  // the constant.
1351  if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1352  DemandedElts, TLO))
1353  return true;
1354 
1355  // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1356  // constant, but if this 'and' is only clearing bits that were just set by
1357  // the xor, then this 'and' can be eliminated by shrinking the mask of
1358  // the xor. For example, for a 32-bit X:
1359  // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1360  if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1361  LHSKnown.One == ~RHSC->getAPIntValue()) {
1362  SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1363  return TLO.CombineTo(Op, Xor);
1364  }
1365  }
1366 
1367  // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1368  // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1369  if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
1370  (Op0.getOperand(0).isUndef() ||
1372  Op0->hasOneUse()) {
1373  unsigned NumSubElts =
1375  unsigned SubIdx = Op0.getConstantOperandVal(2);
1376  APInt DemandedSub =
1377  APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1378  KnownBits KnownSubMask =
1379  TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1380  if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1381  SDValue NewAnd =
1382  TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1383  SDValue NewInsert =
1384  TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1385  Op0.getOperand(1), Op0.getOperand(2));
1386  return TLO.CombineTo(Op, NewInsert);
1387  }
1388  }
1389 
1390  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1391  Depth + 1))
1392  return true;
1393  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1394  if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1395  Known2, TLO, Depth + 1))
1396  return true;
1397  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1398 
1399  // If all of the demanded bits are known one on one side, return the other.
1400  // These bits cannot contribute to the result of the 'and'.
1401  if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1402  return TLO.CombineTo(Op, Op0);
1403  if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1404  return TLO.CombineTo(Op, Op1);
1405  // If all of the demanded bits in the inputs are known zeros, return zero.
1406  if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1407  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1408  // If the RHS is a constant, see if we can simplify it.
1409  if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1410  TLO))
1411  return true;
1412  // If the operation can be done in a smaller type, do so.
1413  if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1414  return true;
1415 
1416  // Attempt to avoid multi-use ops if we don't need anything from them.
1417  if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1418  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1419  Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1420  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1421  Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1422  if (DemandedOp0 || DemandedOp1) {
1423  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1424  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1425  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1426  return TLO.CombineTo(Op, NewOp);
1427  }
1428  }
1429 
1430  Known &= Known2;
1431  break;
1432  }
1433  case ISD::OR: {
1434  SDValue Op0 = Op.getOperand(0);
1435  SDValue Op1 = Op.getOperand(1);
1436 
1437  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1438  Depth + 1))
1439  return true;
1440  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1441  if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1442  Known2, TLO, Depth + 1))
1443  return true;
1444  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1445 
1446  // If all of the demanded bits are known zero on one side, return the other.
1447  // These bits cannot contribute to the result of the 'or'.
1448  if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1449  return TLO.CombineTo(Op, Op0);
1450  if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1451  return TLO.CombineTo(Op, Op1);
1452  // If the RHS is a constant, see if we can simplify it.
1453  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1454  return true;
1455  // If the operation can be done in a smaller type, do so.
1456  if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1457  return true;
1458 
1459  // Attempt to avoid multi-use ops if we don't need anything from them.
1460  if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1461  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1462  Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1463  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1464  Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1465  if (DemandedOp0 || DemandedOp1) {
1466  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1467  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1468  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1469  return TLO.CombineTo(Op, NewOp);
1470  }
1471  }
1472 
1473  // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1474  // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1475  if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1476  Op0->hasOneUse() && Op1->hasOneUse()) {
1477  // Attempt to match all commutations - m_c_Or would've been useful!
1478  for (int I = 0; I != 2; ++I) {
1479  SDValue X = Op.getOperand(I).getOperand(0);
1480  SDValue C1 = Op.getOperand(I).getOperand(1);
1481  SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1482  SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1483  if (Alt.getOpcode() == ISD::OR) {
1484  for (int J = 0; J != 2; ++J) {
1485  if (X == Alt.getOperand(J)) {
1486  SDValue Y = Alt.getOperand(1 - J);
1487  if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1488  {C1, C2})) {
1489  SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1490  SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1491  return TLO.CombineTo(
1492  Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1493  }
1494  }
1495  }
1496  }
1497  }
1498  }
1499 
1500  Known |= Known2;
1501  break;
1502  }
1503  case ISD::XOR: {
1504  SDValue Op0 = Op.getOperand(0);
1505  SDValue Op1 = Op.getOperand(1);
1506 
1507  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1508  Depth + 1))
1509  return true;
1510  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1511  if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1512  Depth + 1))
1513  return true;
1514  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1515 
1516  // If all of the demanded bits are known zero on one side, return the other.
1517  // These bits cannot contribute to the result of the 'xor'.
1518  if (DemandedBits.isSubsetOf(Known.Zero))
1519  return TLO.CombineTo(Op, Op0);
1520  if (DemandedBits.isSubsetOf(Known2.Zero))
1521  return TLO.CombineTo(Op, Op1);
1522  // If the operation can be done in a smaller type, do so.
1523  if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1524  return true;
1525 
1526  // If all of the unknown bits are known to be zero on one side or the other
1527  // turn this into an *inclusive* or.
1528  // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1529  if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1530  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1531 
1532  ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1533  if (C) {
1534  // If one side is a constant, and all of the set bits in the constant are
1535  // also known set on the other side, turn this into an AND, as we know
1536  // the bits will be cleared.
1537  // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1538  // NB: it is okay if more bits are known than are requested
1539  if (C->getAPIntValue() == Known2.One) {
1540  SDValue ANDC =
1541  TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1542  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1543  }
1544 
1545  // If the RHS is a constant, see if we can change it. Don't alter a -1
1546  // constant because that's a 'not' op, and that is better for combining
1547  // and codegen.
1548  if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1549  // We're flipping all demanded bits. Flip the undemanded bits too.
1550  SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1551  return TLO.CombineTo(Op, New);
1552  }
1553 
1554  unsigned Op0Opcode = Op0.getOpcode();
1555  if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1556  if (ConstantSDNode *ShiftC =
1557  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1558  // Don't crash on an oversized shift. We can not guarantee that a
1559  // bogus shift has been simplified to undef.
1560  if (ShiftC->getAPIntValue().ult(BitWidth)) {
1561  uint64_t ShiftAmt = ShiftC->getZExtValue();
1563  Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1564  : Ones.lshr(ShiftAmt);
1565  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1566  if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1567  TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1568  // If the xor constant is a demanded mask, do a 'not' before the
1569  // shift:
1570  // xor (X << ShiftC), XorC --> (not X) << ShiftC
1571  // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
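 // (Worked example, i8: xor (X << 2), 0xFC --> (not X) << 2, since
 // 0xFC == 0xFF << 2 is exactly the mask of bits the shl can produce.)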
1572  SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1573  return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1574  Op0.getOperand(1)));
1575  }
1576  }
1577  }
1578  }
1579  }
1580 
1581  // If we can't turn this into a 'not', try to shrink the constant.
1582  if (!C || !C->isAllOnes())
1583  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1584  return true;
1585 
1586  // Attempt to avoid multi-use ops if we don't need anything from them.
1587  if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1588  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1589  Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1590  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1591  Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1592  if (DemandedOp0 || DemandedOp1) {
1593  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1594  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1595  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1596  return TLO.CombineTo(Op, NewOp);
1597  }
1598  }
1599 
1600  Known ^= Known2;
1601  break;
1602  }
1603  case ISD::SELECT:
1604  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1605  Depth + 1))
1606  return true;
1607  if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1608  Depth + 1))
1609  return true;
1610  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1611  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1612 
1613  // If the operands are constants, see if we can simplify them.
1614  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1615  return true;
1616 
1617  // Only known if known in both the LHS and RHS.
1618  Known = KnownBits::commonBits(Known, Known2);
1619  break;
1620  case ISD::VSELECT:
1621  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1622  Known, TLO, Depth + 1))
1623  return true;
1624  if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1625  Known2, TLO, Depth + 1))
1626  return true;
1627  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1628  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1629 
1630  // Only known if known in both the LHS and RHS.
1631  Known = KnownBits::commonBits(Known, Known2);
1632  break;
1633  case ISD::SELECT_CC:
1634  if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1635  Depth + 1))
1636  return true;
1637  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1638  Depth + 1))
1639  return true;
1640  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1641  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1642 
1643  // If the operands are constants, see if we can simplify them.
1644  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1645  return true;
1646 
1647  // Only known if known in both the LHS and RHS.
1648  Known = KnownBits::commonBits(Known, Known2);
1649  break;
1650  case ISD::SETCC: {
1651  SDValue Op0 = Op.getOperand(0);
1652  SDValue Op1 = Op.getOperand(1);
1653  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1654  // If (1) we only need the sign-bit, (2) the setcc operands are the same
1655  // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1656  // -1, we may be able to bypass the setcc.
1657  if (DemandedBits.isSignMask() &&
1658  Op0.getScalarValueSizeInBits() == BitWidth &&
1659  getBooleanContents(Op0.getValueType()) ==
1660  BooleanContent::ZeroOrNegativeOneBooleanContent) {
1661  // If we're testing X < 0, then this compare isn't needed - just use X!
1662  // FIXME: We're limiting to integer types here, but this should also work
1663  // if we don't care about FP signed-zero. The use of SETLT with FP means
1664  // that we don't care about NaNs.
1665  if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1666  (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1667  return TLO.CombineTo(Op, Op0);
1668 
1669  // TODO: Should we check for other forms of sign-bit comparisons?
1670  // Examples: X <= -1, X >= 0
1671  }
1672  if (getBooleanContents(Op0.getValueType()) ==
1673  TargetLowering::ZeroOrOneBooleanContent &&
1674  BitWidth > 1)
1675  Known.Zero.setBitsFrom(1);
1676  break;
1677  }
1678  case ISD::SHL: {
1679  SDValue Op0 = Op.getOperand(0);
1680  SDValue Op1 = Op.getOperand(1);
1681  EVT ShiftVT = Op1.getValueType();
1682 
1683  if (const APInt *SA =
1684  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1685  unsigned ShAmt = SA->getZExtValue();
1686  if (ShAmt == 0)
1687  return TLO.CombineTo(Op, Op0);
1688 
1689  // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1690  // single shift. We can do this if the bottom bits (which are shifted
1691  // out) are never demanded.
1692  // TODO - support non-uniform vector amounts.
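 // (Worked example: with the low 6 bits undemanded,
 // ((X >>u 4) << 6) == (X << 2), as both place X[I] at bit I + 2 for
 // every demanded position.)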
1693  if (Op0.getOpcode() == ISD::SRL) {
1694  if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1695  if (const APInt *SA2 =
1696  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1697  unsigned C1 = SA2->getZExtValue();
1698  unsigned Opc = ISD::SHL;
1699  int Diff = ShAmt - C1;
1700  if (Diff < 0) {
1701  Diff = -Diff;
1702  Opc = ISD::SRL;
1703  }
1704  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1705  return TLO.CombineTo(
1706  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1707  }
1708  }
1709  }
1710 
1711  // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1712  // are not demanded. This will likely allow the anyext to be folded away.
1713  // TODO - support non-uniform vector amounts.
1714  if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1715  SDValue InnerOp = Op0.getOperand(0);
1716  EVT InnerVT = InnerOp.getValueType();
1717  unsigned InnerBits = InnerVT.getScalarSizeInBits();
1718  if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1719  isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1720  EVT ShTy = getShiftAmountTy(InnerVT, DL);
1721  if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1722  ShTy = InnerVT;
1723  SDValue NarrowShl =
1724  TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1725  TLO.DAG.getConstant(ShAmt, dl, ShTy));
1726  return TLO.CombineTo(
1727  Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1728  }
1729 
1730  // Repeat the SHL optimization above in cases where an extension
1731  // intervenes: (shl (anyext (shr x, c1)), c2) to
1732  // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1733  // aren't demanded (as above) and that the shifted upper c1 bits of
1734  // x aren't demanded.
1735  // TODO - support non-uniform vector amounts.
1736  if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1737  InnerOp.hasOneUse()) {
1738  if (const APInt *SA2 =
1739  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1740  unsigned InnerShAmt = SA2->getZExtValue();
1741  if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1742  DemandedBits.getActiveBits() <=
1743  (InnerBits - InnerShAmt + ShAmt) &&
1744  DemandedBits.countTrailingZeros() >= ShAmt) {
1745  SDValue NewSA =
1746  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1747  SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1748  InnerOp.getOperand(0));
1749  return TLO.CombineTo(
1750  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1751  }
1752  }
1753  }
1754  }
1755 
1756  APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1757  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1758  Depth + 1))
1759  return true;
1760  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1761  Known.Zero <<= ShAmt;
1762  Known.One <<= ShAmt;
1763  // low bits known zero.
1764  Known.Zero.setLowBits(ShAmt);
1765 
1766  // Attempt to avoid multi-use ops if we don't need anything from them.
1767  if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1768  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1769  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1770  if (DemandedOp0) {
1771  SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1772  return TLO.CombineTo(Op, NewOp);
1773  }
1774  }
1775 
1776  // Try shrinking the operation as long as the shift amount will still be
1777  // in range.
1778  if ((ShAmt < DemandedBits.getActiveBits()) &&
1779  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1780  return true;
1781  } else {
1782  // This is a variable shift, so we can't shift the demand mask by a known
1783  // amount. But if we are not demanding high bits, then we are not
1784  // demanding those bits from the pre-shifted operand either.
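 // (e.g. if only the low 16 bits of an i32 shl are demanded, a shl by
 // any amount can only move the low 16 bits of Op0 into that window,
 // so Op0's upper 16 bits are never demanded.)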
1785  if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
1786  APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1787  if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1788  Depth + 1)) {
1789  SDNodeFlags Flags = Op.getNode()->getFlags();
1790  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1791  // Disable the nsw and nuw flags. We can no longer guarantee that we
1792  // won't wrap after simplification.
1793  Flags.setNoSignedWrap(false);
1794  Flags.setNoUnsignedWrap(false);
1795  Op->setFlags(Flags);
1796  }
1797  return true;
1798  }
1799  Known.resetAll();
1800  }
1801  }
1802 
1803  // If we are only demanding sign bits then we can use the shift source
1804  // directly.
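 // (e.g. i8 (X shl 1) with DemandedBits == 0xC0: if X has at least 3
 // sign bits, bits 7 and 6 of the shifted value still equal X's sign
 // bit, so X itself can be used.)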
1805  if (const APInt *MaxSA =
1806  TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1807  unsigned ShAmt = MaxSA->getZExtValue();
1808  unsigned NumSignBits =
1809  TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1810  unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1811  if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1812  return TLO.CombineTo(Op, Op0);
1813  }
1814  break;
1815  }
1816  case ISD::SRL: {
1817  SDValue Op0 = Op.getOperand(0);
1818  SDValue Op1 = Op.getOperand(1);
1819  EVT ShiftVT = Op1.getValueType();
1820 
1821  // Try to match AVG patterns.
1822  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1823  DemandedElts, Depth + 1))
1824  return TLO.CombineTo(Op, AVG);
1825 
1826  if (const APInt *SA =
1827  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1828  unsigned ShAmt = SA->getZExtValue();
1829  if (ShAmt == 0)
1830  return TLO.CombineTo(Op, Op0);
1831 
1832  // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1833  // single shift. We can do this if the top bits (which are shifted out)
1834  // are never demanded.
1835  // TODO - support non-uniform vector amounts.
1836  if (Op0.getOpcode() == ISD::SHL) {
1837  if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1838  if (const APInt *SA2 =
1839  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1840  unsigned C1 = SA2->getZExtValue();
1841  unsigned Opc = ISD::SRL;
1842  int Diff = ShAmt - C1;
1843  if (Diff < 0) {
1844  Diff = -Diff;
1845  Opc = ISD::SHL;
1846  }
1847  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1848  return TLO.CombineTo(
1849  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1850  }
1851  }
1852  }
1853 
1854  APInt InDemandedMask = (DemandedBits << ShAmt);
1855 
1856  // If the shift is exact, then it does demand the low bits (and knows that
1857  // they are zero).
1858  if (Op->getFlags().hasExact())
1859  InDemandedMask.setLowBits(ShAmt);
1860 
1861  // Compute the new bits that are at the top now.
1862  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1863  Depth + 1))
1864  return true;
1865  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1866  Known.Zero.lshrInPlace(ShAmt);
1867  Known.One.lshrInPlace(ShAmt);
1868  // High bits known zero.
1869  Known.Zero.setHighBits(ShAmt);
1870 
1871  // Attempt to avoid multi-use ops if we don't need anything from them.
1872  if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1873  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1874  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1875  if (DemandedOp0) {
1876  SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1877  return TLO.CombineTo(Op, NewOp);
1878  }
1879  }
1880  }
1881  break;
1882  }
1883  case ISD::SRA: {
1884  SDValue Op0 = Op.getOperand(0);
1885  SDValue Op1 = Op.getOperand(1);
1886  EVT ShiftVT = Op1.getValueType();
1887 
1888  // If we only want bits that already match the signbit then we don't need
1889  // to shift.
1890  unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1891  if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1892  NumHiDemandedBits)
1893  return TLO.CombineTo(Op, Op0);
1894 
1895  // If this is an arithmetic shift right and only the low-bit is set, we can
1896  // always convert this into a logical shr, even if the shift amount is
1897  // variable. The low bit of the shift cannot be an input sign bit unless
1898  // the shift amount is >= the size of the datatype, which is undefined.
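 // (e.g. with DemandedBits == 1, bit 0 of (X sra C) is X[C] for any
 // in-range C, and (X srl C) computes exactly the same bit.)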
1899  if (DemandedBits.isOne())
1900  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1901 
1902  // Try to match AVG patterns.
1903  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1904  DemandedElts, Depth + 1))
1905  return TLO.CombineTo(Op, AVG);
1906 
1907  if (const APInt *SA =
1908  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1909  unsigned ShAmt = SA->getZExtValue();
1910  if (ShAmt == 0)
1911  return TLO.CombineTo(Op, Op0);
1912 
1913  APInt InDemandedMask = (DemandedBits << ShAmt);
1914 
1915  // If the shift is exact, then it does demand the low bits (and knows that
1916  // they are zero).
1917  if (Op->getFlags().hasExact())
1918  InDemandedMask.setLowBits(ShAmt);
1919 
1920  // If any of the demanded bits are produced by the sign extension, we also
1921  // demand the input sign bit.
1922  if (DemandedBits.countLeadingZeros() < ShAmt)
1923  InDemandedMask.setSignBit();
1924 
1925  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1926  Depth + 1))
1927  return true;
1928  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1929  Known.Zero.lshrInPlace(ShAmt);
1930  Known.One.lshrInPlace(ShAmt);
1931 
1932  // If the input sign bit is known to be zero, or if none of the top bits
1933  // are demanded, turn this into an unsigned shift right.
1934  if (Known.Zero[BitWidth - ShAmt - 1] ||
1935  DemandedBits.countLeadingZeros() >= ShAmt) {
1936  SDNodeFlags Flags;
1937  Flags.setExact(Op->getFlags().hasExact());
1938  return TLO.CombineTo(
1939  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1940  }
1941 
1942  int Log2 = DemandedBits.exactLogBase2();
1943  if (Log2 >= 0) {
1944  // The bit must come from the sign.
1945  SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1946  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1947  }
1948 
1949  if (Known.One[BitWidth - ShAmt - 1])
1950  // New bits are known one.
1951  Known.One.setHighBits(ShAmt);
1952 
1953  // Attempt to avoid multi-use ops if we don't need anything from them.
1954  if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1955  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1956  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1957  if (DemandedOp0) {
1958  SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1959  return TLO.CombineTo(Op, NewOp);
1960  }
1961  }
1962  }
1963  break;
1964  }
1965  case ISD::FSHL:
1966  case ISD::FSHR: {
1967  SDValue Op0 = Op.getOperand(0);
1968  SDValue Op1 = Op.getOperand(1);
1969  SDValue Op2 = Op.getOperand(2);
1970  bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1971 
1972  if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1973  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1974 
1975  // For fshl, 0-shift returns the 1st arg.
1976  // For fshr, 0-shift returns the 2nd arg.
1977  if (Amt == 0) {
1978  if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1979  Known, TLO, Depth + 1))
1980  return true;
1981  break;
1982  }
1983 
1984  // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1985  // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
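 // (Worked example, i8: fshl(X, Y, 3) == (X << 3) | (Y >>u 5), so X
 // only feeds bits [7:3] and Y only feeds bits [2:0]; each operand
 // needs just the slice of DemandedBits its shifted copy can reach.)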
1986  APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1987  APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1988  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1989  Depth + 1))
1990  return true;
1991  if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1992  Depth + 1))
1993  return true;
1994 
1995  Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1996  Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1997  Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1998  Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1999  Known.One |= Known2.One;
2000  Known.Zero |= Known2.Zero;
2001 
2002  // Attempt to avoid multi-use ops if we don't need anything from them.
2003  if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2004  !DemandedElts.isAllOnes()) {
2005  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2006  Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2007  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2008  Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2009  if (DemandedOp0 || DemandedOp1) {
2010  DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2011  DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2012  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2013  DemandedOp1, Op2);
2014  return TLO.CombineTo(Op, NewOp);
2015  }
2016  }
2017  }
2018 
2019  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2020  if (isPowerOf2_32(BitWidth)) {
2021  APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2022  if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2023  Known2, TLO, Depth + 1))
2024  return true;
2025  }
2026  break;
2027  }
2028  case ISD::ROTL:
2029  case ISD::ROTR: {
2030  SDValue Op0 = Op.getOperand(0);
2031  SDValue Op1 = Op.getOperand(1);
2032  bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2033 
2034  // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2035  if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2036  return TLO.CombineTo(Op, Op0);
2037 
2038  if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2039  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2040  unsigned RevAmt = BitWidth - Amt;
2041 
2042  // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2043  // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
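 // (e.g. i8 rotl(X, 3): result bit I comes from X[(I - 3) mod 8], so
 // rotating the demanded mask right by 3 yields exactly the source
 // bits that are needed.)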
2044  APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2045  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2046  Depth + 1))
2047  return true;
2048 
2049  // rot*(x, 0) --> x
2050  if (Amt == 0)
2051  return TLO.CombineTo(Op, Op0);
2052 
2053  // See if we don't demand either half of the rotated bits.
2054  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2055  DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
2056  Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2057  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2058  }
2059  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2060  DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
2061  Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2062  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2063  }
2064  }
2065 
2066  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2067  if (isPowerOf2_32(BitWidth)) {
2068  APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2069  if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2070  Depth + 1))
2071  return true;
2072  }
2073  break;
2074  }
2075  case ISD::UMIN: {
2076  // Check if one arg is always less than (or equal) to the other arg.
2077  SDValue Op0 = Op.getOperand(0);
2078  SDValue Op1 = Op.getOperand(1);
2079  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2080  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2081  Known = KnownBits::umin(Known0, Known1);
2082  if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2083  return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
2084  if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2085  return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
2086  break;
2087  }
2088  case ISD::UMAX: {
2089  // Check if one arg is always greater than (or equal) to the other arg.
2090  SDValue Op0 = Op.getOperand(0);
2091  SDValue Op1 = Op.getOperand(1);
2092  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2093  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2094  Known = KnownBits::umax(Known0, Known1);
2095  if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2096  return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
2097  if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2098  return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
2099  break;
2100  }
2101  case ISD::BITREVERSE: {
2102  SDValue Src = Op.getOperand(0);
2103  APInt DemandedSrcBits = DemandedBits.reverseBits();
2104  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2105  Depth + 1))
2106  return true;
2107  Known.One = Known2.One.reverseBits();
2108  Known.Zero = Known2.Zero.reverseBits();
2109  break;
2110  }
2111  case ISD::BSWAP: {
2112  SDValue Src = Op.getOperand(0);
2113 
2114  // If the only bits demanded come from one byte of the bswap result,
2115  // just shift the input byte into position to eliminate the bswap.
2116  unsigned NLZ = DemandedBits.countLeadingZeros();
2117  unsigned NTZ = DemandedBits.countTrailingZeros();
2118 
2119  // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2120  // we need all the bits down to bit 8. Likewise, round NLZ. If we
2121  // have 14 leading zeros, round to 8.
2122  NLZ = alignDown(NLZ, 8);
2123  NTZ = alignDown(NTZ, 8);
2124  // If we need exactly one byte, we can do this transformation.
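 // (e.g. i32 bswap with only bits [23:16] demanded: NLZ == 8 and
 // NTZ == 16, the byte comes from input bits [15:8], and a shl by
 // NTZ - NLZ == 8 moves it into place.)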
2125  if (BitWidth - NLZ - NTZ == 8) {
2126  // Replace this with either a left or right shift to get the byte into
2127  // the right place.
2128  unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2129  if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2130  EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2131  unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2132  SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2133  SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2134  return TLO.CombineTo(Op, NewOp);
2135  }
2136  }
2137 
2138  APInt DemandedSrcBits = DemandedBits.byteSwap();
2139  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2140  Depth + 1))
2141  return true;
2142  Known.One = Known2.One.byteSwap();
2143  Known.Zero = Known2.Zero.byteSwap();
2144  break;
2145  }
2146  case ISD::CTPOP: {
2147  // If only 1 bit is demanded, replace with PARITY as long as we're before
2148  // op legalization.
2149  // FIXME: Limit to scalars for now.
2150  if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2151  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2152  Op.getOperand(0)));
2153 
2154  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2155  break;
2156  }
2157  case ISD::SIGN_EXTEND_INREG: {
2158  SDValue Op0 = Op.getOperand(0);
2159  EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2160  unsigned ExVTBits = ExVT.getScalarSizeInBits();
2161 
2162  // If we only care about the highest bit, don't bother shifting right.
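 // (e.g. for VT == i32 and ExVT == i8, bit 31 of the sext_inreg result
 // equals bit 7 of the input, so (X shl 24) reproduces the demanded
 // sign bit without the matching shift right.)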
2163  if (DemandedBits.isSignMask()) {
2164  unsigned MinSignedBits =
2165  TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2166  bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2167  // However if the input is already sign extended we expect the sign
2168  // extension to be dropped altogether later and do not simplify.
2169  if (!AlreadySignExtended) {
2170  // Compute the correct shift amount type, which must be getShiftAmountTy
2171  // for scalar types after legalization.
2172  SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2173  getShiftAmountTy(VT, DL));
2174  return TLO.CombineTo(Op,
2175  TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2176  }
2177  }
2178 
2179  // If none of the extended bits are demanded, eliminate the sextinreg.
2180  if (DemandedBits.getActiveBits() <= ExVTBits)
2181  return TLO.CombineTo(Op, Op0);
2182 
2183  APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2184 
2185  // Since the sign extended bits are demanded, we know that the sign
2186  // bit is demanded.
2187  InputDemandedBits.setBit(ExVTBits - 1);
2188 
2189  if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2190  Depth + 1))
2191  return true;
2192  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2193 
2194  // If the sign bit of the input is known set or clear, then we know the
2195  // top bits of the result.
2196 
2197  // If the input sign bit is known zero, convert this into a zero extension.
2198  if (Known.Zero[ExVTBits - 1])
2199  return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2200 
2201  APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2202  if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2203  Known.One.setBitsFrom(ExVTBits);
2204  Known.Zero &= Mask;
2205  } else { // Input sign bit unknown
2206  Known.Zero &= Mask;
2207  Known.One &= Mask;
2208  }
2209  break;
2210  }
2211  case ISD::BUILD_PAIR: {
2212  EVT HalfVT = Op.getOperand(0).getValueType();
2213  unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2214 
2215  APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2216  APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2217 
2218  KnownBits KnownLo, KnownHi;
2219 
2220  if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2221  return true;
2222 
2223  if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2224  return true;
2225 
2226  Known = KnownHi.concat(KnownLo);
2227  break;
2228  }
2229  case ISD::ZERO_EXTEND:
2230  case ISD::ZERO_EXTEND_VECTOR_INREG: {
2231  SDValue Src = Op.getOperand(0);
2232  EVT SrcVT = Src.getValueType();
2233  unsigned InBits = SrcVT.getScalarSizeInBits();
2234  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2235  bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2236 
2237  // If none of the top bits are demanded, convert this into an any_extend.
2238  if (DemandedBits.getActiveBits() <= InBits) {
2239  // If we only need the non-extended bits of the bottom element
2240  // then we can just bitcast to the result.
2241  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2242  VT.getSizeInBits() == SrcVT.getSizeInBits())
2243  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2244 
2245  unsigned Opc =
2246  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2247  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2248  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2249  }
2250 
2251  APInt InDemandedBits = DemandedBits.trunc(InBits);
2252  APInt InDemandedElts = DemandedElts.zext(InElts);
2253  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2254  Depth + 1))
2255  return true;
2256  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2257  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2258  Known = Known.zext(BitWidth);
2259 
2260  // Attempt to avoid multi-use ops if we don't need anything from them.
2261  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2262  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2263  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2264  break;
2265  }
2266  case ISD::SIGN_EXTEND:
2267  case ISD::SIGN_EXTEND_VECTOR_INREG: {
2268  SDValue Src = Op.getOperand(0);
2269  EVT SrcVT = Src.getValueType();
2270  unsigned InBits = SrcVT.getScalarSizeInBits();
2271  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2272  bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2273 
2274  // If none of the top bits are demanded, convert this into an any_extend.
2275  if (DemandedBits.getActiveBits() <= InBits) {
2276  // If we only need the non-extended bits of the bottom element
2277  // then we can just bitcast to the result.
2278  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2279  VT.getSizeInBits() == SrcVT.getSizeInBits())
2280  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2281 
2282  unsigned Opc =
2283  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2284  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2285  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2286  }
2287 
2288  APInt InDemandedBits = DemandedBits.trunc(InBits);
2289  APInt InDemandedElts = DemandedElts.zext(InElts);
2290 
2291  // Since some of the sign extended bits are demanded, we know that the sign
2292  // bit is demanded.
2293  InDemandedBits.setBit(InBits - 1);
2294 
2295  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2296  Depth + 1))
2297  return true;
2298  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2299  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2300 
2301  // If the sign bit is known one, the top bits match.
2302  Known = Known.sext(BitWidth);
2303 
2304  // If the sign bit is known zero, convert this to a zero extend.
2305  if (Known.isNonNegative()) {
2306  unsigned Opc =
2307  IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2308  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2309  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2310  }
2311 
2312  // Attempt to avoid multi-use ops if we don't need anything from them.
2313  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2314  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2315  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2316  break;
2317  }
2318  case ISD::ANY_EXTEND:
2319  case ISD::ANY_EXTEND_VECTOR_INREG: {
2320  SDValue Src = Op.getOperand(0);
2321  EVT SrcVT = Src.getValueType();
2322  unsigned InBits = SrcVT.getScalarSizeInBits();
2323  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2324  bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2325 
2326  // If we only need the bottom element then we can just bitcast.
2327  // TODO: Handle ANY_EXTEND?
2328  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2329  VT.getSizeInBits() == SrcVT.getSizeInBits())
2330  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2331 
2332  APInt InDemandedBits = DemandedBits.trunc(InBits);
2333  APInt InDemandedElts = DemandedElts.zext(InElts);
2334  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2335  Depth + 1))
2336  return true;
2337  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2338  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2339  Known = Known.anyext(BitWidth);
2340 
2341  // Attempt to avoid multi-use ops if we don't need anything from them.
2342  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2343  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2344  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2345  break;
2346  }
2347  case ISD::TRUNCATE: {
2348  SDValue Src = Op.getOperand(0);
2349 
2350  // Simplify the input, using demanded bit information, and compute the known
2351  // zero/one bits live out.
2352  unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2353  APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2354  if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2355  Depth + 1))
2356  return true;
2357  Known = Known.trunc(BitWidth);
2358 
2359  // Attempt to avoid multi-use ops if we don't need anything from them.
2360  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2361  Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2362  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2363 
2364  // If the input is only used by this truncate, see if we can shrink it based
2365  // on the known demanded bits.
2366  switch (Src.getOpcode()) {
2367  default:
2368  break;
2369  case ISD::SRL:
2370  // Shrink SRL by a constant if none of the high bits shifted in are
2371  // demanded.
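 // (e.g. trunc (i64 (X >>u 8)) to i32, with bits [31:24] undemanded,
 // becomes (trunc X) >>u 8: the shifted-in high bits of the i64 source
 // never reach the demanded part of the i32 result.)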
2372  if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2373  // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2374  // undesirable.
2375  break;
2376 
2377  if (Src.getNode()->hasOneUse()) {
2378  const APInt *ShAmtC =
2379  TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2380  if (!ShAmtC || ShAmtC->uge(BitWidth))
2381  break;
2382  uint64_t ShVal = ShAmtC->getZExtValue();
2383 
2384  APInt HighBits =
2385  APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2386  HighBits.lshrInPlace(ShVal);
2387  HighBits = HighBits.trunc(BitWidth);
2388 
2389  if (!(HighBits & DemandedBits)) {
2390  // None of the shifted in bits are needed. Add a truncate of the
2391  // shift input, then shift it.
2392  SDValue NewShAmt = TLO.DAG.getConstant(
2393  ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2394  SDValue NewTrunc =
2395  TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2396  return TLO.CombineTo(
2397  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2398  }
2399  }
2400  break;
2401  }
2402 
2403  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2404  break;
2405  }
2406  case ISD::AssertZext: {
2407  // AssertZext demands all of the high bits, plus any of the low bits
2408  // demanded by its users.
2409  EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2410  APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2411  if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2412  TLO, Depth + 1))
2413  return true;
2414  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2415 
2416  Known.Zero |= ~InMask;
2417  Known.One &= (~Known.Zero);
2418  break;
2419  }
2420  case ISD::EXTRACT_VECTOR_ELT: {
2421  SDValue Src = Op.getOperand(0);
2422  SDValue Idx = Op.getOperand(1);
2423  ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2424  unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2425 
2426  if (SrcEltCnt.isScalable())
2427  return false;
2428 
2429  // Demand the bits from every vector element without a constant index.
2430  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2431  APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2432  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2433  if (CIdx->getAPIntValue().ult(NumSrcElts))
2434  DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2435 
2436  // If BitWidth > EltBitWidth the value is any-extended, so we do not
2437  // know anything about the extended bits.
2438  APInt DemandedSrcBits = DemandedBits;
2439  if (BitWidth > EltBitWidth)
2440  DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2441 
2442  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2443  Depth + 1))
2444  return true;
2445 
2446  // Attempt to avoid multi-use ops if we don't need anything from them.
2447  if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2448  if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2449  Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2450  SDValue NewOp =
2451  TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2452  return TLO.CombineTo(Op, NewOp);
2453  }
2454  }
2455 
2456  Known = Known2;
2457  if (BitWidth > EltBitWidth)
2458  Known = Known.anyext(BitWidth);
2459  break;
2460  }
2461  case ISD::BITCAST: {
2462  SDValue Src = Op.getOperand(0);
2463  EVT SrcVT = Src.getValueType();
2464  unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2465 
2466  // If this is an FP->Int bitcast and if the sign bit is the only
2467  // thing demanded, turn this into a FGETSIGN.
2468  if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2469  DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2470  SrcVT.isFloatingPoint()) {
2471  bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2472  bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2473  if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2474  SrcVT != MVT::f128) {
2475  // Cannot eliminate/lower SHL for f128 yet.
2476  EVT Ty = OpVTLegal ? VT : MVT::i32;
2477  // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2478  // place. We expect the SHL to be eliminated by other optimizations.
2479  SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2480  unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2481  if (!OpVTLegal && OpVTSizeInBits > 32)
2482  Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2483  unsigned ShVal = Op.getValueSizeInBits() - 1;
2484  SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2485  return TLO.CombineTo(Op,
2486  TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2487  }
2488  }
2489 
2490  // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2491  // Demand the elt/bit if any of the original elts/bits are demanded.
2492  if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2493  unsigned Scale = BitWidth / NumSrcEltBits;
2494  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2495  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2496  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2497  for (unsigned i = 0; i != Scale; ++i) {
2498  unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2499  unsigned BitOffset = EltOffset * NumSrcEltBits;
2500  APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2501  if (!Sub.isZero()) {
2502  DemandedSrcBits |= Sub;
2503  for (unsigned j = 0; j != NumElts; ++j)
2504  if (DemandedElts[j])
2505  DemandedSrcElts.setBit((j * Scale) + i);
2506  }
2507  }
2508 
2509  APInt KnownSrcUndef, KnownSrcZero;
2510  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2511  KnownSrcZero, TLO, Depth + 1))
2512  return true;
2513 
2514  KnownBits KnownSrcBits;
2515  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2516  KnownSrcBits, TLO, Depth + 1))
2517  return true;
2518  } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2519  // TODO - bigendian once we have test coverage.
2520  unsigned Scale = NumSrcEltBits / BitWidth;
2521  unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2522  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2523  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2524  for (unsigned i = 0; i != NumElts; ++i)
2525  if (DemandedElts[i]) {
2526  unsigned Offset = (i % Scale) * BitWidth;
2527  DemandedSrcBits.insertBits(DemandedBits, Offset);
2528  DemandedSrcElts.setBit(i / Scale);
2529  }
2530 
2531  if (SrcVT.isVector()) {
2532  APInt KnownSrcUndef, KnownSrcZero;
2533  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2534  KnownSrcZero, TLO, Depth + 1))
2535  return true;
2536  }
2537 
2538  KnownBits KnownSrcBits;
2539  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2540  KnownSrcBits, TLO, Depth + 1))
2541  return true;
2542  }
2543 
2544  // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2545  // recursive call where Known may be useful to the caller.
2546  if (Depth > 0) {
2547  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2548  return false;
2549  }
2550  break;
2551  }
2552  case ISD::MUL:
2553  if (DemandedBits.isPowerOf2()) {
2554  // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2555  // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2556  // odd (has LSB set), then the left-shifted low bit of X is the answer.
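 // (e.g. demanding only bit 2 of (X * 20): 20 == 5 << 2 with 5 odd, so
 // bit 2 of the product equals bit 0 of X, i.e. bit 2 of (X << 2).)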
2557  unsigned CTZ = DemandedBits.countTrailingZeros();
2558  ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2559  if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2560  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2561  SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2562  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2563  return TLO.CombineTo(Op, Shl);
2564  }
2565  }
2566  // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2567  // X * X is odd iff X is odd.
2568  // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
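 // (Writing X == 2*H + b with b == X[0]: X*X == 4*H*H + 4*H*b + b, so
 // X*X mod 4 == b -- bit 0 is X[0] and bit 1 is always zero.)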
2569  if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2570  SDValue One = TLO.DAG.getConstant(1, dl, VT);
2571  SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2572  return TLO.CombineTo(Op, And1);
2573  }
2574  [[fallthrough]];
2575  case ISD::ADD:
2576  case ISD::SUB: {
2577  // Add, Sub, and Mul don't demand any bits in positions beyond that
2578  // of the highest bit demanded of them.
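 // (e.g. if only the low 8 bits of an i32 add are demanded, carries
 // only propagate upward, so bits 8..31 of either operand cannot
 // affect the demanded result bits.)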
2579  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2580  SDNodeFlags Flags = Op.getNode()->getFlags();
2581  unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2582  APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2583  if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2584  Depth + 1) ||
2585  SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2586  Depth + 1) ||
2587  // See if the operation should be performed at a smaller bit width.
2588  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2589  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2590  // Disable the nsw and nuw flags. We can no longer guarantee that we
2591  // won't wrap after simplification.
2592  Flags.setNoSignedWrap(false);
2593  Flags.setNoUnsignedWrap(false);
2594  Op->setFlags(Flags);
2595  }
2596  return true;
2597  }
2598 
2599  // Attempt to avoid multi-use ops if we don't need anything from them.
2600  if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2601  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2602  Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2603  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2604  Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2605  if (DemandedOp0 || DemandedOp1) {
2606  Flags.setNoSignedWrap(false);
2607  Flags.setNoUnsignedWrap(false);
2608  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2609  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2610  SDValue NewOp =
2611  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2612  return TLO.CombineTo(Op, NewOp);
2613  }
2614  }
2615 
2616  // If we have a constant operand, we may be able to turn it into -1 if we
2617  // do not demand the high bits. This can make the constant smaller to
2618  // encode, allow more general folding, or match specialized instruction
2619  // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2620  // is probably not useful (and could be detrimental).
2621  ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2622  APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2623  if (C && !C->isAllOnes() && !C->isOne() &&
2624  (C->getAPIntValue() | HighMask).isAllOnes()) {
2625  SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2626  // Disable the nsw and nuw flags. We can no longer guarantee that we
2627  // won't wrap after simplification.
2628  Flags.setNoSignedWrap(false);
2629  Flags.setNoUnsignedWrap(false);
2630  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2631  return TLO.CombineTo(Op, NewOp);
2632  }
2633 
2634  // Match a multiply with a disguised negated-power-of-2 and convert to
2635  // an equivalent shift-left amount.
2636  // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2637  auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2638  if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2639  return 0;
2640 
2641  // Don't touch opaque constants. Also, ignore zero and power-of-2
2642  // multiplies. Those will get folded later.
2643  ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2644  if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2645  !MulC->getAPIntValue().isPowerOf2()) {
2646  APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2647  if (UnmaskedC.isNegatedPowerOf2())
2648  return (-UnmaskedC).logBase2();
2649  }
2650  return 0;
2651  };
2652 
2653  auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2654  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2655  SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2656  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2657  SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2658  return TLO.CombineTo(Op, Res);
2659  };
2660 
2661  if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2662  if (Op.getOpcode() == ISD::ADD) {
2663  // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2664  if (unsigned ShAmt = getShiftLeftAmt(Op0))
2665  return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2666  // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2667  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2668  return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2669  }
2670  if (Op.getOpcode() == ISD::SUB) {
2671  // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2672  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2673  return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2674  }
2675  }
2676 
2677  [[fallthrough]];
2678  }
2679  default:
2680  // We also ask the target about intrinsics (which could be specific to it).
2681  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2682  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2683  if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2684  Known, TLO, Depth))
2685  return true;
2686  break;
2687  }
2688 
2689  // Just use computeKnownBits to compute output bits.
2690  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2691  break;
2692  }
2693 
2694  // If we know the value of all of the demanded bits, return this as a
2695  // constant.
2696  if (!isTargetCanonicalConstantNode(Op) &&
2697  DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2698  // Avoid folding to a constant if any OpaqueConstant is involved.
2699  const SDNode *N = Op.getNode();
2700  for (SDNode *Op :
2701  llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2702  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2703  if (C->isOpaque())
2704  return false;
2705  }
2706  if (VT.isInteger())
2707  return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2708  if (VT.isFloatingPoint())
2709  return TLO.CombineTo(
2710  Op,
2711  TLO.DAG.getConstantFP(
2712  APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2713  }
2714 
2715  // A multi-use 'all demanded elts' simplify failed to find any known bits.
2716  // Try again just for the original demanded elts.
2717  // Ensure we do this AFTER constant folding above.
2718  if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2719  Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2720 
2721  return false;
2722 }
2723 
2724 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2725  const APInt &DemandedElts,
2726  DAGCombinerInfo &DCI) const {
2727  SelectionDAG &DAG = DCI.DAG;
2728  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2729  !DCI.isBeforeLegalizeOps());
2730 
2731  APInt KnownUndef, KnownZero;
2732  bool Simplified =
2733  SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2734  if (Simplified) {
2735  DCI.AddToWorklist(Op.getNode());
2736  DCI.CommitTargetLoweringOpt(TLO);
2737  }
2738 
2739  return Simplified;
2740 }
2741 
2742 /// Given a vector binary operation and known undefined elements for each input
2743 /// operand, compute whether each element of the output is undefined.
2744 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2745  const APInt &UndefOp0,
2746  const APInt &UndefOp1) {
2747  EVT VT = BO.getValueType();
2748  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2749  "Vector binop only");
2750 
2751  EVT EltVT = VT.getVectorElementType();
2752  unsigned NumElts = VT.getVectorNumElements();
2753  assert(UndefOp0.getBitWidth() == NumElts &&
2754  UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2755 
2756  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2757  const APInt &UndefVals) {
2758  if (UndefVals[Index])
2759  return DAG.getUNDEF(EltVT);
2760 
2761  if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2762  // Try hard to make sure that the getNode() call is not creating temporary
2763  // nodes. Ignore opaque integers because they do not constant fold.
2764  SDValue Elt = BV->getOperand(Index);
2765  auto *C = dyn_cast<ConstantSDNode>(Elt);
2766  if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2767  return Elt;
2768  }
2769 
2770  return SDValue();
2771  };
2772 
2773  APInt KnownUndef = APInt::getZero(NumElts);
2774  for (unsigned i = 0; i != NumElts; ++i) {
2775  // If both inputs for this element are either constant or undef and match
2776  // the element type, compute the constant/undef result for this element of
2777  // the vector.
2778  // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2779  // not handle FP constants. The code within getNode() should be refactored
2780  // to avoid the danger of creating a bogus temporary node here.
2781  SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2782  SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2783  if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2784  if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2785  KnownUndef.setBit(i);
2786  }
2787  return KnownUndef;
2788 }
2789 
2790 bool TargetLowering::SimplifyDemandedVectorElts(
2791  SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2792  APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2793  bool AssumeSingleUse) const {
2794  EVT VT = Op.getValueType();
2795  unsigned Opcode = Op.getOpcode();
2796  APInt DemandedElts = OriginalDemandedElts;
2797  unsigned NumElts = DemandedElts.getBitWidth();
2798  assert(VT.isVector() && "Expected vector op");
2799 
2800  KnownUndef = KnownZero = APInt::getZero(NumElts);
2801 
2802  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2803  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2804  return false;
2805 
2806  // TODO: For now we assume we know nothing about scalable vectors.
2807  if (VT.isScalableVector())
2808  return false;
2809 
2810  assert(VT.getVectorNumElements() == NumElts &&
2811  "Mask size mismatches value type element count!");
2812 
2813  // Undef operand.
2814  if (Op.isUndef()) {
2815  KnownUndef.setAllBits();
2816  return false;
2817  }
2818 
2819  // If Op has other users, assume that all elements are needed.
2820  if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2821  DemandedElts.setAllBits();
2822 
2823  // Not demanding any elements from Op.
2824  if (DemandedElts == 0) {
2825  KnownUndef.setAllBits();
2826  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2827  }
2828 
2829  // Limit search depth.
2830  if (Depth >= SelectionDAG::MaxRecursionDepth)
2831  return false;
2832 
2833  SDLoc DL(Op);
2834  unsigned EltSizeInBits = VT.getScalarSizeInBits();
2835  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2836 
2837  // Helper for demanding the specified elements and all the bits of both binary
2838  // operands.
2839  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2840  SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2841  TLO.DAG, Depth + 1);
2842  SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2843  TLO.DAG, Depth + 1);
2844  if (NewOp0 || NewOp1) {
2845  SDValue NewOp = TLO.DAG.getNode(
2846  Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2847  return TLO.CombineTo(Op, NewOp);
2848  }
2849  return false;
2850  };
2851 
2852  switch (Opcode) {
2853  case ISD::SCALAR_TO_VECTOR: {
2854  if (!DemandedElts[0]) {
2855  KnownUndef.setAllBits();
2856  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2857  }
2858  SDValue ScalarSrc = Op.getOperand(0);
2859  if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2860  SDValue Src = ScalarSrc.getOperand(0);
2861  SDValue Idx = ScalarSrc.getOperand(1);
2862  EVT SrcVT = Src.getValueType();
2863 
2864  ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2865 
2866  if (SrcEltCnt.isScalable())
2867  return false;
2868 
2869  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2870  if (isNullConstant(Idx)) {
2871  APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2872  APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2873  APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2874  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2875  TLO, Depth + 1))
2876  return true;
2877  }
2878  }
2879  KnownUndef.setHighBits(NumElts - 1);
2880  break;
2881  }
2882  case ISD::BITCAST: {
2883  SDValue Src = Op.getOperand(0);
2884  EVT SrcVT = Src.getValueType();
2885 
2886  // We only handle vectors here.
2887  // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2888  if (!SrcVT.isVector())
2889  break;
2890 
2891  // Fast handling of 'identity' bitcasts.
2892  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2893  if (NumSrcElts == NumElts)
2894  return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2895  KnownZero, TLO, Depth + 1);
2896 
2897  APInt SrcDemandedElts, SrcZero, SrcUndef;
2898 
2899  // Bitcast from 'large element' src vector to 'small element' vector, we
2900  // must demand a source element if any DemandedElt maps to it.
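 // (e.g. v2i64 -> v4i32: demanding i32 elts {2,3} demands i64 elt 1,
 // since both narrow halves alias that one wide source element.)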
2901  if ((NumElts % NumSrcElts) == 0) {
2902  unsigned Scale = NumElts / NumSrcElts;
2903  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2904  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2905  TLO, Depth + 1))
2906  return true;
2907 
2908  // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2909  // of the large element.
2910  // TODO - bigendian once we have test coverage.
2911  if (IsLE) {
2912  unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2913  APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2914  for (unsigned i = 0; i != NumElts; ++i)
2915  if (DemandedElts[i]) {
2916  unsigned Ofs = (i % Scale) * EltSizeInBits;
2917  SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2918  }
2919 
2920  KnownBits Known;
2921  if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2922  TLO, Depth + 1))
2923  return true;
2924 
2925  // The bitcast has split each wide element into a number of
2926  // narrow subelements. We have just computed the Known bits
2927  // for wide elements. See if element splitting results in
2928  // some subelements being zero. Only for demanded elements!
2929  for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2930  if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2931  .isAllOnes())
2932  continue;
2933  for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2934  unsigned Elt = Scale * SrcElt + SubElt;
2935  if (DemandedElts[Elt])
2936  KnownZero.setBit(Elt);
2937  }
2938  }
2939  }
2940 
2941  // If the src element is zero/undef then all the output elements will be -
2942  // only demanded elements are guaranteed to be correct.
2943  for (unsigned i = 0; i != NumSrcElts; ++i) {
2944  if (SrcDemandedElts[i]) {
2945  if (SrcZero[i])
2946  KnownZero.setBits(i * Scale, (i + 1) * Scale);
2947  if (SrcUndef[i])
2948  KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2949  }
2950  }
2951  }
2952 
2953  // Bitcast from 'small element' src vector to 'large element' vector, we
2954  // demand all smaller source elements covered by the larger demanded element
2955  // of this vector.
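 // (e.g. v4i32 -> v2i64: demanding i64 elt 0 demands i32 elts {0,1},
 // the two narrow elements that form the wide result element.)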
2956  if ((NumSrcElts % NumElts) == 0) {
2957  unsigned Scale = NumSrcElts / NumElts;
2958  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2959  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2960  TLO, Depth + 1))
2961  return true;
2962 
2963  // If all the src elements covering an output element are zero/undef, then
2964  // the output element will be as well, assuming it was demanded.
2965  for (unsigned i = 0; i != NumElts; ++i) {
2966  if (DemandedElts[i]) {
2967  if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2968  KnownZero.setBit(i);
2969  if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2970  KnownUndef.setBit(i);
2971  }
2972  }
2973  }
2974  break;
2975  }
2976  case ISD::BUILD_VECTOR: {
2977  // Check all elements and simplify any unused elements with UNDEF.
2978  if (!DemandedElts.isAllOnes()) {
2979  // Don't simplify BROADCASTS.
2980  if (llvm::any_of(Op->op_values(),
2981  [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2982  SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2983  bool Updated = false;
2984  for (unsigned i = 0; i != NumElts; ++i) {
2985  if (!DemandedElts[i] && !Ops[i].isUndef()) {
2986  Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2987  KnownUndef.setBit(i);
2988  Updated = true;
2989  }
2990  }
2991  if (Updated)
2992  return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2993  }
2994  }
2995  for (unsigned i = 0; i != NumElts; ++i) {
2996  SDValue SrcOp = Op.getOperand(i);
2997  if (SrcOp.isUndef()) {
2998  KnownUndef.setBit(i);
 2999  } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
 3000  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
 3001  KnownZero.setBit(i);
3002  }
3003  }
3004  break;
3005  }
3006  case ISD::CONCAT_VECTORS: {
3007  EVT SubVT = Op.getOperand(0).getValueType();
3008  unsigned NumSubVecs = Op.getNumOperands();
3009  unsigned NumSubElts = SubVT.getVectorNumElements();
3010  for (unsigned i = 0; i != NumSubVecs; ++i) {
3011  SDValue SubOp = Op.getOperand(i);
3012  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3013  APInt SubUndef, SubZero;
3014  if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3015  Depth + 1))
3016  return true;
3017  KnownUndef.insertBits(SubUndef, i * NumSubElts);
3018  KnownZero.insertBits(SubZero, i * NumSubElts);
3019  }
3020 
3021  // Attempt to avoid multi-use ops if we don't need anything from them.
3022  if (!DemandedElts.isAllOnes()) {
3023  bool FoundNewSub = false;
3024  SmallVector<SDValue, 2> DemandedSubOps;
3025  for (unsigned i = 0; i != NumSubVecs; ++i) {
3026  SDValue SubOp = Op.getOperand(i);
3027  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3028  SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3029  SubOp, SubElts, TLO.DAG, Depth + 1);
3030  DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3031  FoundNewSub = NewSubOp ? true : FoundNewSub;
3032  }
3033  if (FoundNewSub) {
3034  SDValue NewOp =
3035  TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3036  return TLO.CombineTo(Op, NewOp);
3037  }
3038  }
3039  break;
3040  }
3041  case ISD::INSERT_SUBVECTOR: {
 3042  // Demand any elements from the subvector and the remainder from the src
 3043  // it's inserted into.
3044  SDValue Src = Op.getOperand(0);
3045  SDValue Sub = Op.getOperand(1);
3046  uint64_t Idx = Op.getConstantOperandVal(2);
3047  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3048  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3049  APInt DemandedSrcElts = DemandedElts;
3050  DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3051 
3052  APInt SubUndef, SubZero;
3053  if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3054  Depth + 1))
3055  return true;
3056 
3057  // If none of the src operand elements are demanded, replace it with undef.
3058  if (!DemandedSrcElts && !Src.isUndef())
3059  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3060  TLO.DAG.getUNDEF(VT), Sub,
3061  Op.getOperand(2)));
3062 
3063  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3064  TLO, Depth + 1))
3065  return true;
3066  KnownUndef.insertBits(SubUndef, Idx);
3067  KnownZero.insertBits(SubZero, Idx);
3068 
3069  // Attempt to avoid multi-use ops if we don't need anything from them.
3070  if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3071  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3072  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3073  SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3074  Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3075  if (NewSrc || NewSub) {
3076  NewSrc = NewSrc ? NewSrc : Src;
3077  NewSub = NewSub ? NewSub : Sub;
3078  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3079  NewSub, Op.getOperand(2));
3080  return TLO.CombineTo(Op, NewOp);
3081  }
3082  }
3083  break;
3084  }
3085  case ISD::EXTRACT_SUBVECTOR: {
3086  // Offset the demanded elts by the subvector index.
3087  SDValue Src = Op.getOperand(0);
3088  if (Src.getValueType().isScalableVector())
3089  break;
3090  uint64_t Idx = Op.getConstantOperandVal(1);
3091  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3092  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3093 
3094  APInt SrcUndef, SrcZero;
3095  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3096  Depth + 1))
3097  return true;
3098  KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3099  KnownZero = SrcZero.extractBits(NumElts, Idx);
3100 
3101  // Attempt to avoid multi-use ops if we don't need anything from them.
3102  if (!DemandedElts.isAllOnes()) {
3103  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3104  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3105  if (NewSrc) {
3106  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3107  Op.getOperand(1));
3108  return TLO.CombineTo(Op, NewOp);
3109  }
3110  }
3111  break;
3112  }
3113  case ISD::INSERT_VECTOR_ELT: {
3114  SDValue Vec = Op.getOperand(0);
3115  SDValue Scl = Op.getOperand(1);
3116  auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3117 
3118  // For a legal, constant insertion index, if we don't need this insertion
3119  // then strip it, else remove it from the demanded elts.
3120  if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3121  unsigned Idx = CIdx->getZExtValue();
3122  if (!DemandedElts[Idx])
3123  return TLO.CombineTo(Op, Vec);
3124 
3125  APInt DemandedVecElts(DemandedElts);
3126  DemandedVecElts.clearBit(Idx);
3127  if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3128  KnownZero, TLO, Depth + 1))
3129  return true;
3130 
3131  KnownUndef.setBitVal(Idx, Scl.isUndef());
3132 
3133  KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3134  break;
3135  }
3136 
3137  APInt VecUndef, VecZero;
3138  if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3139  Depth + 1))
3140  return true;
3141  // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3142  break;
3143  }
3144  case ISD::VSELECT: {
3145  SDValue Sel = Op.getOperand(0);
3146  SDValue LHS = Op.getOperand(1);
3147  SDValue RHS = Op.getOperand(2);
3148 
3149  // Try to transform the select condition based on the current demanded
3150  // elements.
3151  APInt UndefSel, UndefZero;
3152  if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3153  Depth + 1))
3154  return true;
3155 
3156  // See if we can simplify either vselect operand.
3157  APInt DemandedLHS(DemandedElts);
3158  APInt DemandedRHS(DemandedElts);
3159  APInt UndefLHS, ZeroLHS;
3160  APInt UndefRHS, ZeroRHS;
3161  if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3162  Depth + 1))
3163  return true;
3164  if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3165  Depth + 1))
3166  return true;
3167 
3168  KnownUndef = UndefLHS & UndefRHS;
3169  KnownZero = ZeroLHS & ZeroRHS;
3170 
3171  // If we know that the selected element is always zero, we don't need the
3172  // select value element.
3173  APInt DemandedSel = DemandedElts & ~KnownZero;
3174  if (DemandedSel != DemandedElts)
3175  if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3176  Depth + 1))
3177  return true;
3178 
3179  break;
3180  }
3181  case ISD::VECTOR_SHUFFLE: {
3182  SDValue LHS = Op.getOperand(0);
3183  SDValue RHS = Op.getOperand(1);
3184  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3185 
 3186  // Collect demanded elements from shuffle operands.
3187  APInt DemandedLHS(NumElts, 0);
3188  APInt DemandedRHS(NumElts, 0);
3189  for (unsigned i = 0; i != NumElts; ++i) {
3190  int M = ShuffleMask[i];
3191  if (M < 0 || !DemandedElts[i])
3192  continue;
3193  assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3194  if (M < (int)NumElts)
3195  DemandedLHS.setBit(M);
3196  else
3197  DemandedRHS.setBit(M - NumElts);
3198  }
3199 
3200  // See if we can simplify either shuffle operand.
3201  APInt UndefLHS, ZeroLHS;
3202  APInt UndefRHS, ZeroRHS;
3203  if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3204  Depth + 1))
3205  return true;
3206  if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3207  Depth + 1))
3208  return true;
3209 
3210  // Simplify mask using undef elements from LHS/RHS.
3211  bool Updated = false;
3212  bool IdentityLHS = true, IdentityRHS = true;
3213  SmallVector<int, 32> NewMask(ShuffleMask);
3214  for (unsigned i = 0; i != NumElts; ++i) {
3215  int &M = NewMask[i];
3216  if (M < 0)
3217  continue;
3218  if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3219  (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3220  Updated = true;
3221  M = -1;
3222  }
3223  IdentityLHS &= (M < 0) || (M == (int)i);
3224  IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3225  }
3226 
 3227  // Update legal shuffle masks based on demanded elements if doing so won't
 3228  // reduce to an Identity mask, which can cause premature removal of the shuffle.
3229  if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3230  SDValue LegalShuffle =
3231  buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3232  if (LegalShuffle)
3233  return TLO.CombineTo(Op, LegalShuffle);
3234  }
3235 
3236  // Propagate undef/zero elements from LHS/RHS.
3237  for (unsigned i = 0; i != NumElts; ++i) {
3238  int M = ShuffleMask[i];
3239  if (M < 0) {
3240  KnownUndef.setBit(i);
3241  } else if (M < (int)NumElts) {
3242  if (UndefLHS[M])
3243  KnownUndef.setBit(i);
3244  if (ZeroLHS[M])
3245  KnownZero.setBit(i);
3246  } else {
3247  if (UndefRHS[M - NumElts])
3248  KnownUndef.setBit(i);
3249  if (ZeroRHS[M - NumElts])
3250  KnownZero.setBit(i);
3251  }
3252  }
3253  break;
 3254  }
 3255  case ISD::ANY_EXTEND_VECTOR_INREG:
 3256  case ISD::SIGN_EXTEND_VECTOR_INREG:
 3257  case ISD::ZERO_EXTEND_VECTOR_INREG: {
 3258  APInt SrcUndef, SrcZero;
3259  SDValue Src = Op.getOperand(0);
3260  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3261  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3262  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3263  Depth + 1))
3264  return true;
3265  KnownZero = SrcZero.zextOrTrunc(NumElts);
3266  KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3267 
3268  if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3269  Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3270  DemandedSrcElts == 1) {
3271  // aext - if we just need the bottom element then we can bitcast.
3272  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3273  }
3274 
3275  if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3276  // zext(undef) upper bits are guaranteed to be zero.
3277  if (DemandedElts.isSubsetOf(KnownUndef))
3278  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3279  KnownUndef.clearAllBits();
3280 
3281  // zext - if we just need the bottom element then we can mask:
3282  // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3283  if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3284  Op->isOnlyUserOf(Src.getNode()) &&
3285  Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3286  SDLoc DL(Op);
3287  EVT SrcVT = Src.getValueType();
3288  EVT SrcSVT = SrcVT.getScalarType();
3289  SmallVector<SDValue> MaskElts;
3290  MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3291  MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3292  SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3293  if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3294  ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3295  Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3296  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3297  }
3298  }
3299  }
3300  break;
3301  }
3302 
3303  // TODO: There are more binop opcodes that could be handled here - MIN,
3304  // MAX, saturated math, etc.
3305  case ISD::ADD: {
3306  SDValue Op0 = Op.getOperand(0);
3307  SDValue Op1 = Op.getOperand(1);
3308  if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3309  APInt UndefLHS, ZeroLHS;
3310  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3311  Depth + 1, /*AssumeSingleUse*/ true))
3312  return true;
3313  }
3314  [[fallthrough]];
3315  }
3316  case ISD::OR:
3317  case ISD::XOR:
3318  case ISD::SUB:
3319  case ISD::FADD:
3320  case ISD::FSUB:
3321  case ISD::FMUL:
3322  case ISD::FDIV:
3323  case ISD::FREM: {
3324  SDValue Op0 = Op.getOperand(0);
3325  SDValue Op1 = Op.getOperand(1);
3326 
3327  APInt UndefRHS, ZeroRHS;
3328  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3329  Depth + 1))
3330  return true;
3331  APInt UndefLHS, ZeroLHS;
3332  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3333  Depth + 1))
3334  return true;
3335 
3336  KnownZero = ZeroLHS & ZeroRHS;
3337  KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3338 
3339  // Attempt to avoid multi-use ops if we don't need anything from them.
3340  // TODO - use KnownUndef to relax the demandedelts?
3341  if (!DemandedElts.isAllOnes())
3342  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3343  return true;
3344  break;
3345  }
3346  case ISD::SHL:
3347  case ISD::SRL:
3348  case ISD::SRA:
3349  case ISD::ROTL:
3350  case ISD::ROTR: {
3351  SDValue Op0 = Op.getOperand(0);
3352  SDValue Op1 = Op.getOperand(1);
3353 
3354  APInt UndefRHS, ZeroRHS;
3355  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3356  Depth + 1))
3357  return true;
3358  APInt UndefLHS, ZeroLHS;
3359  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3360  Depth + 1))
3361  return true;
3362 
3363  KnownZero = ZeroLHS;
3364  KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3365 
3366  // Attempt to avoid multi-use ops if we don't need anything from them.
3367  // TODO - use KnownUndef to relax the demandedelts?
3368  if (!DemandedElts.isAllOnes())
3369  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3370  return true;
3371  break;
3372  }
3373  case ISD::MUL:
3374  case ISD::MULHU:
3375  case ISD::MULHS:
3376  case ISD::AND: {
3377  SDValue Op0 = Op.getOperand(0);
3378  SDValue Op1 = Op.getOperand(1);
3379 
3380  APInt SrcUndef, SrcZero;
3381  if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3382  Depth + 1))
3383  return true;
3384  // If we know that a demanded element was zero in Op1 we don't need to
 3385  // demand it in Op0 - it's guaranteed to be zero.
3386  APInt DemandedElts0 = DemandedElts & ~SrcZero;
3387  if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3388  TLO, Depth + 1))
3389  return true;
3390 
3391  KnownUndef &= DemandedElts0;
3392  KnownZero &= DemandedElts0;
3393 
3394  // If every element pair has a zero/undef then just fold to zero.
3395  // fold (and x, undef) -> 0 / (and x, 0) -> 0
3396  // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3397  if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3398  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3399 
3400  // If either side has a zero element, then the result element is zero, even
3401  // if the other is an UNDEF.
3402  // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3403  // and then handle 'and' nodes with the rest of the binop opcodes.
3404  KnownZero |= SrcZero;
3405  KnownUndef &= SrcUndef;
3406  KnownUndef &= ~KnownZero;
3407 
3408  // Attempt to avoid multi-use ops if we don't need anything from them.
3409  if (!DemandedElts.isAllOnes())
3410  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3411  return true;
3412  break;
3413  }
3414  case ISD::TRUNCATE:
3415  case ISD::SIGN_EXTEND:
3416  case ISD::ZERO_EXTEND:
3417  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3418  KnownZero, TLO, Depth + 1))
3419  return true;
3420 
3421  if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3422  // zext(undef) upper bits are guaranteed to be zero.
3423  if (DemandedElts.isSubsetOf(KnownUndef))
3424  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3425  KnownUndef.clearAllBits();
3426  }
3427  break;
3428  default: {
3429  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3430  if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3431  KnownZero, TLO, Depth))
3432  return true;
3433  } else {
3434  KnownBits Known;
3435  APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3436  if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3437  TLO, Depth, AssumeSingleUse))
3438  return true;
3439  }
3440  break;
3441  }
3442  }
3443  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3444 
3445  // Constant fold all undef cases.
3446  // TODO: Handle zero cases as well.
3447  if (DemandedElts.isSubsetOf(KnownUndef))
3448  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3449 
3450  return false;
3451 }
3452 
3453 /// Determine which of the bits specified in Mask are known to be either zero or
3454 /// one and return them in the Known.
 3455 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 3456  KnownBits &Known,
3457  const APInt &DemandedElts,
3458  const SelectionDAG &DAG,
3459  unsigned Depth) const {
3460  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3461  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3462  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3463  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3464  "Should use MaskedValueIsZero if you don't know whether Op"
3465  " is a target node!");
3466  Known.resetAll();
3467 }
3468 
 3469 void TargetLowering::computeKnownBitsForTargetInstr(
 3470  GISelKnownBits &Analysis, Register R, KnownBits &Known,
3471  const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3472  unsigned Depth) const {
3473  Known.resetAll();
3474 }
3475 
 3476 void TargetLowering::computeKnownBitsForFrameIndex(
 3477  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3478  // The low bits are known zero if the pointer is aligned.
3479  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3480 }
3481 
 3482 Align TargetLowering::computeKnownAlignForTargetInstr(
 3483  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3484  unsigned Depth) const {
3485  return Align(1);
3486 }
3487 
3488 /// This method can be implemented by targets that want to expose additional
3489 /// information about sign bits to the DAG Combiner.
 3490 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
 3491  const APInt &,
3492  const SelectionDAG &,
3493  unsigned Depth) const {
3494  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3495  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3496  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3497  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3498  "Should use ComputeNumSignBits if you don't know whether Op"
3499  " is a target node!");
3500  return 1;
3501 }
3502 
 3503 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
 3504  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3505  const MachineRegisterInfo &MRI, unsigned Depth) const {
3506  return 1;
3507 }
3508 
 3509 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
 3510  SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3511  TargetLoweringOpt &TLO, unsigned Depth) const {
3512  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3513  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3514  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3515  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3516  "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3517  " is a target node!");
3518  return false;
3519 }
3520 
 3521 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
 3522  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3523  KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3524  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3525  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3526  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3527  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3528  "Should use SimplifyDemandedBits if you don't know whether Op"
3529  " is a target node!");
3530  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3531  return false;
3532 }
3533 
 3534 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
 3535  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3536  SelectionDAG &DAG, unsigned Depth) const {
3537  assert(
3538  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3539  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3540  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3541  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3542  "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3543  " is a target node!");
3544  return SDValue();
3545 }
3546 
3547 SDValue
 3548 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
 3549  SDValue N1, MutableArrayRef<int> Mask,
 3550  SelectionDAG &DAG) const {
3551  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3552  if (!LegalMask) {
3553  std::swap(N0, N1);
 3554  ShuffleVectorSDNode::commuteMask(Mask);
 3555  LegalMask = isShuffleMaskLegal(Mask, VT);
3556  }
3557 
3558  if (!LegalMask)
3559  return SDValue();
3560 
3561  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3562 }
3563 
 3564 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
 3565  return nullptr;
3566 }
3567 
 3568 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
 3569  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3570  bool PoisonOnly, unsigned Depth) const {
3571  assert(
3572  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3573  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3574  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3575  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3576  "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3577  " is a target node!");
3578  return false;
3579 }
3580 
 3581 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
 3582  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3583  bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3584  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3585  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3586  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3587  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3588  "Should use canCreateUndefOrPoison if you don't know whether Op"
3589  " is a target node!");
3590  // Be conservative and return true.
3591  return true;
3592 }
3593 
 3594 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
 3595  const SelectionDAG &DAG,
3596  bool SNaN,
3597  unsigned Depth) const {
3598  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3599  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3600  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3601  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3602  "Should use isKnownNeverNaN if you don't know whether Op"
3603  " is a target node!");
3604  return false;
3605 }
3606 
 3607 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
 3608  const APInt &DemandedElts,
3609  APInt &UndefElts,
3610  unsigned Depth) const {
3611  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3612  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3613  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3614  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3615  "Should use isSplatValue if you don't know whether Op"
3616  " is a target node!");
3617  return false;
3618 }
3619 
3620 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3621 // work with truncating build vectors and vectors with elements of less than
3622 // 8 bits.
 3623 bool TargetLowering::isConstTrueVal(SDValue N) const {
 3624  if (!N)
3625  return false;
3626 
3627  unsigned EltWidth;
3628  APInt CVal;
3629  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3630  /*AllowTruncation=*/true)) {
3631  CVal = CN->getAPIntValue();
3632  EltWidth = N.getValueType().getScalarSizeInBits();
3633  } else
3634  return false;
3635 
3636  // If this is a truncating splat, truncate the splat value.
3637  // Otherwise, we may fail to match the expected values below.
3638  if (EltWidth < CVal.getBitWidth())
3639  CVal = CVal.trunc(EltWidth);
3640 
 3641  switch (getBooleanContents(N.getValueType())) {
 3642  case UndefinedBooleanContent:
 3643  return CVal[0];
 3644  case ZeroOrOneBooleanContent:
 3645  return CVal.isOne();
 3646  case ZeroOrNegativeOneBooleanContent:
 3647  return CVal.isAllOnes();
3648  }
3649 
3650  llvm_unreachable("Invalid boolean contents");
3651 }
3652 
 3653 bool TargetLowering::isConstFalseVal(SDValue N) const {
 3654  if (!N)
3655  return false;
3656 
3657  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3658  if (!CN) {
3659  const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3660  if (!BV)
3661  return false;
3662 
 3663  // Only interested in constant splats; we don't care about undef
 3664  // elements when identifying boolean constants, and getConstantSplatNode
 3665  // returns NULL if all ops are undef.
3666  CN = BV->getConstantSplatNode();
3667  if (!CN)
3668  return false;
3669  }
3670 
3671  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3672  return !CN->getAPIntValue()[0];
3673 
3674  return CN->isZero();
3675 }
3676 
 3677 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 3678  bool SExt) const {
3679  if (VT == MVT::i1)
3680  return N->isOne();
3681 
 3682  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
 3683  switch (Cnt) {
 3684  case TargetLowering::ZeroOrOneBooleanContent:
 3685  // An extended value of 1 is always true, unless its original type is i1,
 3686  // in which case it will be sign extended to -1.
 3687  return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
 3688  case TargetLowering::UndefinedBooleanContent:
 3689  case TargetLowering::ZeroOrNegativeOneBooleanContent:
 3690  return N->isAllOnes() && SExt;
3691  }
3692  llvm_unreachable("Unexpected enumeration.");
3693 }
3694 
3695 /// This helper function of SimplifySetCC tries to optimize the comparison when
3696 /// either operand of the SetCC node is a bitwise-and instruction.
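/// For example (illustrative): '(X & Y) == Y' becomes '(X & Y) != 0' when Y
/// is known to be a power of two, and becomes '(~X & Y) == 0' on targets
/// with an and-not instruction.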
3697 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3698  ISD::CondCode Cond, const SDLoc &DL,
3699  DAGCombinerInfo &DCI) const {
3700  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3701  std::swap(N0, N1);
3702 
3703  SelectionDAG &DAG = DCI.DAG;
3704  EVT OpVT = N0.getValueType();
3705  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3706  (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3707  return SDValue();
3708 
3709  // (X & Y) != 0 --> zextOrTrunc(X & Y)
3710  // iff everything but LSB is known zero:
 3711  if (Cond == ISD::SETNE && isNullConstant(N1) &&
 3712  (getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent ||
 3713  getBooleanContents(OpVT) == TargetLowering::ZeroOrNegativeOneBooleanContent)) {
3714  unsigned NumEltBits = OpVT.getScalarSizeInBits();
3715  APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3716  if (DAG.MaskedValueIsZero(N0, UpperBits))
3717  return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3718  }
3719 
3720  // Match these patterns in any of their permutations:
3721  // (X & Y) == Y
3722  // (X & Y) != Y
3723  SDValue X, Y;
3724  if (N0.getOperand(0) == N1) {
3725  X = N0.getOperand(1);
3726  Y = N0.getOperand(0);
3727  } else if (N0.getOperand(1) == N1) {
3728  X = N0.getOperand(0);
3729  Y = N0.getOperand(1);
3730  } else {
3731  return SDValue();
3732  }
3733 
3734  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3735  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3736  // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3737  // Note that where Y is variable and is known to have at most one bit set
3738  // (for example, if it is Z & 1) we cannot do this; the expressions are not
3739  // equivalent when Y == 0.
3740  assert(OpVT.isInteger());
3741  Cond = ISD::getSetCCInverse(Cond, OpVT);
 3742  if (DCI.isBeforeLegalizeOps() ||
 3743  isCondCodeLegal(Cond, N0.getSimpleValueType()))
3744  return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3745  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3746  // If the target supports an 'and-not' or 'and-complement' logic operation,
3747  // try to use that to make a comparison operation more efficient.
3748  // But don't do this transform if the mask is a single bit because there are
3749  // more efficient ways to deal with that case (for example, 'bt' on x86 or
3750  // 'rlwinm' on PPC).
3751 
3752  // Bail out if the compare operand that we want to turn into a zero is
3753  // already a zero (otherwise, infinite loop).
3754  auto *YConst = dyn_cast<ConstantSDNode>(Y);
3755  if (YConst && YConst->isZero())
3756  return SDValue();
3757 
3758  // Transform this into: ~X & Y == 0.
3759  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3760  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3761  return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3762  }
3763 
3764  return SDValue();
3765 }
3766 
3767 /// There are multiple IR patterns that could be checking whether certain
 3768 /// truncation of a signed number would be lossy or not. The pattern that is
 3769 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3770 /// We are looking for the following pattern: (KeptBits is a constant)
3771 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3772 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3773 /// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3774 /// We will unfold it into the natural trunc+sext pattern:
3775 /// ((%x << C) a>> C) dstcond %x
3776 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
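/// Concrete instance (illustrative): for i16 %x and KeptBits = 8, this turns
/// (add %x, 128) u< 256 into ((%x << 8) a>> 8) == %x, i.e. a check that %x
/// fits in a signed 8-bit value.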
3777 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3778  EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3779  const SDLoc &DL) const {
3780  // We must be comparing with a constant.
3781  ConstantSDNode *C1;
3782  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3783  return SDValue();
3784 
3785  // N0 should be: add %x, (1 << (KeptBits-1))
3786  if (N0->getOpcode() != ISD::ADD)
3787  return SDValue();
3788 
3789  // And we must be 'add'ing a constant.
3790  ConstantSDNode *C01;
3791  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3792  return SDValue();
3793 
3794  SDValue X = N0->getOperand(0);
3795  EVT XVT = X.getValueType();
3796 
3797  // Validate constants ...
3798 
3799  APInt I1 = C1->getAPIntValue();
3800 
3801  ISD::CondCode NewCond;
3802  if (Cond == ISD::CondCode::SETULT) {
3803  NewCond = ISD::CondCode::SETEQ;
3804  } else if (Cond == ISD::CondCode::SETULE) {
3805  NewCond = ISD::CondCode::SETEQ;
3806  // But need to 'canonicalize' the constant.
3807  I1 += 1;
3808  } else if (Cond == ISD::CondCode::SETUGT) {
3809  NewCond = ISD::CondCode::SETNE;
3810  // But need to 'canonicalize' the constant.
3811  I1 += 1;
3812  } else if (Cond == ISD::CondCode::SETUGE) {
3813  NewCond = ISD::CondCode::SETNE;
3814  } else
3815  return SDValue();
3816 
3817  APInt I01 = C01->getAPIntValue();
3818 
3819  auto checkConstants = [&I1, &I01]() -> bool {
3820  // Both of them must be power-of-two, and the constant from setcc is bigger.
3821  return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3822  };
3823 
3824  if (checkConstants()) {
3825  // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3826  } else {
3827  // What if we invert constants? (and the target predicate)
3828  I1.negate();
3829  I01.negate();
3830  assert(XVT.isInteger());
3831  NewCond = getSetCCInverse(NewCond, XVT);
3832  if (!checkConstants())
3833  return SDValue();
3834  // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3835  }
3836 
3837  // They are power-of-two, so which bit is set?
3838  const unsigned KeptBits = I1.logBase2();
3839  const unsigned KeptBitsMinusOne = I01.logBase2();
3840 
3841  // Magic!
3842  if (KeptBits != (KeptBitsMinusOne + 1))
3843  return SDValue();
3844  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3845 
3846  // We don't want to do this in every single case.
3847  SelectionDAG &DAG = DCI.DAG;
 3848  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
 3849  XVT, KeptBits))
3850  return SDValue();
3851 
3852  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3853  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3854 
3855  // Unfold into: ((%x << C) a>> C) cond %x
3856  // Where 'cond' will be either 'eq' or 'ne'.
3857  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3858  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3859  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3860  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3861 
3862  return T2;
3863 }
3864 
3865 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
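// For example (illustrative): (X & (0x8000 l>> Y)) != 0 becomes
// ((X << Y) & 0x8000) != 0, hoisting the constant out of the variable shift.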
3866 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3867  EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3868  DAGCombinerInfo &DCI, const SDLoc &DL) const {
3869  assert(isConstOrConstSplat(N1C) &&
3870  isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3871  "Should be a comparison with 0.");
3872  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3873  "Valid only for [in]equality comparisons.");
3874 
3875  unsigned NewShiftOpcode;
3876  SDValue X, C, Y;
3877 
3878  SelectionDAG &DAG = DCI.DAG;
3879  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3880 
3881  // Look for '(C l>>/<< Y)'.
3882  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3883  // The shift should be one-use.
3884  if (!V.hasOneUse())
3885  return false;
3886  unsigned OldShiftOpcode = V.getOpcode();
3887  switch (OldShiftOpcode) {
3888  case ISD::SHL:
3889  NewShiftOpcode = ISD::SRL;
3890  break;
3891  case ISD::SRL:
3892  NewShiftOpcode = ISD::SHL;
3893  break;
3894  default:
3895  return false; // must be a logical shift.
3896  }
3897  // We should be shifting a constant.
3898  // FIXME: best to use isConstantOrConstantVector().
3899  C = V.getOperand(0);
3900  ConstantSDNode *CC =
3901  isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3902  if (!CC)
3903  return false;
3904  Y = V.getOperand(1);
3905 
3906  ConstantSDNode *XC =
3907  isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3908  return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3909  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3910  };
3911 
 3912  // LHS of comparison should be a one-use 'and'.
3913  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3914  return SDValue();
3915 
3916  X = N0.getOperand(0);
3917  SDValue Mask = N0.getOperand(1);
3918 
3919  // 'and' is commutative!
3920  if (!Match(Mask)) {
3921  std::swap(X, Mask);
3922  if (!Match(Mask))
3923  return SDValue();
3924  }
3925 
3926  EVT VT = X.getValueType();
3927 
3928  // Produce:
3929  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3930  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3931  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3932  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3933  return T2;
3934 }
3935 
3936 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3937 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3938 /// handle the commuted versions of these patterns.
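/// For example (illustrative): 'seteq (add X, Y), X' simplifies to
/// 'seteq Y, 0', and 'seteq (sub X, Y), Y' becomes 'seteq X, (shl Y, 1)'.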
3939 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3940  ISD::CondCode Cond, const SDLoc &DL,
3941  DAGCombinerInfo &DCI) const {
3942  unsigned BOpcode = N0.getOpcode();
3943  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3944  "Unexpected binop");
3945  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3946 
3947  // (X + Y) == X --> Y == 0
3948  // (X - Y) == X --> Y == 0
3949  // (X ^ Y) == X --> Y == 0
3950  SelectionDAG &DAG = DCI.DAG;
3951  EVT OpVT = N0.getValueType();
3952  SDValue X = N0.getOperand(0);
3953  SDValue Y = N0.getOperand(1);
3954  if (X == N1)
3955  return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3956 
3957  if (Y != N1)
3958  return SDValue();
3959 
3960  // (X + Y) == Y --> X == 0
3961  // (X ^ Y) == Y --> X == 0
3962  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3963  return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3964 
3965  // The shift would not be valid if the operands are boolean (i1).
3966  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3967  return SDValue();
3968 
3969  // (X - Y) == Y --> X == Y << 1
3970  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3971  !DCI.isBeforeLegalize());
3972  SDValue One = DAG.getConstant(1, DL, ShiftVT);
3973  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3974  if (!DCI.isCalledByLegalizer())
3975  DCI.AddToWorklist(YShl1.getNode());
3976  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3977 }
3978 
 3979 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
 3980  SDValue N0, const APInt &C1,
3981  ISD::CondCode Cond, const SDLoc &dl,
3982  SelectionDAG &DAG) {
3983  // Look through truncs that don't change the value of a ctpop.
3984  // FIXME: Add vector support? Need to be careful with setcc result type below.
3985  SDValue CTPOP = N0;
 3986  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
 3987  N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
3988  CTPOP = N0.getOperand(0);
3989 
3990  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
3991  return SDValue();
3992 
3993  EVT CTVT = CTPOP.getValueType();
3994  SDValue CTOp = CTPOP.getOperand(0);
3995 
3996  // Expand a power-of-2-or-zero comparison based on ctpop:
3997  // (ctpop x) u< 2 -> (x & x-1) == 0
3998  // (ctpop x) u> 1 -> (x & x-1) != 0
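 // For example (illustrative): (ctpop x) u< 3 expands to two
 // clear-lowest-set-bit passes, i.e.
 // (x & (x-1) & ((x & (x-1)) - 1)) == 0.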
3999  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4000  // Keep the CTPOP if it is a legal vector op.
4001  if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4002  return SDValue();
4003 
4004  unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4005  if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4006  return SDValue();
4007  if (C1 == 0 && (Cond == ISD::SETULT))
4008  return SDValue(); // This is handled elsewhere.
4009 
4010  unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4011 
4012  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4013  SDValue Result = CTOp;
4014  for (unsigned i = 0; i < Passes; i++) {
4015  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4016  Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
 4017  }
 4018  ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4019  return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4020  }
4021 
4022  // Expand a power-of-2 comparison based on ctpop:
4023  // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4024  // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4025  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4026  // Keep the CTPOP if it is legal.
4027  if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4028  return SDValue();
4029 
4030  SDValue Zero = DAG.getConstant(0, dl, CTVT);
4031  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4032  assert(CTVT.isInteger());
4033  ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4034  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4035  SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4036  SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4037  SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4038  unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4039  return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4040  }
4041 
4042  return SDValue();
4043 }
4044 
 4045 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
 4046  ISD::CondCode Cond, const SDLoc &dl,
4047  SelectionDAG &DAG) {
4048  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4049  return SDValue();
4050 
4051  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4052  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4053  return SDValue();
4054 
4055  auto getRotateSource = [](SDValue X) {
4056  if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4057  return X.getOperand(0);
4058  return SDValue();
4059  };
4060 
4061  // Peek through a rotated value compared against 0 or -1:
4062  // (rot X, Y) == 0/-1 --> X == 0/-1
4063  // (rot X, Y) != 0/-1 --> X != 0/-1
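 // A rotate only permutes bits, so e.g. (rotl X, 5) == 0 iff X == 0; no bits
 // are created or dropped by the rotate.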
4064  if (SDValue R = getRotateSource(N0))
4065  return DAG.getSetCC(dl, VT, R, N1, Cond);
4066 
4067  // Peek through an 'or' of a rotated value compared against 0:
4068  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4069  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4070  //
4071  // TODO: Add the 'and' with -1 sibling.
4072  // TODO: Recurse through a series of 'or' ops to find the rotate.
4073  EVT OpVT = N0.getValueType();
4074  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4075  if (SDValue R = getRotateSource(N0.getOperand(0))) {
4076  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4077  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4078  }
4079  if (SDValue R = getRotateSource(N0.getOperand(1))) {
4080  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4081  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4082  }
4083  }
4084 
4085  return SDValue();
4086 }
4087 
 4088 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
 4089  ISD::CondCode Cond, const SDLoc &dl,
4090  SelectionDAG &DAG) {
4091  // If we are testing for all-bits-clear, we might be able to do that with
4092  // less shifting since bit-order does not matter.
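 // For example (illustrative): fshl (or X, Y), X, 8 ==/!= 0 tests the same
 // bits as or (shl Y, 8), X ==/!= 0, since bit order is irrelevant when
 // comparing against zero.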
4093  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4094  return SDValue();
4095 
4096  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4097  if (!C1 || !C1->isZero())
4098  return SDValue();
4099 
4100  if (!N0.hasOneUse() ||
4101  (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4102  return SDValue();
4103 
4104  unsigned BitWidth = N0.getScalarValueSizeInBits();
4105  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4106  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4107  return SDValue();
4108 
4109  // Canonicalize fshr as fshl to reduce pattern-matching.
4110  unsigned ShAmt = ShAmtC->getZExtValue();
4111  if (N0.getOpcode() == ISD::FSHR)
4112  ShAmt = BitWidth - ShAmt;
4113 
4114  // Match an 'or' with a specific operand 'Other' in either commuted variant.
4115  SDValue X, Y;
4116  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4117  if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4118  return false;
4119  if (Or.getOperand(0) == Other) {
4120  X = Or.getOperand(0);
4121  Y = Or.getOperand(1);
4122  return true;
4123  }
4124  if (Or.getOperand(1) == Other) {
4125  X = Or.getOperand(1);
4126  Y = Or.getOperand(0);
4127  return true;
4128  }
4129  return false;
4130  };
4131 
4132  EVT OpVT = N0.getValueType();
4133  EVT ShAmtVT = N0.getOperand(2).getValueType();
4134  SDValue F0 = N0.getOperand(0);
4135  SDValue F1 = N0.getOperand(1);
4136  if (matchOr(F0, F1)) {
4137  // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4138  SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4139  SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4140  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4141  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4142  }
4143  if (matchOr(F1, F0)) {
4144  // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4145  SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4146  SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4147  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4148  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4149  }
4150 
4151  return SDValue();
4152 }
4153 
4154 /// Try to simplify a setcc built with the specified operands and cc. If it is
4155 /// unable to simplify it, return a null SDValue.
 4156 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 4157  ISD::CondCode Cond, bool foldBooleans,
4158  DAGCombinerInfo &DCI,
4159  const SDLoc &dl) const {
4160  SelectionDAG &DAG = DCI.DAG;
4161  const DataLayout &Layout = DAG.getDataLayout();
4162  EVT OpVT = N0.getValueType();
4163 
4164  // Constant fold or commute setcc.
4165  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4166  return Fold;
4167 
4168  bool N0ConstOrSplat =
4169  isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4170  bool N1ConstOrSplat =
4171  isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4172 
4173  // Ensure that the constant occurs on the RHS and fold constant comparisons.
4174  // TODO: Handle non-splat vector constants. All undef causes trouble.
4175  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
 4176  // infinite loop here when we encounter one.
 4177  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4178  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4179  (DCI.isBeforeLegalizeOps() ||
4180  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4181  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4182 
4183  // If we have a subtract with the same 2 non-constant operands as this setcc
4184  // -- but in reverse order -- then try to commute the operands of this setcc
4185  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4186  // instruction on some targets.
4187  if (!N0ConstOrSplat && !N1ConstOrSplat &&
4188  (DCI.isBeforeLegalizeOps() ||
4189  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4190  DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4191  !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4192  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4193 
4194  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4195  return V;
4196 
4197  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4198  return V;
4199 
4200  if (auto *N1C = isConstOrConstSplat(N1)) {
4201  const APInt &C1 = N1C->getAPIntValue();
4202 
4203  // Optimize some CTPOP cases.
4204  if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4205  return V;
4206 
4207  // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4208  // equality comparison, then we're just comparing whether X itself is
4209  // zero.
4210  if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
 4211  N0.getOperand(0).getOpcode() == ISD::CTLZ &&
 4212  isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4213  if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4214  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4215  ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4216  if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4217  // (srl (ctlz x), 5) == 0 -> X != 0
4218  // (srl (ctlz x), 5) != 1 -> X != 0
4219  Cond = ISD::SETNE;
4220  } else {
4221  // (srl (ctlz x), 5) != 0 -> X == 0
4222  // (srl (ctlz x), 5) == 1 -> X == 0
4223  Cond = ISD::SETEQ;
4224  }
4225  SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4226  return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4227  Cond);
4228  }
4229  }
4230  }
4231  }
4232 
4233  // FIXME: Support vectors.
4234  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4235  const APInt &C1 = N1C->getAPIntValue();
4236 
4237  // (zext x) == C --> x == (trunc C)
4238  // (sext x) == C --> x == (trunc C)
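 // For example (illustrative): 'seteq (zext i8 %x), 42' can be narrowed to
 // 'seteq i8 %x, 42' once we know the constant still fits in 8 bits.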
4239  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4240  DCI.isBeforeLegalize() && N0->hasOneUse()) {
4241  unsigned MinBits = N0.getValueSizeInBits();
4242  SDValue PreExt;
4243  bool Signed = false;
4244  if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4245  // ZExt
4246  MinBits = N0->getOperand(0).getValueSizeInBits();
4247  PreExt = N0->getOperand(0);
4248  } else if (N0->getOpcode() == ISD::AND) {
4249  // DAGCombine turns costly ZExts into ANDs
4250  if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4251  if ((C->getAPIntValue()+1).isPowerOf2()) {
4252  MinBits = C->getAPIntValue().countTrailingOnes();
4253  PreExt = N0->getOperand(0);
4254  }
4255  } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4256  // SExt
4257  MinBits = N0->getOperand(0).getValueSizeInBits();
4258  PreExt = N0->getOperand(0);
4259  Signed = true;
4260  } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4261  // ZEXTLOAD / SEXTLOAD
4262  if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4263  MinBits = LN0->getMemoryVT().getSizeInBits();
4264  PreExt = N0;
4265  } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4266  Signed = true;
4267  MinBits = LN0->getMemoryVT().getSizeInBits();
4268  PreExt = N0;
4269  }
4270  }
4271 
4272  // Figure out how many bits we need to preserve this constant.
4273  unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4274 
4275  // Make sure we're not losing bits from the constant.
4276  if (MinBits > 0 &&
4277  MinBits < C1.getBitWidth() &&
4278  MinBits >= ReqdBits) {
4279  EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4280  if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4281  // Will get folded away.
4282  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4283  if (MinBits == 1 && C1 == 1)
4284  // Invert the condition.
 4285  return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
 4286  ISD::SETEQ);
4287  SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4288  return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4289  }
4290 
4291  // If truncating the setcc operands is not desirable, we can still
4292  // simplify the expression in some cases:
4293  // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4294  // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4295  // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4296  // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4297  // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4298  // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4299  SDValue TopSetCC = N0->getOperand(0);
4300  unsigned N0Opc = N0->getOpcode();
4301  bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4302  if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4303  TopSetCC.getOpcode() == ISD::SETCC &&
4304  (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4305  (isConstFalseVal(N1) ||
4306  isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4307 
4308  bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4309  (!N1C->isZero() && Cond == ISD::SETNE);
4310 
4311  if (!Inverse)
4312  return TopSetCC;
4313 
 4314  ISD::CondCode InvCond = ISD::getSetCCInverse(
 4315  cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4316  TopSetCC.getOperand(0).getValueType());
4317  return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4318  TopSetCC.getOperand(1),
4319  InvCond);
4320  }
4321  }
4322  }
4323 
4324  // If the LHS is '(and load, const)', the RHS is 0, the test is for
4325  // equality or unsigned, and all 1 bits of the const are in the same
4326  // partial word, see if we can shorten the load.
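 // For example (illustrative, little-endian): (and (load i32 %p), 0xFF0000)
 // == 0 can be narrowed to a 16-bit load at offset 2 compared with mask 0xFF.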
 4327  if (DCI.isBeforeLegalize() &&
 4328  !ISD::isSignedIntSetCC(Cond) &&
4329  N0.getOpcode() == ISD::AND && C1 == 0 &&
4330  N0.getNode()->hasOneUse() &&
4331  isa<LoadSDNode>(N0.getOperand(0)) &&
4332  N0.getOperand(0).getNode()->hasOneUse() &&
4333  isa<ConstantSDNode>(N0.getOperand(1))) {
4334  LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4335  APInt bestMask;
4336  unsigned bestWidth = 0, bestOffset = 0;
4337  if (Lod->isSimple() && Lod->isUnindexed()) {
4338  unsigned origWidth = N0.getValueSizeInBits();
4339  unsigned maskWidth = origWidth;
4340  // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4341  // 8 bits, but have to be careful...
4342  if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4343  origWidth = Lod->getMemoryVT().getSizeInBits();
4344  const APInt &Mask = N0.getConstantOperandAPInt(1);
4345  for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4346  APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4347  for (unsigned offset=0; offset<origWidth/width; offset++) {
4348  if (Mask.isSubsetOf(newMask)) {
4349  if (Layout.isLittleEndian())
4350  bestOffset = (uint64_t)offset * (width/8);
4351  else
4352  bestOffset = (origWidth/width - offset - 1) * (width/8);
4353  bestMask = Mask.lshr(offset * (width/8) * 8);
4354  bestWidth = width;
4355  break;
4356  }
4357  newMask <<= width;
4358  }
4359  }
4360  }
4361  if (bestWidth) {
4362  EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4363  if (newVT.isRound() &&
4364  shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4365  SDValue Ptr = Lod->getBasePtr();
4366  if (bestOffset != 0)
4367  Ptr =
4368  DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4369  SDValue NewLoad =
4370  DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4371  Lod->getPointerInfo().getWithOffset(bestOffset),
4372  Lod->getOriginalAlign());
4373  return DAG.getSetCC(dl, VT,
4374  DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4375  DAG.getConstant(bestMask.trunc(bestWidth),
4376  dl, newVT)),
4377  DAG.getConstant(0LL, dl, newVT), Cond);
4378  }
4379  }
4380  }
4381 
4382  // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4383  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4384  unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4385 
4386  // If the comparison constant has bits in the upper part, the
4387  // zero-extended value could never match.
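 // For example (illustrative): 'seteq (zext i8 %x), 512' is always false,
 // since no zero-extended 8-bit value can reach 512.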
4388  if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4389  C1.getBitWidth() - InSize))) {
4390  switch (Cond) {
4391  case ISD::SETUGT:
4392  case ISD::SETUGE:
4393  case ISD::SETEQ:
4394  return DAG.getConstant(0, dl, VT);
4395  case ISD::SETULT:
4396  case ISD::SETULE:
4397  case ISD::SETNE:
4398  return DAG.getConstant(1, dl, VT);
4399  case ISD::SETGT:
4400  case ISD::SETGE:
4401  // True if the sign bit of C1 is set.
4402  return DAG.getConstant(C1.isNegative(), dl, VT);
4403  case ISD::SETLT:
4404  case ISD::SETLE:
4405  // True if the sign bit of C1 isn't set.
4406  return DAG.getConstant(C1.isNonNegative(), dl, VT);
4407  default:
4408  break;
4409  }
4410  }
4411 
4412  // Otherwise, we can perform the comparison with the low bits.
4413  switch (Cond) {
4414  case ISD::SETEQ:
4415  case ISD::SETNE:
4416  case ISD::SETUGT:
4417  case ISD::SETUGE:
4418  case ISD::SETULT:
4419  case ISD::SETULE: {
4420  EVT newVT = N0.getOperand(0).getValueType();
4421  if (DCI.isBeforeLegalizeOps() ||
4422  (isOperationLegal(ISD::SETCC, newVT) &&
4423  isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4424  EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4425  SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4426 
4427  SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4428  NewConst, Cond);
4429  return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4430  }
4431  break;
4432  }
4433  default:
4434  break; // todo, be more careful with signed comparisons
4435  }
4436  } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4437  (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4438  !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4439  OpVT)) {
4440  EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4441  unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4442  EVT ExtDstTy = N0.getValueType();
4443  unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4444 
4445  // If the constant doesn't fit into the number of bits for the source of
4446  // the sign extension, it is impossible for both sides to be equal.
4447  if (C1.getMinSignedBits() > ExtSrcTyBits)
4448  return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4449 
4450  assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4451  ExtDstTy != ExtSrcTy && "Unexpected types!");
4452  APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4453  SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4454  DAG.getConstant(Imm, dl, ExtDstTy));
4455  if (!DCI.isCalledByLegalizer())
4456  DCI.AddToWorklist(ZextOp.getNode());
4457  // Otherwise, make this a use of a zext.
4458  return DAG.getSetCC(dl, VT, ZextOp,
4459  DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4460  } else if ((N1C->isZero() || N1C->isOne()) &&
4461  (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4462  // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
4463  if (N0.getOpcode() == ISD::SETCC &&
4464  isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
 4465  (N0.getValueType() == MVT::i1 ||
 4466  getBooleanContents(N0.getOperand(0).getValueType()) ==
 4467  ZeroOrOneBooleanContent)) {
 4468  bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4469  if (TrueWhenTrue)
4470  return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4471  // Invert the condition.
4472  ISD::CondCode CC = cast<CondCodeSDNode>(N0.