//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function.
/// If so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect
  // the call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

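// Illustrative example (not part of the upstream source): for IR such as
//
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// the CALL node feeds the lone RET, so it is in tail call position and Chain
// is set to the call's incoming chain. A caller returning 'zeroext i32'
// instead would fail the attribute checks above, because the zero extension
// of the return value cannot safely be elided.
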
bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg.)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

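// Illustrative example (not from the original file): for a call such as
//
//   call void @f(ptr byval(%struct.S) align 8 %p)
//
// setAttributes(Call, /*ArgIdx=*/0) sets IsByVal, leaves the other ABI flags
// clear, and records IndirectType = %struct.S with alignment 8 (taken from
// getParamAlign since no stack alignment was specified).
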
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

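// Usage sketch (illustrative, not part of this file): soft-float expansion of
// an f32 addition would typically reach this helper roughly as follows.
//
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       TLI.makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
//   // Res.first is the __addsf3 call result, Res.second the output chain.
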
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

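// Illustrative example (not in the upstream file): for a 15-byte memcpy on a
// target where i64 is legal and overlapping stores are not allowed, this
// produces MemOps = {i64, i32, i16, i8}; if Op.allowOverlap() is true and
// unaligned i64 accesses are fast, the 7-byte tail can instead be covered by
// a second, overlapping i64 operation, giving MemOps = {i64, i64}.
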
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison libcalls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

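// Worked example (illustrative): softening (setcc f32 %a, %b, setueq) picks
// LC1 = UO_F32 (__unordsf2) and LC2 = OEQ_F32 (__eqsf2), and the result is
// built as
//
//   (or (setcc (call __unordsf2, %a, %b), 0, setne),
//       (setcc (call __eqsf2,    %a, %b), 0, seteq))
//
// i.e. "unordered or equal", with NewRHS cleared to signal the caller that
// NewLHS already holds the final boolean value.
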
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

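// Illustrative example (not in the upstream source): with
// Op = (and X, 0xFF00) and DemandedBits = 0x0F00, the constant is not a
// subset of the demanded bits, so it is shrunk to 0xFF00 & 0x0F00 = 0x0F00
// and the node is rebuilt as (and X, 0x0F00).
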
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

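// Illustrative example (not in the upstream source): for an i64 add where
// only Demanded = 0xFF is live, DemandedSize is 8, so the loop tries i8, then
// i16, i32, ...; on a hypothetical target where truncation to i16 and
// zero-extension back are both free, the node becomes
//   (any_extend (add (trunc x to i16), (trunc y to i16)) to i64).
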
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

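// Usage sketch (illustrative): a target's DAG combine might call this entry
// point as
//
//   APInt Demanded = APInt::getLowBitsSet(BitWidth, 16);
//   if (TLI.SimplifyDemandedBits(N->getOperand(0), Demanded, DCI))
//     return SDValue(N, 0);
//
// where BitWidth, N, TLI and DCI come from the surrounding combine.
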
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0
    // or -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of extended bits, then we
    // can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

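// Illustrative example (not in the upstream source): with Op = (and X, 0xFF)
// and DemandedBits = 0xFF, every demanded bit is known one on the constant
// side, so X itself is returned without creating any new nodes; this is the
// multi-use-safe counterpart of SimplifyDemandedBits.
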
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1), or
// to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}

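// Illustrative example (not in the upstream source): for
//   Op = (srl (add (zext v8i8 A to v8i16), (zext v8i8 B to v8i16)), 1)
// both operands have at least 8 known leading zero bits, so on a target where
// AVGFLOORU is legal for v8i8 this becomes
//   (zero_extend (avgflooru (trunc ...), (trunc ...)) to v8i16)
// where the truncates fold away with the original zero-extends.
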
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (Op.getValueType().isScalableVector())
    return false;

  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    Known ^= Known2;
    break;
  }
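  // Illustrative example (not in the upstream source): with Op = (xor X:i8, 1)
  // and DemandedBits = 1, every demanded bit of the constant is flipped, so
  // the node is rebuilt as (xor X, 0xFF), a canonical 'not'; the undemanded
  // bits are flipped too since their value does not matter.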
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::VSELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
1572  case ISD::SETCC: {
1573  SDValue Op0 = Op.getOperand(0);
1574  SDValue Op1 = Op.getOperand(1);
1575  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1576  // If (1) we only need the sign-bit, (2) the setcc operands are the same
1577  // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1578  // -1, we may be able to bypass the setcc.
 1579  if (DemandedBits.isSignMask() &&
 1580  Op0.getScalarValueSizeInBits() == BitWidth &&
 1581  getBooleanContents(Op0.getValueType()) ==
 1582  BooleanContent::ZeroOrNegativeOneBooleanContent) {
1583  // If we're testing X < 0, then this compare isn't needed - just use X!
1584  // FIXME: We're limiting to integer types here, but this should also work
1585  // if we don't care about FP signed-zero. The use of SETLT with FP means
1586  // that we don't care about NaNs.
 1587  if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
 1588  (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
 1589  return TLO.CombineTo(Op, Op0);
1590 
1591  // TODO: Should we check for other forms of sign-bit comparisons?
1592  // Examples: X <= -1, X >= 0
1593  }
 1594  if (getBooleanContents(Op0.getValueType()) ==
 1595  TargetLowering::ZeroOrOneBooleanContent &&
 1596  BitWidth > 1)
1597  Known.Zero.setBitsFrom(1);
1598  break;
1599  }
1600  case ISD::SHL: {
1601  SDValue Op0 = Op.getOperand(0);
1602  SDValue Op1 = Op.getOperand(1);
1603  EVT ShiftVT = Op1.getValueType();
1604 
1605  if (const APInt *SA =
1606  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1607  unsigned ShAmt = SA->getZExtValue();
1608  if (ShAmt == 0)
1609  return TLO.CombineTo(Op, Op0);
1610 
1611  // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1612  // single shift. We can do this if the bottom bits (which are shifted
1613  // out) are never demanded.
1614  // TODO - support non-uniform vector amounts.
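// e.g. in i8, ((X >>u 2) << 5) with the low 5 bits undemanded folds to
// (X << 3); with the shift amounts swapped, ((X >>u 5) << 2) with the low 2
// bits undemanded folds to (X >>u 3), since Diff goes negative and the
// opcode flips to SRL.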
1615  if (Op0.getOpcode() == ISD::SRL) {
1616  if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1617  if (const APInt *SA2 =
1618  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1619  unsigned C1 = SA2->getZExtValue();
1620  unsigned Opc = ISD::SHL;
1621  int Diff = ShAmt - C1;
1622  if (Diff < 0) {
1623  Diff = -Diff;
1624  Opc = ISD::SRL;
1625  }
1626  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1627  return TLO.CombineTo(
1628  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1629  }
1630  }
1631  }
1632 
1633  // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1634  // are not demanded. This will likely allow the anyext to be folded away.
1635  // TODO - support non-uniform vector amounts.
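// e.g. (i32 shl (any_extend i16:X), 4) where only the low 16 result bits
// are demanded becomes (any_extend (i16 shl X, 4)), and the extension of
// the narrow shift can then usually be folded away.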
1636  if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1637  SDValue InnerOp = Op0.getOperand(0);
1638  EVT InnerVT = InnerOp.getValueType();
1639  unsigned InnerBits = InnerVT.getScalarSizeInBits();
1640  if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1641  isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1642  EVT ShTy = getShiftAmountTy(InnerVT, DL);
1643  if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1644  ShTy = InnerVT;
1645  SDValue NarrowShl =
1646  TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1647  TLO.DAG.getConstant(ShAmt, dl, ShTy));
1648  return TLO.CombineTo(
1649  Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1650  }
1651 
1652  // Repeat the SHL optimization above in cases where an extension
1653  // intervenes: (shl (anyext (shr x, c1)), c2) to
1654  // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1655  // aren't demanded (as above) and that the shifted upper c1 bits of
1656  // x aren't demanded.
1657  // TODO - support non-uniform vector amounts.
1658  if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1659  InnerOp.hasOneUse()) {
1660  if (const APInt *SA2 =
1661  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1662  unsigned InnerShAmt = SA2->getZExtValue();
1663  if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1664  DemandedBits.getActiveBits() <=
1665  (InnerBits - InnerShAmt + ShAmt) &&
1666  DemandedBits.countTrailingZeros() >= ShAmt) {
1667  SDValue NewSA =
1668  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1669  SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1670  InnerOp.getOperand(0));
1671  return TLO.CombineTo(
1672  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1673  }
1674  }
1675  }
1676  }
1677 
1678  APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1679  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1680  Depth + 1))
1681  return true;
1682  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1683  Known.Zero <<= ShAmt;
1684  Known.One <<= ShAmt;
1685  // low bits known zero.
1686  Known.Zero.setLowBits(ShAmt);
1687 
1688  // Attempt to avoid multi-use ops if we don't need anything from them.
 1689  if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1690  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1691  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1692  if (DemandedOp0) {
1693  SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1694  return TLO.CombineTo(Op, NewOp);
1695  }
1696  }
1697 
1698  // Try shrinking the operation as long as the shift amount will still be
1699  // in range.
1700  if ((ShAmt < DemandedBits.getActiveBits()) &&
1701  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1702  return true;
1703  }
1704 
1705  // If we are only demanding sign bits then we can use the shift source
1706  // directly.
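// e.g. in i32, if Op0 has at least 20 sign bits and the shift amount is at
// most 4, every demanded bit at position 16 or above still reads one of the
// original sign-bit copies after the shift, so the shl can be bypassed.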
1707  if (const APInt *MaxSA =
1708  TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1709  unsigned ShAmt = MaxSA->getZExtValue();
1710  unsigned NumSignBits =
1711  TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1712  unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1713  if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1714  return TLO.CombineTo(Op, Op0);
1715  }
1716  break;
1717  }
1718  case ISD::SRL: {
1719  SDValue Op0 = Op.getOperand(0);
1720  SDValue Op1 = Op.getOperand(1);
1721  EVT ShiftVT = Op1.getValueType();
1722 
1723  // Try to match AVG patterns.
1724  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1725  DemandedElts, Depth + 1))
1726  return TLO.CombineTo(Op, AVG);
1727 
1728  if (const APInt *SA =
1729  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1730  unsigned ShAmt = SA->getZExtValue();
1731  if (ShAmt == 0)
1732  return TLO.CombineTo(Op, Op0);
1733 
1734  // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1735  // single shift. We can do this if the top bits (which are shifted out)
1736  // are never demanded.
1737  // TODO - support non-uniform vector amounts.
1738  if (Op0.getOpcode() == ISD::SHL) {
1739  if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1740  if (const APInt *SA2 =
1741  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1742  unsigned C1 = SA2->getZExtValue();
1743  unsigned Opc = ISD::SRL;
1744  int Diff = ShAmt - C1;
1745  if (Diff < 0) {
1746  Diff = -Diff;
1747  Opc = ISD::SHL;
1748  }
1749  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1750  return TLO.CombineTo(
1751  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1752  }
1753  }
1754  }
1755 
1756  APInt InDemandedMask = (DemandedBits << ShAmt);
1757 
1758  // If the shift is exact, then it does demand the low bits (and knows that
1759  // they are zero).
1760  if (Op->getFlags().hasExact())
1761  InDemandedMask.setLowBits(ShAmt);
1762 
1763  // Compute the new bits that are at the top now.
1764  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1765  Depth + 1))
1766  return true;
1767  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1768  Known.Zero.lshrInPlace(ShAmt);
1769  Known.One.lshrInPlace(ShAmt);
1770  // High bits known zero.
1771  Known.Zero.setHighBits(ShAmt);
1772  }
1773  break;
1774  }
1775  case ISD::SRA: {
1776  SDValue Op0 = Op.getOperand(0);
1777  SDValue Op1 = Op.getOperand(1);
1778  EVT ShiftVT = Op1.getValueType();
1779 
1780  // If we only want bits that already match the signbit then we don't need
1781  // to shift.
1782  unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1783  if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1784  NumHiDemandedBits)
1785  return TLO.CombineTo(Op, Op0);
1786 
1787  // If this is an arithmetic shift right and only the low-bit is set, we can
1788  // always convert this into a logical shr, even if the shift amount is
1789  // variable. The low bit of the shift cannot be an input sign bit unless
1790  // the shift amount is >= the size of the datatype, which is undefined.
1791  if (DemandedBits.isOne())
1792  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1793 
1794  // Try to match AVG patterns.
1795  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1796  DemandedElts, Depth + 1))
1797  return TLO.CombineTo(Op, AVG);
1798 
1799  if (const APInt *SA =
1800  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1801  unsigned ShAmt = SA->getZExtValue();
1802  if (ShAmt == 0)
1803  return TLO.CombineTo(Op, Op0);
1804 
1805  APInt InDemandedMask = (DemandedBits << ShAmt);
1806 
1807  // If the shift is exact, then it does demand the low bits (and knows that
1808  // they are zero).
1809  if (Op->getFlags().hasExact())
1810  InDemandedMask.setLowBits(ShAmt);
1811 
1812  // If any of the demanded bits are produced by the sign extension, we also
1813  // demand the input sign bit.
1814  if (DemandedBits.countLeadingZeros() < ShAmt)
1815  InDemandedMask.setSignBit();
1816 
1817  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1818  Depth + 1))
1819  return true;
1820  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1821  Known.Zero.lshrInPlace(ShAmt);
1822  Known.One.lshrInPlace(ShAmt);
1823 
1824  // If the input sign bit is known to be zero, or if none of the top bits
1825  // are demanded, turn this into an unsigned shift right.
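// e.g. (i8 sra X, 3) with DemandedBits = 0b00011100: the mask has three
// leading zeros, so none of the replicated sign bits are demanded and
// (i8 srl X, 3) produces identical demanded bits.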
1826  if (Known.Zero[BitWidth - ShAmt - 1] ||
1827  DemandedBits.countLeadingZeros() >= ShAmt) {
1828  SDNodeFlags Flags;
1829  Flags.setExact(Op->getFlags().hasExact());
1830  return TLO.CombineTo(
1831  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1832  }
1833 
1834  int Log2 = DemandedBits.exactLogBase2();
1835  if (Log2 >= 0) {
1836  // The bit must come from the sign.
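// (e.g. demanding only bit 6 of (i8 sra X, 5): 6 + 5 >= 8, so bit 6 is a
// copy of the sign bit, and (i8 srl X, 1) moves the sign bit straight to
// position 6.)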
1837  SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1838  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1839  }
1840 
1841  if (Known.One[BitWidth - ShAmt - 1])
1842  // New bits are known one.
1843  Known.One.setHighBits(ShAmt);
1844 
1845  // Attempt to avoid multi-use ops if we don't need anything from them.
1846  if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1847  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1849  if (DemandedOp0) {
1850  SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1851  return TLO.CombineTo(Op, NewOp);
1852  }
1853  }
1854  }
1855  break;
1856  }
1857  case ISD::FSHL:
1858  case ISD::FSHR: {
1859  SDValue Op0 = Op.getOperand(0);
1860  SDValue Op1 = Op.getOperand(1);
1861  SDValue Op2 = Op.getOperand(2);
1862  bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1863 
1864  if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1865  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1866 
1867  // For fshl, 0-shift returns the 1st arg.
1868  // For fshr, 0-shift returns the 2nd arg.
1869  if (Amt == 0) {
1870  if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1871  Known, TLO, Depth + 1))
1872  return true;
1873  break;
1874  }
1875 
1876  // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1877  // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
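// e.g. fshl i8 with Amt = 3: result bit i is Op0 bit (i - 3) or Op1 bit
// (i + 5), so Demanded0 = DemandedBits >> 3 and Demanded1 = DemandedBits << 5.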
1878  APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1879  APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1880  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1881  Depth + 1))
1882  return true;
1883  if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1884  Depth + 1))
1885  return true;
1886 
1887  Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1888  Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1889  Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1890  Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1891  Known.One |= Known2.One;
1892  Known.Zero |= Known2.Zero;
1893 
1894  // Attempt to avoid multi-use ops if we don't need anything from them.
1895  if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
1896  !DemandedElts.isAllOnes()) {
1897  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1898  Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
1899  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1900  Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
1901  if (DemandedOp0 || DemandedOp1) {
1902  DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
1903  DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
1904  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
1905  DemandedOp1, Op2);
1906  return TLO.CombineTo(Op, NewOp);
1907  }
1908  }
1909  }
1910 
1911  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1912  if (isPowerOf2_32(BitWidth)) {
1913  APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
1914  if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
1915  Known2, TLO, Depth + 1))
1916  return true;
1917  }
1918  break;
1919  }
1920  case ISD::ROTL:
1921  case ISD::ROTR: {
1922  SDValue Op0 = Op.getOperand(0);
1923  SDValue Op1 = Op.getOperand(1);
1924  bool IsROTL = (Op.getOpcode() == ISD::ROTL);
1925 
 1926  // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
1927  if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
1928  return TLO.CombineTo(Op, Op0);
1929 
1930  if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1931  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1932  unsigned RevAmt = BitWidth - Amt;
1933 
1934  // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
1935  // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
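// e.g. rotl i8 with Amt = 3: result bit i comes from Op0 bit ((i - 3) mod 8),
// so the demanded source bits are simply DemandedBits rotated right by 3.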
1936  APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
1937  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1938  Depth + 1))
1939  return true;
1940 
1941  // rot*(x, 0) --> x
1942  if (Amt == 0)
1943  return TLO.CombineTo(Op, Op0);
1944 
1945  // See if we don't demand either half of the rotated bits.
1946  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
1947  DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
1948  Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
1949  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
1950  }
1951  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
1952  DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
1953  Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
1954  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1955  }
1956  }
1957 
1958  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1959  if (isPowerOf2_32(BitWidth)) {
1960  APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
1961  if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
1962  Depth + 1))
1963  return true;
1964  }
1965  break;
1966  }
1967  case ISD::UMIN: {
1968  // Check if one arg is always less than (or equal) to the other arg.
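// e.g. if Known0 proves Op0 <= 0x0F (high bits known zero) and Known1 proves
// Op1 >= 0x10 (bit 4 known one), KnownBits::ult returns true and the umin
// folds directly to Op0.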
1969  SDValue Op0 = Op.getOperand(0);
1970  SDValue Op1 = Op.getOperand(1);
1971  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1972  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1973  Known = KnownBits::umin(Known0, Known1);
1974  if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
1975  return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
1976  if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
1977  return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
1978  break;
1979  }
1980  case ISD::UMAX: {
1981  // Check if one arg is always greater than (or equal) to the other arg.
1982  SDValue Op0 = Op.getOperand(0);
1983  SDValue Op1 = Op.getOperand(1);
1984  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1985  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1986  Known = KnownBits::umax(Known0, Known1);
1987  if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
1988  return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
1989  if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
1990  return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
1991  break;
1992  }
1993  case ISD::BITREVERSE: {
1994  SDValue Src = Op.getOperand(0);
1995  APInt DemandedSrcBits = DemandedBits.reverseBits();
1996  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1997  Depth + 1))
1998  return true;
1999  Known.One = Known2.One.reverseBits();
2000  Known.Zero = Known2.Zero.reverseBits();
2001  break;
2002  }
2003  case ISD::BSWAP: {
2004  SDValue Src = Op.getOperand(0);
2005 
2006  // If the only bits demanded come from one byte of the bswap result,
2007  // just shift the input byte into position to eliminate the bswap.
2008  unsigned NLZ = DemandedBits.countLeadingZeros();
2009  unsigned NTZ = DemandedBits.countTrailingZeros();
2010 
2011  // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2012  // we need all the bits down to bit 8. Likewise, round NLZ. If we
2013  // have 14 leading zeros, round to 8.
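// e.g. in i32, demanding only bits 23:16 of the bswap (NLZ = 8, NTZ = 16)
// selects input byte 1 (bits 15:8), so a single (shl Src, 8) replaces the
// bswap.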
2014  NLZ = alignDown(NLZ, 8);
2015  NTZ = alignDown(NTZ, 8);
2016  // If we need exactly one byte, we can do this transformation.
2017  if (BitWidth - NLZ - NTZ == 8) {
2018  // Replace this with either a left or right shift to get the byte into
2019  // the right place.
2020  unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2021  if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2022  EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2023  unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2024  SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2025  SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2026  return TLO.CombineTo(Op, NewOp);
2027  }
2028  }
2029 
2030  APInt DemandedSrcBits = DemandedBits.byteSwap();
2031  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2032  Depth + 1))
2033  return true;
2034  Known.One = Known2.One.byteSwap();
2035  Known.Zero = Known2.Zero.byteSwap();
2036  break;
2037  }
2038  case ISD::CTPOP: {
2039  // If only 1 bit is demanded, replace with PARITY as long as we're before
2040  // op legalization.
2041  // FIXME: Limit to scalars for now.
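// e.g. (and (ctpop X), 1) only observes bit 0 of the population count,
// which is exactly the parity of X.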
2042  if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2043  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2044  Op.getOperand(0)));
2045 
2046  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2047  break;
2048  }
2049  case ISD::SIGN_EXTEND_INREG: {
2050  SDValue Op0 = Op.getOperand(0);
2051  EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2052  unsigned ExVTBits = ExVT.getScalarSizeInBits();
2053 
2054  // If we only care about the highest bit, don't bother shifting right.
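// e.g. (i32 sign_extend_inreg X, i8) demanding only bit 31: bit 31 of the
// result equals bit 7 of X, so (shl X, 24) is enough.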
2055  if (DemandedBits.isSignMask()) {
2056  unsigned MinSignedBits =
2057  TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2058  bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2059  // However if the input is already sign extended we expect the sign
2060  // extension to be dropped altogether later and do not simplify.
2061  if (!AlreadySignExtended) {
2062  // Compute the correct shift amount type, which must be getShiftAmountTy
2063  // for scalar types after legalization.
2064  SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2065  getShiftAmountTy(VT, DL));
2066  return TLO.CombineTo(Op,
2067  TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2068  }
2069  }
2070 
2071  // If none of the extended bits are demanded, eliminate the sextinreg.
2072  if (DemandedBits.getActiveBits() <= ExVTBits)
2073  return TLO.CombineTo(Op, Op0);
2074 
2075  APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2076 
2077  // Since the sign extended bits are demanded, we know that the sign
2078  // bit is demanded.
2079  InputDemandedBits.setBit(ExVTBits - 1);
2080 
2081  if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2082  Depth + 1))
2083  return true;
2084  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2085 
2086  // If the sign bit of the input is known set or clear, then we know the
2087  // top bits of the result.
2088 
2089  // If the input sign bit is known zero, convert this into a zero extension.
2090  if (Known.Zero[ExVTBits - 1])
2091  return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2092 
2093  APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2094  if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2095  Known.One.setBitsFrom(ExVTBits);
2096  Known.Zero &= Mask;
2097  } else { // Input sign bit unknown
2098  Known.Zero &= Mask;
2099  Known.One &= Mask;
2100  }
2101  break;
2102  }
2103  case ISD::BUILD_PAIR: {
2104  EVT HalfVT = Op.getOperand(0).getValueType();
2105  unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2106 
2107  APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2108  APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2109 
2110  KnownBits KnownLo, KnownHi;
2111 
2112  if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2113  return true;
2114 
2115  if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2116  return true;
2117 
2118  Known.Zero = KnownLo.Zero.zext(BitWidth) |
2119  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
2120 
2121  Known.One = KnownLo.One.zext(BitWidth) |
2122  KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
2123  break;
2124  }
 2125  case ISD::ZERO_EXTEND:
 2126  case ISD::ZERO_EXTEND_VECTOR_INREG: {
 2127  SDValue Src = Op.getOperand(0);
2128  EVT SrcVT = Src.getValueType();
2129  unsigned InBits = SrcVT.getScalarSizeInBits();
2130  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2131  bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2132 
2133  // If none of the top bits are demanded, convert this into an any_extend.
2134  if (DemandedBits.getActiveBits() <= InBits) {
2135  // If we only need the non-extended bits of the bottom element
2136  // then we can just bitcast to the result.
2137  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2138  VT.getSizeInBits() == SrcVT.getSizeInBits())
2139  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2140 
 2141  unsigned Opc =
 2142  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
 2143  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2144  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2145  }
2146 
2147  APInt InDemandedBits = DemandedBits.trunc(InBits);
2148  APInt InDemandedElts = DemandedElts.zext(InElts);
2149  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2150  Depth + 1))
2151  return true;
2152  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2153  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2154  Known = Known.zext(BitWidth);
2155 
2156  // Attempt to avoid multi-use ops if we don't need anything from them.
2157  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2158  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2159  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2160  break;
2161  }
 2162  case ISD::SIGN_EXTEND:
 2163  case ISD::SIGN_EXTEND_VECTOR_INREG: {
 2164  SDValue Src = Op.getOperand(0);
2165  EVT SrcVT = Src.getValueType();
2166  unsigned InBits = SrcVT.getScalarSizeInBits();
2167  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2168  bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2169 
2170  // If none of the top bits are demanded, convert this into an any_extend.
2171  if (DemandedBits.getActiveBits() <= InBits) {
2172  // If we only need the non-extended bits of the bottom element
2173  // then we can just bitcast to the result.
2174  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2175  VT.getSizeInBits() == SrcVT.getSizeInBits())
2176  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2177 
 2178  unsigned Opc =
 2179  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
 2180  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2181  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2182  }
2183 
2184  APInt InDemandedBits = DemandedBits.trunc(InBits);
2185  APInt InDemandedElts = DemandedElts.zext(InElts);
2186 
2187  // Since some of the sign extended bits are demanded, we know that the sign
2188  // bit is demanded.
2189  InDemandedBits.setBit(InBits - 1);
2190 
2191  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2192  Depth + 1))
2193  return true;
2194  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2195  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2196 
2197  // If the sign bit is known one, the top bits match.
2198  Known = Known.sext(BitWidth);
2199 
2200  // If the sign bit is known zero, convert this to a zero extend.
2201  if (Known.isNonNegative()) {
 2202  unsigned Opc =
 2203  IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
 2204  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2205  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2206  }
2207 
2208  // Attempt to avoid multi-use ops if we don't need anything from them.
2209  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2210  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2211  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2212  break;
2213  }
 2214  case ISD::ANY_EXTEND:
 2215  case ISD::ANY_EXTEND_VECTOR_INREG: {
 2216  SDValue Src = Op.getOperand(0);
2217  EVT SrcVT = Src.getValueType();
2218  unsigned InBits = SrcVT.getScalarSizeInBits();
2219  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2220  bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2221 
2222  // If we only need the bottom element then we can just bitcast.
2223  // TODO: Handle ANY_EXTEND?
2224  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2225  VT.getSizeInBits() == SrcVT.getSizeInBits())
2226  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2227 
2228  APInt InDemandedBits = DemandedBits.trunc(InBits);
2229  APInt InDemandedElts = DemandedElts.zext(InElts);
2230  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2231  Depth + 1))
2232  return true;
2233  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2234  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2235  Known = Known.anyext(BitWidth);
2236 
2237  // Attempt to avoid multi-use ops if we don't need anything from them.
2238  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2239  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2240  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2241  break;
2242  }
2243  case ISD::TRUNCATE: {
2244  SDValue Src = Op.getOperand(0);
2245 
2246  // Simplify the input, using demanded bit information, and compute the known
2247  // zero/one bits live out.
2248  unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2249  APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2250  if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2251  Depth + 1))
2252  return true;
2253  Known = Known.trunc(BitWidth);
2254 
2255  // Attempt to avoid multi-use ops if we don't need anything from them.
2256  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2257  Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2258  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2259 
2260  // If the input is only used by this truncate, see if we can shrink it based
2261  // on the known demanded bits.
2262  if (Src.getNode()->hasOneUse()) {
2263  switch (Src.getOpcode()) {
2264  default:
2265  break;
2266  case ISD::SRL:
2267  // Shrink SRL by a constant if none of the high bits shifted in are
2268  // demanded.
2269  if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2270  // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2271  // undesirable.
2272  break;
2273 
2274  const APInt *ShAmtC =
2275  TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2276  if (!ShAmtC || ShAmtC->uge(BitWidth))
2277  break;
2278  uint64_t ShVal = ShAmtC->getZExtValue();
2279 
2280  APInt HighBits =
2281  APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2282  HighBits.lshrInPlace(ShVal);
2283  HighBits = HighBits.trunc(BitWidth);
2284 
2285  if (!(HighBits & DemandedBits)) {
2286  // None of the shifted in bits are needed. Add a truncate of the
2287  // shift input, then shift it.
2288  SDValue NewShAmt = TLO.DAG.getConstant(
2289  ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2290  SDValue NewTrunc =
2291  TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2292  return TLO.CombineTo(
2293  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2294  }
2295  break;
2296  }
2297  }
2298 
2299  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2300  break;
2301  }
2302  case ISD::AssertZext: {
2303  // AssertZext demands all of the high bits, plus any of the low bits
2304  // demanded by its users.
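// e.g. AssertZext from i8 in i32: InMask = 0xFF, the operand is simplified
// with ~InMask | DemandedBits, and bits 31:8 become known zero below.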
 2305  EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 2306  APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
 2307  if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2308  TLO, Depth + 1))
2309  return true;
2310  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2311 
2312  Known.Zero |= ~InMask;
2313  break;
2314  }
2315  case ISD::EXTRACT_VECTOR_ELT: {
2316  SDValue Src = Op.getOperand(0);
2317  SDValue Idx = Op.getOperand(1);
2318  ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2319  unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2320 
2321  if (SrcEltCnt.isScalable())
2322  return false;
2323 
2324  // Demand the bits from every vector element without a constant index.
2325  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2326  APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2327  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2328  if (CIdx->getAPIntValue().ult(NumSrcElts))
2329  DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2330 
 2331  // If BitWidth > EltBitWidth the value is anyext'ed, so we do not know
2332  // anything about the extended bits.
2333  APInt DemandedSrcBits = DemandedBits;
2334  if (BitWidth > EltBitWidth)
2335  DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2336 
2337  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2338  Depth + 1))
2339  return true;
2340 
2341  // Attempt to avoid multi-use ops if we don't need anything from them.
2342  if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2343  if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2344  Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2345  SDValue NewOp =
2346  TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2347  return TLO.CombineTo(Op, NewOp);
2348  }
2349  }
2350 
2351  Known = Known2;
2352  if (BitWidth > EltBitWidth)
2353  Known = Known.anyext(BitWidth);
2354  break;
2355  }
2356  case ISD::BITCAST: {
2357  SDValue Src = Op.getOperand(0);
2358  EVT SrcVT = Src.getValueType();
2359  unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2360 
2361  // If this is an FP->Int bitcast and if the sign bit is the only
2362  // thing demanded, turn this into a FGETSIGN.
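// e.g. (i32 bitcast (f32 X)) demanding only bit 31: FGETSIGN yields the
// sign as 0/1 and (shl sign, 31) rebuilds the demanded sign-mask bit.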
2363  if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2364  DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2365  SrcVT.isFloatingPoint()) {
 2366  bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
 2367  bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
 2368  if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2369  SrcVT != MVT::f128) {
2370  // Cannot eliminate/lower SHL for f128 yet.
2371  EVT Ty = OpVTLegal ? VT : MVT::i32;
2372  // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2373  // place. We expect the SHL to be eliminated by other optimizations.
2374  SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2375  unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2376  if (!OpVTLegal && OpVTSizeInBits > 32)
2377  Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2378  unsigned ShVal = Op.getValueSizeInBits() - 1;
2379  SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2380  return TLO.CombineTo(Op,
2381  TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2382  }
2383  }
2384 
 2385  // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
2386  // Demand the elt/bit if any of the original elts/bits are demanded.
2387  if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2388  unsigned Scale = BitWidth / NumSrcEltBits;
2389  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2390  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2391  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2392  for (unsigned i = 0; i != Scale; ++i) {
2393  unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2394  unsigned BitOffset = EltOffset * NumSrcEltBits;
2395  APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2396  if (!Sub.isZero()) {
2397  DemandedSrcBits |= Sub;
2398  for (unsigned j = 0; j != NumElts; ++j)
2399  if (DemandedElts[j])
2400  DemandedSrcElts.setBit((j * Scale) + i);
2401  }
2402  }
2403 
2404  APInt KnownSrcUndef, KnownSrcZero;
2405  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2406  KnownSrcZero, TLO, Depth + 1))
2407  return true;
2408 
2409  KnownBits KnownSrcBits;
2410  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2411  KnownSrcBits, TLO, Depth + 1))
2412  return true;
2413  } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2414  // TODO - bigendian once we have test coverage.
2415  unsigned Scale = NumSrcEltBits / BitWidth;
2416  unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2417  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2418  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2419  for (unsigned i = 0; i != NumElts; ++i)
2420  if (DemandedElts[i]) {
2421  unsigned Offset = (i % Scale) * BitWidth;
2422  DemandedSrcBits.insertBits(DemandedBits, Offset);
2423  DemandedSrcElts.setBit(i / Scale);
2424  }
2425 
2426  if (SrcVT.isVector()) {
2427  APInt KnownSrcUndef, KnownSrcZero;
2428  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2429  KnownSrcZero, TLO, Depth + 1))
2430  return true;
2431  }
2432 
2433  KnownBits KnownSrcBits;
2434  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2435  KnownSrcBits, TLO, Depth + 1))
2436  return true;
2437  }
2438 
2439  // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2440  // recursive call where Known may be useful to the caller.
2441  if (Depth > 0) {
2442  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2443  return false;
2444  }
2445  break;
2446  }
2447  case ISD::MUL:
2448  if (DemandedBits.isPowerOf2()) {
2449  // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2450  // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2451  // odd (has LSB set), then the left-shifted low bit of X is the answer.
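// e.g. demanding only bit 4 of (X * 48): 48 = 3 << 4 has exactly four
// trailing zeros and 3 is odd, so bit 4 of the product is bit 0 of X and
// the multiply becomes (shl X, 4).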
2452  unsigned CTZ = DemandedBits.countTrailingZeros();
2453  ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2454  if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2455  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2456  SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2457  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2458  return TLO.CombineTo(Op, Shl);
2459  }
2460  }
2461  // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2462  // X * X is odd iff X is odd.
2463  // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2464  if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2465  SDValue One = TLO.DAG.getConstant(1, dl, VT);
2466  SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2467  return TLO.CombineTo(Op, And1);
2468  }
 2469  LLVM_FALLTHROUGH;
 2470  case ISD::ADD:
2471  case ISD::SUB: {
2472  // Add, Sub, and Mul don't demand any bits in positions beyond that
2473  // of the highest bit demanded of them.
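// e.g. if only the low 8 bits of (X + Y) are demanded in i32, carries only
// propagate upwards, so only the low 8 bits of X and Y are demanded
// (LoMask = 0xFF).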
2474  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2475  SDNodeFlags Flags = Op.getNode()->getFlags();
2476  unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2477  APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2478  if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2479  Depth + 1) ||
2480  SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2481  Depth + 1) ||
2482  // See if the operation should be performed at a smaller bit width.
2483  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2484  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2485  // Disable the nsw and nuw flags. We can no longer guarantee that we
2486  // won't wrap after simplification.
2487  Flags.setNoSignedWrap(false);
2488  Flags.setNoUnsignedWrap(false);
2489  SDValue NewOp =
2490  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2491  return TLO.CombineTo(Op, NewOp);
2492  }
2493  return true;
2494  }
2495 
2496  // Attempt to avoid multi-use ops if we don't need anything from them.
2497  if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2498  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2499  Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2500  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2501  Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2502  if (DemandedOp0 || DemandedOp1) {
2503  Flags.setNoSignedWrap(false);
2504  Flags.setNoUnsignedWrap(false);
2505  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2506  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2507  SDValue NewOp =
2508  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2509  return TLO.CombineTo(Op, NewOp);
2510  }
2511  }
2512 
2513  // If we have a constant operand, we may be able to turn it into -1 if we
2514  // do not demand the high bits. This can make the constant smaller to
2515  // encode, allow more general folding, or match specialized instruction
 2516  // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2517  // is probably not useful (and could be detrimental).
 2518  ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
 2519  APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2520  if (C && !C->isAllOnes() && !C->isOne() &&
2521  (C->getAPIntValue() | HighMask).isAllOnes()) {
2522  SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2523  // Disable the nsw and nuw flags. We can no longer guarantee that we
2524  // won't wrap after simplification.
2525  Flags.setNoSignedWrap(false);
2526  Flags.setNoUnsignedWrap(false);
2527  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2528  return TLO.CombineTo(Op, NewOp);
2529  }
2530 
 2531  // Match a multiply with a disguised negated-power-of-2 and convert to
 2532  // an equivalent shift-left amount.
2533  // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
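// e.g. in i32 with MulC = -8 (0xFFFFFFF8): X * -8 == -(X << 3), so
// (X * -8) + Op1 becomes Op1 - (X << 3).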
2534  auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2535  if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2536  return 0;
2537 
2538  // Don't touch opaque constants. Also, ignore zero and power-of-2
2539  // multiplies. Those will get folded later.
 2540  ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
 2541  if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2542  !MulC->getAPIntValue().isPowerOf2()) {
2543  APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2544  if (UnmaskedC.isNegatedPowerOf2())
2545  return (-UnmaskedC).logBase2();
2546  }
2547  return 0;
2548  };
2549 
2550  auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2551  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2552  SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2553  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2554  SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2555  return TLO.CombineTo(Op, Res);
2556  };
2557 
 2558  if (isOperationLegalOrCustom(ISD::SHL, VT)) {
 2559  if (Op.getOpcode() == ISD::ADD) {
2560  // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2561  if (unsigned ShAmt = getShiftLeftAmt(Op0))
2562  return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2563  // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2564  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2565  return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2566  }
2567  if (Op.getOpcode() == ISD::SUB) {
2568  // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2569  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2570  return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2571  }
2572  }
2573 
 2574  LLVM_FALLTHROUGH;
 2575  }
2576  default:
2577  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2578  if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2579  Known, TLO, Depth))
2580  return true;
2581  break;
2582  }
2583 
2584  // Just use computeKnownBits to compute output bits.
2585  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2586  break;
2587  }
2588 
2589  // If we know the value of all of the demanded bits, return this as a
2590  // constant.
2591  if (!isTargetCanonicalConstantNode(Op) &&
2592  DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2593  // Avoid folding to a constant if any OpaqueConstant is involved.
2594  const SDNode *N = Op.getNode();
2595  for (SDNode *Op :
 2596  llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
 2597  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2598  if (C->isOpaque())
2599  return false;
2600  }
2601  if (VT.isInteger())
2602  return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2603  if (VT.isFloatingPoint())
2604  return TLO.CombineTo(
2605  Op,
2606  TLO.DAG.getConstantFP(
2607  APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2608  }
2609 
2610  return false;
2611 }
2612 
 2613 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
 2614  const APInt &DemandedElts,
2615  DAGCombinerInfo &DCI) const {
2616  SelectionDAG &DAG = DCI.DAG;
2617  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2618  !DCI.isBeforeLegalizeOps());
2619 
2620  APInt KnownUndef, KnownZero;
2621  bool Simplified =
2622  SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2623  if (Simplified) {
2624  DCI.AddToWorklist(Op.getNode());
2625  DCI.CommitTargetLoweringOpt(TLO);
2626  }
2627 
2628  return Simplified;
2629 }
2630 
2631 /// Given a vector binary operation and known undefined elements for each input
2632 /// operand, compute whether each element of the output is undefined.
 2633 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
 2634  const APInt &UndefOp0,
2635  const APInt &UndefOp1) {
2636  EVT VT = BO.getValueType();
2637  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2638  "Vector binop only");
2639 
2640  EVT EltVT = VT.getVectorElementType();
2641  unsigned NumElts = VT.getVectorNumElements();
2642  assert(UndefOp0.getBitWidth() == NumElts &&
2643  UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2644 
2645  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2646  const APInt &UndefVals) {
2647  if (UndefVals[Index])
2648  return DAG.getUNDEF(EltVT);
2649 
2650  if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2651  // Try hard to make sure that the getNode() call is not creating temporary
2652  // nodes. Ignore opaque integers because they do not constant fold.
2653  SDValue Elt = BV->getOperand(Index);
2654  auto *C = dyn_cast<ConstantSDNode>(Elt);
2655  if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2656  return Elt;
2657  }
2658 
2659  return SDValue();
2660  };
2661 
2662  APInt KnownUndef = APInt::getZero(NumElts);
2663  for (unsigned i = 0; i != NumElts; ++i) {
2664  // If both inputs for this element are either constant or undef and match
2665  // the element type, compute the constant/undef result for this element of
2666  // the vector.
2667  // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2668  // not handle FP constants. The code within getNode() should be refactored
2669  // to avoid the danger of creating a bogus temporary node here.
2670  SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2671  SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2672  if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2673  if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2674  KnownUndef.setBit(i);
2675  }
2676  return KnownUndef;
2677 }
2678 
 2679 bool TargetLowering::SimplifyDemandedVectorElts(
 2680  SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2681  APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2682  bool AssumeSingleUse) const {
2683  EVT VT = Op.getValueType();
2684  unsigned Opcode = Op.getOpcode();
2685  APInt DemandedElts = OriginalDemandedElts;
2686  unsigned NumElts = DemandedElts.getBitWidth();
2687  assert(VT.isVector() && "Expected vector op");
2688 
2689  KnownUndef = KnownZero = APInt::getZero(NumElts);
2690 
2691  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2692  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2693  return false;
2694 
2695  // TODO: For now we assume we know nothing about scalable vectors.
2696  if (VT.isScalableVector())
2697  return false;
2698 
2699  assert(VT.getVectorNumElements() == NumElts &&
2700  "Mask size mismatches value type element count!");
2701 
2702  // Undef operand.
2703  if (Op.isUndef()) {
2704  KnownUndef.setAllBits();
2705  return false;
2706  }
2707 
2708  // If Op has other users, assume that all elements are needed.
2709  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2710  DemandedElts.setAllBits();
2711 
2712  // Not demanding any elements from Op.
2713  if (DemandedElts == 0) {
2714  KnownUndef.setAllBits();
2715  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2716  }
2717 
2718  // Limit search depth.
 2719  if (Depth >= SelectionDAG::MaxRecursionDepth)
 2720  return false;
2721 
2722  SDLoc DL(Op);
2723  unsigned EltSizeInBits = VT.getScalarSizeInBits();
2724  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2725 
2726  // Helper for demanding the specified elements and all the bits of both binary
2727  // operands.
2728  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2729  SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2730  TLO.DAG, Depth + 1);
2731  SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2732  TLO.DAG, Depth + 1);
2733  if (NewOp0 || NewOp1) {
2734  SDValue NewOp = TLO.DAG.getNode(
2735  Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2736  return TLO.CombineTo(Op, NewOp);
2737  }
2738  return false;
2739  };
2740 
2741  switch (Opcode) {
2742  case ISD::SCALAR_TO_VECTOR: {
2743  if (!DemandedElts[0]) {
2744  KnownUndef.setAllBits();
2745  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2746  }
2747  SDValue ScalarSrc = Op.getOperand(0);
2748  if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2749  SDValue Src = ScalarSrc.getOperand(0);
2750  SDValue Idx = ScalarSrc.getOperand(1);
2751  EVT SrcVT = Src.getValueType();
2752 
2753  ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2754 
2755  if (SrcEltCnt.isScalable())
2756  return false;
2757 
2758  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2759  if (isNullConstant(Idx)) {
2760  APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2761  APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2762  APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2763  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2764  TLO, Depth + 1))
2765  return true;
2766  }
2767  }
2768  KnownUndef.setHighBits(NumElts - 1);
2769  break;
2770  }
2771  case ISD::BITCAST: {
2772  SDValue Src = Op.getOperand(0);
2773  EVT SrcVT = Src.getValueType();
2774 
2775  // We only handle vectors here.
2776  // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2777  if (!SrcVT.isVector())
2778  break;
2779 
2780  // Fast handling of 'identity' bitcasts.
2781  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2782  if (NumSrcElts == NumElts)
2783  return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2784  KnownZero, TLO, Depth + 1);
2785 
2786  APInt SrcDemandedElts, SrcZero, SrcUndef;
2787 
2788  // Bitcast from 'large element' src vector to 'small element' vector, we
2789  // must demand a source element if any DemandedElt maps to it.
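// e.g. (v8i16 bitcast v4i32): demanding i16 element 5 demands i32 source
// element 2, since i16 elements 4 and 5 both live inside it.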
2790  if ((NumElts % NumSrcElts) == 0) {
2791  unsigned Scale = NumElts / NumSrcElts;
2792  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2793  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2794  TLO, Depth + 1))
2795  return true;
2796 
2797  // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2798  // of the large element.
2799  // TODO - bigendian once we have test coverage.
2800  if (IsLE) {
2801  unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2802  APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2803  for (unsigned i = 0; i != NumElts; ++i)
2804  if (DemandedElts[i]) {
2805  unsigned Ofs = (i % Scale) * EltSizeInBits;
2806  SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2807  }
2808 
2809  KnownBits Known;
2810  if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2811  TLO, Depth + 1))
2812  return true;
2813 
2814  // The bitcast has split each wide element into a number of
2815  // narrow subelements. We have just computed the Known bits
2816  // for wide elements. See if element splitting results in
2817  // some subelements being zero. Only for demanded elements!
2818  for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2819  if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2820  .isAllOnes())
2821  continue;
2822  for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2823  unsigned Elt = Scale * SrcElt + SubElt;
2824  if (DemandedElts[Elt])
2825  KnownZero.setBit(Elt);
2826  }
2827  }
2828  }
2829 
 2830  // If the src element is zero/undef then all the output elements it covers
 2831  // will be too - only demanded elements are guaranteed to be correct.
2832  for (unsigned i = 0; i != NumSrcElts; ++i) {
2833  if (SrcDemandedElts[i]) {
2834  if (SrcZero[i])
2835  KnownZero.setBits(i * Scale, (i + 1) * Scale);
2836  if (SrcUndef[i])
2837  KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2838  }
2839  }
2840  }
2841 
2842  // Bitcast from 'small element' src vector to 'large element' vector, we
2843  // demand all smaller source elements covered by the larger demanded element
2844  // of this vector.
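// e.g. (v2i64 bitcast v8i16): each demanded i64 element pulls in the four
// i16 source elements it is built from.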
2845  if ((NumSrcElts % NumElts) == 0) {
2846  unsigned Scale = NumSrcElts / NumElts;
2847  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2848  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2849  TLO, Depth + 1))
2850  return true;
2851 
2852  // If all the src elements covering an output element are zero/undef, then
2853  // the output element will be as well, assuming it was demanded.
2854  for (unsigned i = 0; i != NumElts; ++i) {
2855  if (DemandedElts[i]) {
2856  if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2857  KnownZero.setBit(i);
2858  if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2859  KnownUndef.setBit(i);
2860  }
2861  }
2862  }
2863  break;
2864  }
2865  case ISD::BUILD_VECTOR: {
2866  // Check all elements and simplify any unused elements with UNDEF.
2867  if (!DemandedElts.isAllOnes()) {
2868  // Don't simplify BROADCASTS.
2869  if (llvm::any_of(Op->op_values(),
2870  [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2871  SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2872  bool Updated = false;
2873  for (unsigned i = 0; i != NumElts; ++i) {
2874  if (!DemandedElts[i] && !Ops[i].isUndef()) {
2875  Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2876  KnownUndef.setBit(i);
2877  Updated = true;
2878  }
2879  }
2880  if (Updated)
2881  return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2882  }
2883  }
2884  for (unsigned i = 0; i != NumElts; ++i) {
2885  SDValue SrcOp = Op.getOperand(i);
2886  if (SrcOp.isUndef()) {
2887  KnownUndef.setBit(i);
2888  } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
 2889  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
 2890  KnownZero.setBit(i);
2891  }
2892  }
2893  break;
2894  }
2895  case ISD::CONCAT_VECTORS: {
2896  EVT SubVT = Op.getOperand(0).getValueType();
2897  unsigned NumSubVecs = Op.getNumOperands();
2898  unsigned NumSubElts = SubVT.getVectorNumElements();
2899  for (unsigned i = 0; i != NumSubVecs; ++i) {
2900  SDValue SubOp = Op.getOperand(i);
2901  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2902  APInt SubUndef, SubZero;
2903  if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2904  Depth + 1))
2905  return true;
2906  KnownUndef.insertBits(SubUndef, i * NumSubElts);
2907  KnownZero.insertBits(SubZero, i * NumSubElts);
2908  }
2909 
2910  // Attempt to avoid multi-use ops if we don't need anything from them.
2911  if (!DemandedElts.isAllOnes()) {
2912  bool FoundNewSub = false;
2913  SmallVector<SDValue, 2> DemandedSubOps;
2914  for (unsigned i = 0; i != NumSubVecs; ++i) {
2915  SDValue SubOp = Op.getOperand(i);
2916  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2917  SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
2918  SubOp, SubElts, TLO.DAG, Depth + 1);
2919  DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
2920  FoundNewSub = NewSubOp ? true : FoundNewSub;
2921  }
2922  if (FoundNewSub) {
2923  SDValue NewOp =
2924  TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
2925  return TLO.CombineTo(Op, NewOp);
2926  }
2927  }
2928  break;
2929  }
2930  case ISD::INSERT_SUBVECTOR: {
 2931  // Demand any elements from the subvector and the remainder from the src
 2932  // it's inserted into.
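// e.g. inserting a v2i32 subvector into v8i32 at index 4: demanded elements
// 4-5 are routed to the subvector and the remaining demanded elements to
// the source vector.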
2933  SDValue Src = Op.getOperand(0);
2934  SDValue Sub = Op.getOperand(1);
2935  uint64_t Idx = Op.getConstantOperandVal(2);
2936  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
2937  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
2938  APInt DemandedSrcElts = DemandedElts;
2939  DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
2940 
2941  APInt SubUndef, SubZero;
2942  if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
2943  Depth + 1))
2944  return true;
2945 
2946  // If none of the src operand elements are demanded, replace it with undef.
2947  if (!DemandedSrcElts && !Src.isUndef())
2948  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2949  TLO.DAG.getUNDEF(VT), Sub,
2950  Op.getOperand(2)));
2951 
2952  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
2953  TLO, Depth + 1))
2954  return true;
2955  KnownUndef.insertBits(SubUndef, Idx);
2956  KnownZero.insertBits(SubZero, Idx);
2957 
2958  // Attempt to avoid multi-use ops if we don't need anything from them.
2959  if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
2960  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2961  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2962  SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
2963  Sub, DemandedSubElts, TLO.DAG, Depth + 1);
2964  if (NewSrc || NewSub) {
2965  NewSrc = NewSrc ? NewSrc : Src;
2966  NewSub = NewSub ? NewSub : Sub;
2967  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2968  NewSub, Op.getOperand(2));
2969  return TLO.CombineTo(Op, NewOp);
2970  }
2971  }
2972  break;
2973  }
2974  case ISD::EXTRACT_SUBVECTOR: {
2975  // Offset the demanded elts by the subvector index.
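// e.g. extracting v2i32 from v8i32 at index 4: demanded result elements 0-1
// map to demanded source elements 4-5 (DemandedElts zero-extended and
// shifted left by Idx).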
2976  SDValue Src = Op.getOperand(0);
2977  if (Src.getValueType().isScalableVector())
2978  break;
2979  uint64_t Idx = Op.getConstantOperandVal(1);
2980  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2981  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
2982 
2983  APInt SrcUndef, SrcZero;
2984  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2985  Depth + 1))
2986  return true;
2987  KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2988  KnownZero = SrcZero.extractBits(NumElts, Idx);
2989 
2990  // Attempt to avoid multi-use ops if we don't need anything from them.
2991  if (!DemandedElts.isAllOnes()) {
2992  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2993  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2994  if (NewSrc) {
2995  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2996  Op.getOperand(1));
2997  return TLO.CombineTo(Op, NewOp);
2998  }
2999  }
3000  break;
3001  }
3002  case ISD::INSERT_VECTOR_ELT: {
3003  SDValue Vec = Op.getOperand(0);
3004  SDValue Scl = Op.getOperand(1);
3005  auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3006 
3007  // For a legal, constant insertion index, if we don't need this insertion
3008  // then strip it, else clear the inserted index from the demanded elts.
3009  if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3010  unsigned Idx = CIdx->getZExtValue();
3011  if (!DemandedElts[Idx])
3012  return TLO.CombineTo(Op, Vec);
3013 
3014  APInt DemandedVecElts(DemandedElts);
3015  DemandedVecElts.clearBit(Idx);
3016  if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3017  KnownZero, TLO, Depth + 1))
3018  return true;
3019 
3020  KnownUndef.setBitVal(Idx, Scl.isUndef());
3021 
3022  KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3023  break;
3024  }
3025 
3026  APInt VecUndef, VecZero;
3027  if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3028  Depth + 1))
3029  return true;
3030  // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3031  break;
3032  }
3033  case ISD::VSELECT: {
3034  // Try to transform the select condition based on the current demanded
3035  // elements.
3036  // TODO: If a condition element is undef, we can choose from one arm of the
3037  // select (and if one arm is undef, then we can propagate that to the
3038  // result).
3039  // TODO - add support for constant vselect masks (see IR version of this).
3040  APInt UnusedUndef, UnusedZero;
3041  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
3042  UnusedZero, TLO, Depth + 1))
3043  return true;
3044 
3045  // See if we can simplify either vselect operand.
3046  APInt DemandedLHS(DemandedElts);
3047  APInt DemandedRHS(DemandedElts);
3048  APInt UndefLHS, ZeroLHS;
3049  APInt UndefRHS, ZeroRHS;
3050  if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
3051  ZeroLHS, TLO, Depth + 1))
3052  return true;
3053  if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
3054  ZeroRHS, TLO, Depth + 1))
3055  return true;
3056 
3057  KnownUndef = UndefLHS & UndefRHS;
3058  KnownZero = ZeroLHS & ZeroRHS;
3059  break;
3060  }
3061  case ISD::VECTOR_SHUFFLE: {
3062  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3063 
3064  // Collect demanded elements from shuffle operands.
3065  APInt DemandedLHS(NumElts, 0);
3066  APInt DemandedRHS(NumElts, 0);
3067  for (unsigned i = 0; i != NumElts; ++i) {
3068  int M = ShuffleMask[i];
3069  if (M < 0 || !DemandedElts[i])
3070  continue;
3071  assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3072  if (M < (int)NumElts)
3073  DemandedLHS.setBit(M);
3074  else
3075  DemandedRHS.setBit(M - NumElts);
3076  }
3077 
3078  // See if we can simplify either shuffle operand.
3079  APInt UndefLHS, ZeroLHS;
3080  APInt UndefRHS, ZeroRHS;
3081  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
3082  ZeroLHS, TLO, Depth + 1))
3083  return true;
3084  if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
3085  ZeroRHS, TLO, Depth + 1))
3086  return true;
3087 
3088  // Simplify mask using undef elements from LHS/RHS.
3089  bool Updated = false;
3090  bool IdentityLHS = true, IdentityRHS = true;
3091  SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
3092  for (unsigned i = 0; i != NumElts; ++i) {
3093  int &M = NewMask[i];
3094  if (M < 0)
3095  continue;
3096  if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3097  (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3098  Updated = true;
3099  M = -1;
3100  }
3101  IdentityLHS &= (M < 0) || (M == (int)i);
3102  IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3103  }
3104 
3105  // Update legal shuffle masks based on demanded elements, unless the mask
3106  // would reduce to Identity, which can cause premature removal of the shuffle.
3107  if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3108  SDValue LegalShuffle =
3109  buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
3110  NewMask, TLO.DAG);
3111  if (LegalShuffle)
3112  return TLO.CombineTo(Op, LegalShuffle);
3113  }
3114 
3115  // Propagate undef/zero elements from LHS/RHS.
3116  for (unsigned i = 0; i != NumElts; ++i) {
3117  int M = ShuffleMask[i];
3118  if (M < 0) {
3119  KnownUndef.setBit(i);
3120  } else if (M < (int)NumElts) {
3121  if (UndefLHS[M])
3122  KnownUndef.setBit(i);
3123  if (ZeroLHS[M])
3124  KnownZero.setBit(i);
3125  } else {
3126  if (UndefRHS[M - NumElts])
3127  KnownUndef.setBit(i);
3128  if (ZeroRHS[M - NumElts])
3129  KnownZero.setBit(i);
3130  }
3131  }
3132  break;
3133  }
3134  case ISD::ANY_EXTEND_VECTOR_INREG:
3135  case ISD::SIGN_EXTEND_VECTOR_INREG:
3136  case ISD::ZERO_EXTEND_VECTOR_INREG: {
3137  APInt SrcUndef, SrcZero;
3138  SDValue Src = Op.getOperand(0);
3139  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3140  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3141  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3142  Depth + 1))
3143  return true;
3144  KnownZero = SrcZero.zextOrTrunc(NumElts);
3145  KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3146 
3147  if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3148  Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3149  DemandedSrcElts == 1) {
3150  // aext - if we just need the bottom element then we can bitcast.
3151  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3152  }
3153 
3154  if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3155  // zext(undef) upper bits are guaranteed to be zero.
3156  if (DemandedElts.isSubsetOf(KnownUndef))
3157  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3158  KnownUndef.clearAllBits();
3159 
3160  // zext - if we just need the bottom element then we can mask:
3161  // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3162  if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3163  Op->isOnlyUserOf(Src.getNode()) &&
3164  Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3165  SDLoc DL(Op);
3166  EVT SrcVT = Src.getValueType();
3167  EVT SrcSVT = SrcVT.getScalarType();
3168  SmallVector<SDValue> MaskElts;
3169  MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3170  MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3171  SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3172  if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3173  ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3174  Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3175  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3176  }
3177  }
3178  }
3179  break;
3180  }
3181 
3182  // TODO: There are more binop opcodes that could be handled here - MIN,
3183  // MAX, saturated math, etc.
3184  case ISD::ADD: {
3185  SDValue Op0 = Op.getOperand(0);
3186  SDValue Op1 = Op.getOperand(1);
3187  if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3188  APInt UndefLHS, ZeroLHS;
3189  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3190  Depth + 1, /*AssumeSingleUse*/ true))
3191  return true;
3192  }
3193  LLVM_FALLTHROUGH;
3194  }
3195  case ISD::OR:
3196  case ISD::XOR:
3197  case ISD::SUB:
3198  case ISD::FADD:
3199  case ISD::FSUB:
3200  case ISD::FMUL:
3201  case ISD::FDIV:
3202  case ISD::FREM: {
3203  SDValue Op0 = Op.getOperand(0);
3204  SDValue Op1 = Op.getOperand(1);
3205 
3206  APInt UndefRHS, ZeroRHS;
3207  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3208  Depth + 1))
3209  return true;
3210  APInt UndefLHS, ZeroLHS;
3211  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3212  Depth + 1))
3213  return true;
3214 
3215  KnownZero = ZeroLHS & ZeroRHS;
3216  KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3217 
3218  // Attempt to avoid multi-use ops if we don't need anything from them.
3219  // TODO - use KnownUndef to relax the DemandedElts?
3220  if (!DemandedElts.isAllOnes())
3221  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3222  return true;
3223  break;
3224  }
3225  case ISD::SHL:
3226  case ISD::SRL:
3227  case ISD::SRA:
3228  case ISD::ROTL:
3229  case ISD::ROTR: {
3230  SDValue Op0 = Op.getOperand(0);
3231  SDValue Op1 = Op.getOperand(1);
3232 
3233  APInt UndefRHS, ZeroRHS;
3234  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3235  Depth + 1))
3236  return true;
3237  APInt UndefLHS, ZeroLHS;
3238  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3239  Depth + 1))
3240  return true;
3241 
3242  KnownZero = ZeroLHS;
3243  KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3244 
3245  // Attempt to avoid multi-use ops if we don't need anything from them.
3246  // TODO - use KnownUndef to relax the DemandedElts?
3247  if (!DemandedElts.isAllOnes())
3248  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3249  return true;
3250  break;
3251  }
3252  case ISD::MUL:
3253  case ISD::AND: {
3254  SDValue Op0 = Op.getOperand(0);
3255  SDValue Op1 = Op.getOperand(1);
3256 
3257  APInt SrcUndef, SrcZero;
3258  if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3259  Depth + 1))
3260  return true;
3261  if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3262  TLO, Depth + 1))
3263  return true;
3264 
3265  // If either side has a zero element, then the result element is zero, even
3266  // if the other is an UNDEF.
3267  // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3268  // and then handle 'and' nodes with the rest of the binop opcodes.
3269  KnownZero |= SrcZero;
3270  KnownUndef &= SrcUndef;
3271  KnownUndef &= ~KnownZero;
3272 
3273  // Attempt to avoid multi-use ops if we don't need anything from them.
3274  // TODO - use KnownUndef to relax the DemandedElts?
3275  if (!DemandedElts.isAllOnes())
3276  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3277  return true;
3278  break;
3279  }
3280  case ISD::TRUNCATE:
3281  case ISD::SIGN_EXTEND:
3282  case ISD::ZERO_EXTEND:
3283  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3284  KnownZero, TLO, Depth + 1))
3285  return true;
3286 
3287  if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3288  // zext(undef) upper bits are guaranteed to be zero.
3289  if (DemandedElts.isSubsetOf(KnownUndef))
3290  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3291  KnownUndef.clearAllBits();
3292  }
3293  break;
3294  default: {
3295  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3296  if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3297  KnownZero, TLO, Depth))
3298  return true;
3299  } else {
3300  KnownBits Known;
3301  APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3302  if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3303  TLO, Depth, AssumeSingleUse))
3304  return true;
3305  }
3306  break;
3307  }
3308  }
3309  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3310 
3311  // Constant fold all undef cases.
3312  // TODO: Handle zero cases as well.
3313  if (DemandedElts.isSubsetOf(KnownUndef))
3314  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3315 
3316  return false;
3317 }
3318 
3319 /// Determine which of the bits specified in Mask are known to be either zero or
3320 /// one and return them in the Known.
3321 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3322  KnownBits &Known,
3323  const APInt &DemandedElts,
3324  const SelectionDAG &DAG,
3325  unsigned Depth) const {
3326  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3327  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3328  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3329  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3330  "Should use MaskedValueIsZero if you don't know whether Op"
3331  " is a target node!");
3332  Known.resetAll();
3333 }
3334 
3335 void TargetLowering::computeKnownBitsForTargetInstr(
3336  GISelKnownBits &Analysis, Register R, KnownBits &Known,
3337  const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3338  unsigned Depth) const {
3339  Known.resetAll();
3340 }
3341 
3342 void TargetLowering::computeKnownBitsForFrameIndex(
3343  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3344  // The low bits are known zero if the pointer is aligned.
3345  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3346 }
3347 
3348 Align TargetLowering::computeKnownAlignForTargetInstr(
3349  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3350  unsigned Depth) const {
3351  return Align(1);
3352 }
3353 
3354 /// This method can be implemented by targets that want to expose additional
3355 /// information about sign bits to the DAG Combiner.
3356 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3357  const APInt &,
3358  const SelectionDAG &,
3359  unsigned Depth) const {
3360  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3361  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3362  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3363  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3364  "Should use ComputeNumSignBits if you don't know whether Op"
3365  " is a target node!");
3366  return 1;
3367 }
3368 
3369 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3370  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3371  const MachineRegisterInfo &MRI, unsigned Depth) const {
3372  return 1;
3373 }
3374 
3375 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3376  SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3377  TargetLoweringOpt &TLO, unsigned Depth) const {
3378  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3379  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3380  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3381  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3382  "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3383  " is a target node!");
3384  return false;
3385 }
3386 
3387 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3388  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3389  KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3390  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3391  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3392  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3393  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3394  "Should use SimplifyDemandedBits if you don't know whether Op"
3395  " is a target node!");
3396  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3397  return false;
3398 }
3399 
3400 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3401  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3402  SelectionDAG &DAG, unsigned Depth) const {
3403  assert(
3404  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3405  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3406  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3407  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3408  "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3409  " is a target node!");
3410  return SDValue();
3411 }
3412 
3413 SDValue
3414 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3415  SDValue N1, MutableArrayRef<int> Mask,
3416  SelectionDAG &DAG) const {
3417  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3418  if (!LegalMask) {
3419  std::swap(N0, N1);
3420  ShuffleVectorSDNode::commuteMask(Mask);
3421  LegalMask = isShuffleMaskLegal(Mask, VT);
3422  }
3423 
3424  if (!LegalMask)
3425  return SDValue();
3426 
3427  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3428 }
3429 
3430 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3431  return nullptr;
3432 }
3433 
3434 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3435  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3436  bool PoisonOnly, unsigned Depth) const {
3437  assert(
3438  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3439  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3440  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3441  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3442  "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3443  " is a target node!");
3444  return false;
3445 }
3446 
3447 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3448  const SelectionDAG &DAG,
3449  bool SNaN,
3450  unsigned Depth) const {
3451  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3452  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3453  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3454  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3455  "Should use isKnownNeverNaN if you don't know whether Op"
3456  " is a target node!");
3457  return false;
3458 }
3459 
3460 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3461  const APInt &DemandedElts,
3462  APInt &UndefElts,
3463  unsigned Depth) const {
3464  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3465  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3466  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3467  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3468  "Should use isSplatValue if you don't know whether Op"
3469  " is a target node!");
3470  return false;
3471 }
3472 
3473 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3474 // work with truncating build vectors and vectors with elements of less than
3475 // 8 bits.
3476 bool TargetLowering::isConstTrueVal(SDValue N) const {
3477  if (!N)
3478  return false;
3479 
3480  unsigned EltWidth;
3481  APInt CVal;
3482  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3483  /*AllowTruncation=*/true)) {
3484  CVal = CN->getAPIntValue();
3485  EltWidth = N.getValueType().getScalarSizeInBits();
3486  } else
3487  return false;
3488 
3489  // If this is a truncating splat, truncate the splat value.
3490  // Otherwise, we may fail to match the expected values below.
3491  if (EltWidth < CVal.getBitWidth())
3492  CVal = CVal.trunc(EltWidth);
3493 
3494  switch (getBooleanContents(N.getValueType())) {
3495  case UndefinedBooleanContent:
3496  return CVal[0];
3497  case ZeroOrOneBooleanContent:
3498  return CVal.isOne();
3499  case ZeroOrNegativeOneBooleanContent:
3500  return CVal.isAllOnes();
3501  }
3502 
3503  llvm_unreachable("Invalid boolean contents");
3504 }
3505 
3506 bool TargetLowering::isConstFalseVal(SDValue N) const {
3507  if (!N)
3508  return false;
3509 
3510  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3511  if (!CN) {
3512  const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3513  if (!BV)
3514  return false;
3515 
3516  // We're only interested in constant splats; we don't care about undef
3517  // elements when identifying boolean constants, and getConstantSplatNode
3518  // returns null if all ops are undef.
3519  CN = BV->getConstantSplatNode();
3520  if (!CN)
3521  return false;
3522  }
3523 
3524  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3525  return !CN->getAPIntValue()[0];
3526 
3527  return CN->isZero();
3528 }
3529 
3530 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3531  bool SExt) const {
3532  if (VT == MVT::i1)
3533  return N->isOne();
3534 
3536  switch (Cnt) {
3538  // An extended value of 1 is always true, unless its original type is i1,
3539  // in which case it will be sign extended to -1.
3540  return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3543  return N->isAllOnes() && SExt;
3544  }
3545  llvm_unreachable("Unexpected enumeration.");
3546 }
3547 
3548 /// This helper function of SimplifySetCC tries to optimize the comparison when
3549 /// either operand of the SetCC node is a bitwise-and instruction.
3550 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3551  ISD::CondCode Cond, const SDLoc &DL,
3552  DAGCombinerInfo &DCI) const {
3553  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3554  std::swap(N0, N1);
3555 
3556  SelectionDAG &DAG = DCI.DAG;
3557  EVT OpVT = N0.getValueType();
3558  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3559  (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3560  return SDValue();
3561 
3562  // (X & Y) != 0 --> zextOrTrunc(X & Y)
3563  // iff everything but LSB is known zero:
3564  if (Cond == ISD::SETNE && isNullConstant(N1) &&
3565  (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3566  getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3567  unsigned NumEltBits = OpVT.getScalarSizeInBits();
3568  APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3569  if (DAG.MaskedValueIsZero(N0, UpperBits))
3570  return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3571  }
3572 
3573  // Match these patterns in any of their permutations:
3574  // (X & Y) == Y
3575  // (X & Y) != Y
3576  SDValue X, Y;
3577  if (N0.getOperand(0) == N1) {
3578  X = N0.getOperand(1);
3579  Y = N0.getOperand(0);
3580  } else if (N0.getOperand(1) == N1) {
3581  X = N0.getOperand(0);
3582  Y = N0.getOperand(1);
3583  } else {
3584  return SDValue();
3585  }
3586 
3587  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3588  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3589  // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3590  // Note that where Y is variable and is known to have at most one bit set
3591  // (for example, if it is Z & 1) we cannot do this; the expressions are not
3592  // equivalent when Y == 0.
3593  assert(OpVT.isInteger());
3594  Cond = ISD::getSetCCInverse(Cond, OpVT);
3595  if (DCI.isBeforeLegalizeOps() ||
3596  isCondCodeLegal(Cond, N0.getSimpleValueType()))
3597  return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3598  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3599  // If the target supports an 'and-not' or 'and-complement' logic operation,
3600  // try to use that to make a comparison operation more efficient.
3601  // But don't do this transform if the mask is a single bit because there are
3602  // more efficient ways to deal with that case (for example, 'bt' on x86 or
3603  // 'rlwinm' on PPC).
3604 
3605  // Bail out if the compare operand that we want to turn into a zero is
3606  // already a zero (otherwise, infinite loop).
3607  auto *YConst = dyn_cast<ConstantSDNode>(Y);
3608  if (YConst && YConst->isZero())
3609  return SDValue();
3610 
3611  // Transform this into: ~X & Y == 0.
3612  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3613  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3614  return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3615  }
3616 
3617  return SDValue();
3618 }
3619 
3620 /// There are multiple IR patterns that could be checking whether certain
3621 /// truncation of a signed number would be lossy or not. The pattern that is
3622 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3623 /// We are looking for the following pattern: (KeptBits is a constant)
3624 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3625 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
3626 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
3627 /// We will unfold it into the natural trunc+sext pattern:
3628 /// ((%x << C) a>> C) dstcond %x
3629 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
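/// For example (illustrative numbers, KeptBits = 8 on i16): the IR-level check
///   (add %x, 128) ult 256
/// asks whether %x fits in 8 signed bits; with C = 16 - 8 = 8 it unfolds to
///   ((%x << 8) a>> 8) == %x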
3630 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3631  EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3632  const SDLoc &DL) const {
3633  // We must be comparing with a constant.
3634  ConstantSDNode *C1;
3635  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3636  return SDValue();
3637 
3638  // N0 should be: add %x, (1 << (KeptBits-1))
3639  if (N0->getOpcode() != ISD::ADD)
3640  return SDValue();
3641 
3642  // And we must be 'add'ing a constant.
3643  ConstantSDNode *C01;
3644  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3645  return SDValue();
3646 
3647  SDValue X = N0->getOperand(0);
3648  EVT XVT = X.getValueType();
3649 
3650  // Validate constants ...
3651 
3652  APInt I1 = C1->getAPIntValue();
3653 
3654  ISD::CondCode NewCond;
3655  if (Cond == ISD::CondCode::SETULT) {
3656  NewCond = ISD::CondCode::SETEQ;
3657  } else if (Cond == ISD::CondCode::SETULE) {
3658  NewCond = ISD::CondCode::SETEQ;
3659  // But need to 'canonicalize' the constant.
3660  I1 += 1;
3661  } else if (Cond == ISD::CondCode::SETUGT) {
3662  NewCond = ISD::CondCode::SETNE;
3663  // But need to 'canonicalize' the constant.
3664  I1 += 1;
3665  } else if (Cond == ISD::CondCode::SETUGE) {
3666  NewCond = ISD::CondCode::SETNE;
3667  } else
3668  return SDValue();
3669 
3670  APInt I01 = C01->getAPIntValue();
3671 
3672  auto checkConstants = [&I1, &I01]() -> bool {
3673  // Both of them must be powers of two, and the constant from the setcc is bigger.
3674  return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3675  };
3676 
3677  if (checkConstants()) {
3678  // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3679  } else {
3680  // What if we invert constants? (and the target predicate)
3681  I1.negate();
3682  I01.negate();
3683  assert(XVT.isInteger());
3684  NewCond = getSetCCInverse(NewCond, XVT);
3685  if (!checkConstants())
3686  return SDValue();
3687  // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3688  }
3689 
3690  // They are power-of-two, so which bit is set?
3691  const unsigned KeptBits = I1.logBase2();
3692  const unsigned KeptBitsMinusOne = I01.logBase2();
3693 
3694  // Magic!
3695  if (KeptBits != (KeptBitsMinusOne + 1))
3696  return SDValue();
3697  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3698 
3699  // We don't want to do this in every single case.
3700  SelectionDAG &DAG = DCI.DAG;
3701  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3702  XVT, KeptBits))
3703  return SDValue();
3704 
3705  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3706  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3707 
3708  // Unfold into: ((%x << C) a>> C) cond %x
3709  // Where 'cond' will be either 'eq' or 'ne'.
3710  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3711  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3712  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3713  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3714 
3715  return T2;
3716 }
3717 
3718 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
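// (For example, with an illustrative mask constant:
//   (X & (0x80 l>> Y)) != 0  -->  ((X << Y) & 0x80) != 0
// so the variable shift moves onto X and the 'and' mask becomes a plain
// constant.)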
3719 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3720  EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3721  DAGCombinerInfo &DCI, const SDLoc &DL) const {
3722  assert(isConstOrConstSplat(N1C) &&
3723  isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3724  "Should be a comparison with 0.");
3725  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3726  "Valid only for [in]equality comparisons.");
3727 
3728  unsigned NewShiftOpcode;
3729  SDValue X, C, Y;
3730 
3731  SelectionDAG &DAG = DCI.DAG;
3732  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3733 
3734  // Look for '(C l>>/<< Y)'.
3735  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3736  // The shift should be one-use.
3737  if (!V.hasOneUse())
3738  return false;
3739  unsigned OldShiftOpcode = V.getOpcode();
3740  switch (OldShiftOpcode) {
3741  case ISD::SHL:
3742  NewShiftOpcode = ISD::SRL;
3743  break;
3744  case ISD::SRL:
3745  NewShiftOpcode = ISD::SHL;
3746  break;
3747  default:
3748  return false; // must be a logical shift.
3749  }
3750  // We should be shifting a constant.
3751  // FIXME: best to use isConstantOrConstantVector().
3752  C = V.getOperand(0);
3753  ConstantSDNode *CC =
3754  isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3755  if (!CC)
3756  return false;
3757  Y = V.getOperand(1);
3758 
3759  ConstantSDNode *XC =
3760  isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3761  return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3762  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3763  };
3764 
3765  // LHS of comparison should be a one-use 'and'.
3766  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3767  return SDValue();
3768 
3769  X = N0.getOperand(0);
3770  SDValue Mask = N0.getOperand(1);
3771 
3772  // 'and' is commutative!
3773  if (!Match(Mask)) {
3774  std::swap(X, Mask);
3775  if (!Match(Mask))
3776  return SDValue();
3777  }
3778 
3779  EVT VT = X.getValueType();
3780 
3781  // Produce:
3782  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3783  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3784  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3785  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3786  return T2;
3787 }
3788 
3789 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3790 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3791 /// handle the commuted versions of these patterns.
3792 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3793  ISD::CondCode Cond, const SDLoc &DL,
3794  DAGCombinerInfo &DCI) const {
3795  unsigned BOpcode = N0.getOpcode();
3796  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3797  "Unexpected binop");
3798  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3799 
3800  // (X + Y) == X --> Y == 0
3801  // (X - Y) == X --> Y == 0
3802  // (X ^ Y) == X --> Y == 0
3803  SelectionDAG &DAG = DCI.DAG;
3804  EVT OpVT = N0.getValueType();
3805  SDValue X = N0.getOperand(0);
3806  SDValue Y = N0.getOperand(1);
3807  if (X == N1)
3808  return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3809 
3810  if (Y != N1)
3811  return SDValue();
3812 
3813  // (X + Y) == Y --> X == 0
3814  // (X ^ Y) == Y --> X == 0
3815  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3816  return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3817 
3818  // The shift would not be valid if the operands are boolean (i1).
3819  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3820  return SDValue();
3821 
3822  // (X - Y) == Y --> X == Y << 1
3823  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3824  !DCI.isBeforeLegalize());
3825  SDValue One = DAG.getConstant(1, DL, ShiftVT);
3826  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3827  if (!DCI.isCalledByLegalizer())
3828  DCI.AddToWorklist(YShl1.getNode());
3829  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3830 }
3831 
3832 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
3833  SDValue N0, const APInt &C1,
3834  ISD::CondCode Cond, const SDLoc &dl,
3835  SelectionDAG &DAG) {
3836  // Look through truncs that don't change the value of a ctpop.
3837  // FIXME: Add vector support? Need to be careful with setcc result type below.
3838  SDValue CTPOP = N0;
3839  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
3840  N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
3841  CTPOP = N0.getOperand(0);
3842 
3843  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
3844  return SDValue();
3845 
3846  EVT CTVT = CTPOP.getValueType();
3847  SDValue CTOp = CTPOP.getOperand(0);
3848 
3849  // If this is a vector CTPOP, keep the CTPOP if it is legal.
3850  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
3851  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
3852  return SDValue();
3853 
3854  // (ctpop x) u< 2 -> (x & x-1) == 0
3855  // (ctpop x) u> 1 -> (x & x-1) != 0
3856  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
3857  unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
3858  if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
3859  return SDValue();
3860  if (C1 == 0 && (Cond == ISD::SETULT))
3861  return SDValue(); // This is handled elsewhere.
3862 
3863  unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
3864 
3865  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3866  SDValue Result = CTOp;
3867  for (unsigned i = 0; i < Passes; i++) {
3868  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
3869  Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
3870  }
3871  ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3872  return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
3873  }
3874 
3875  // If ctpop is not supported, expand a power-of-2 comparison based on it.
3876  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
3877  // For scalars, keep CTPOP if it is legal or custom.
3878  if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
3879  return SDValue();
3880  // This is based on X86's custom lowering for CTPOP which produces more
3881  // instructions than the expansion here.
3882 
3883  // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3884  // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3885  SDValue Zero = DAG.getConstant(0, dl, CTVT);
3886  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3887  assert(CTVT.isInteger());
3888  ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3889  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3890  SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3891  SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3892  SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3893  unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3894  return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3895  }
3896 
3897  return SDValue();
3898 }
3899 
3900 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
3901  ISD::CondCode Cond, const SDLoc &dl,
3902  SelectionDAG &DAG) {
3903  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
3904  return SDValue();
3905 
3906  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
3907  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
3908  return SDValue();
3909 
3910  auto getRotateSource = [](SDValue X) {
3911  if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
3912  return X.getOperand(0);
3913  return SDValue();
3914  };
3915 
3916  // Peek through a rotated value compared against 0 or -1:
3917  // (rot X, Y) == 0/-1 --> X == 0/-1
3918  // (rot X, Y) != 0/-1 --> X != 0/-1
3919  if (SDValue R = getRotateSource(N0))
3920  return DAG.getSetCC(dl, VT, R, N1, Cond);
3921 
3922  // Peek through an 'or' of a rotated value compared against 0:
3923  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
3924  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
3925  //
3926  // TODO: Add the 'and' with -1 sibling.
3927  // TODO: Recurse through a series of 'or' ops to find the rotate.
3928  EVT OpVT = N0.getValueType();
3929  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
3930  if (SDValue R = getRotateSource(N0.getOperand(0))) {
3931  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
3932  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3933  }
3934  if (SDValue R = getRotateSource(N0.getOperand(1))) {
3935  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
3936  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3937  }
3938  }
3939 
3940  return SDValue();
3941 }
3942 
3943 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
3944  ISD::CondCode Cond, const SDLoc &dl,
3945  SelectionDAG &DAG) {
3946  // If we are testing for all-bits-clear, we might be able to do that with
3947  // less shifting since bit-order does not matter.
3948  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
3949  return SDValue();
3950 
3951  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
3952  if (!C1 || !C1->isZero())
3953  return SDValue();
3954 
3955  if (!N0.hasOneUse() ||
3956  (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
3957  return SDValue();
3958 
3959  unsigned BitWidth = N0.getScalarValueSizeInBits();
3960  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
3961  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
3962  return SDValue();
3963 
3964  // Canonicalize fshr as fshl to reduce pattern-matching.
3965  unsigned ShAmt = ShAmtC->getZExtValue();
3966  if (N0.getOpcode() == ISD::FSHR)
3967  ShAmt = BitWidth - ShAmt;
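// (Illustrative: on i8, fshr(X, Y, 3) produces the same result as
// fshl(X, Y, 5), so only the fshl form needs to be matched below.)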
3968 
3969  // Match an 'or' with a specific operand 'Other' in either commuted variant.
3970  SDValue X, Y;
3971  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
3972  if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
3973  return false;
3974  if (Or.getOperand(0) == Other) {
3975  X = Or.getOperand(0);
3976  Y = Or.getOperand(1);
3977  return true;
3978  }
3979  if (Or.getOperand(1) == Other) {
3980  X = Or.getOperand(1);
3981  Y = Or.getOperand(0);
3982  return true;
3983  }
3984  return false;
3985  };
3986 
3987  EVT OpVT = N0.getValueType();
3988  EVT ShAmtVT = N0.getOperand(2).getValueType();
3989  SDValue F0 = N0.getOperand(0);
3990  SDValue F1 = N0.getOperand(1);
3991  if (matchOr(F0, F1)) {
3992  // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
3993  SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
3994  SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
3995  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
3996  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3997  }
3998  if (matchOr(F1, F0)) {
3999  // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4000  SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4001  SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4002  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4003  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4004  }
4005 
4006  return SDValue();
4007 }
4008 
4009 /// Try to simplify a setcc built with the specified operands and cc. If it is
4010 /// unable to simplify it, return a null SDValue.
4011 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4012  ISD::CondCode Cond, bool foldBooleans,
4013  DAGCombinerInfo &DCI,
4014  const SDLoc &dl) const {
4015  SelectionDAG &DAG = DCI.DAG;
4016  const DataLayout &Layout = DAG.getDataLayout();
4017  EVT OpVT = N0.getValueType();
4018 
4019  // Constant fold or commute setcc.
4020  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4021  return Fold;
4022 
4023  bool N0ConstOrSplat =
4024  isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4025  bool N1ConstOrSplat =
4026  isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4027 
4028  // Ensure that the constant occurs on the RHS and fold constant comparisons.
4029  // TODO: Handle non-splat vector constants. All-undef causes trouble.
4030  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4031  // infinite loop here when we encounter one.
4032  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4033  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4034  (DCI.isBeforeLegalizeOps() ||
4035  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4036  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4037 
4038  // If we have a subtract with the same 2 non-constant operands as this setcc
4039  // -- but in reverse order -- then try to commute the operands of this setcc
4040  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4041  // instruction on some targets.
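// (Illustrative: if (sub %b, %a) already exists, then (setcc %a, %b, setlt)
// is commuted to (setcc %b, %a, setgt) so both can share one compare/sub.)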
4042  if (!N0ConstOrSplat && !N1ConstOrSplat &&
4043  (DCI.isBeforeLegalizeOps() ||
4044  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4045  DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4046  !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4047  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4048 
4049  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4050  return V;
4051 
4052  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4053  return V;
4054 
4055  if (auto *N1C = isConstOrConstSplat(N1)) {
4056  const APInt &C1 = N1C->getAPIntValue();
4057 
4058  // Optimize some CTPOP cases.
4059  if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4060  return V;
4061 
4062  // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4063  // equality comparison, then we're just comparing whether X itself is
4064  // zero.
4065  if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4066  N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4067  isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4068  if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4069  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4070  ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4071  if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4072  // (srl (ctlz x), 5) == 0 -> X != 0
4073  // (srl (ctlz x), 5) != 1 -> X != 0
4074  Cond = ISD::SETNE;
4075  } else {
4076  // (srl (ctlz x), 5) != 0 -> X == 0
4077  // (srl (ctlz x), 5) == 1 -> X == 0
4078  Cond = ISD::SETEQ;
4079  }
4080  SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4081  return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4082  Cond);
4083  }
4084  }
4085  }
4086  }
4087 
4088  // FIXME: Support vectors.
4089  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4090  const APInt &C1 = N1C->getAPIntValue();
4091 
4092  // (zext x) == C --> x == (trunc C)
4093  // (sext x) == C --> x == (trunc C)
4094  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4095  DCI.isBeforeLegalize() && N0->hasOneUse()) {
4096  unsigned MinBits = N0.getValueSizeInBits();
4097  SDValue PreExt;
4098  bool Signed = false;
4099  if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4100  // ZExt
4101  MinBits = N0->getOperand(0).getValueSizeInBits();
4102  PreExt = N0->getOperand(0);
4103  } else if (N0->getOpcode() == ISD::AND) {
4104  // DAGCombine turns costly ZExts into ANDs
4105  if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4106  if ((C->getAPIntValue()+1).isPowerOf2()) {
4107  MinBits = C->getAPIntValue().countTrailingOnes();
4108  PreExt = N0->getOperand(0);
4109  }
4110  } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4111  // SExt
4112  MinBits = N0->getOperand(0).getValueSizeInBits();
4113  PreExt = N0->getOperand(0);
4114  Signed = true;
4115  } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4116  // ZEXTLOAD / SEXTLOAD
4117  if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4118  MinBits = LN0->getMemoryVT().getSizeInBits();
4119  PreExt = N0;
4120  } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4121  Signed = true;
4122  MinBits = LN0->getMemoryVT().getSizeInBits();
4123  PreExt = N0;
4124  }
4125  }
4126 
4127  // Figure out how many bits we need to preserve this constant.
4128  unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4129 
4130  // Make sure we're not losing bits from the constant.
4131  if (MinBits > 0 &&
4132  MinBits < C1.getBitWidth() &&
4133  MinBits >= ReqdBits) {
4134  EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4135  if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4136  // Will get folded away.
4137  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4138  if (MinBits == 1 && C1 == 1)
4139  // Invert the condition.
4140  return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4141  ISD::getSetCCInverse(Cond, MVT::i1));
4142  SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4143  return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4144  }
4145 
4146  // If truncating the setcc operands is not desirable, we can still
4147  // simplify the expression in some cases:
4148  // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4149  // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4150  // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4151  // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4152  // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4153  // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4154  SDValue TopSetCC = N0->getOperand(0);
4155  unsigned N0Opc = N0->getOpcode();
4156  bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4157  if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4158  TopSetCC.getOpcode() == ISD::SETCC &&
4159  (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4160  (isConstFalseVal(N1) ||
4161  isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4162 
4163  bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4164  (!N1C->isZero() && Cond == ISD::SETNE);
4165 
4166  if (!Inverse)
4167  return TopSetCC;
4168 
4169  ISD::CondCode InvCond = ISD::getSetCCInverse(
4170  cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4171  TopSetCC.getOperand(0).getValueType());
4172  return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4173  TopSetCC.getOperand(1),
4174  InvCond);
4175  }
4176  }
4177  }
4178 
4179  // If the LHS is '(and load, const)', the RHS is 0, the test is for
4180  // equality or unsigned, and all 1 bits of the const are in the same
4181  // partial word, see if we can shorten the load.
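// (Illustrative, little-endian: ((load i32 %p) & 0xFF00) == 0 can be
// narrowed to ((load i16 %p) & 0xFF00) == 0, i.e. bestWidth = 16 and
// bestOffset = 0 in the search below.)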
4182  if (DCI.isBeforeLegalize() &&
4183  !ISD::isSignedIntSetCC(Cond) &&
4184  N0.getOpcode() == ISD::AND && C1 == 0 &&
4185  N0.getNode()->hasOneUse() &&
4186  isa<LoadSDNode>(N0.getOperand(0)) &&
4187  N0.getOperand(0).getNode()->hasOneUse() &&
4188  isa<ConstantSDNode>(N0.getOperand(1))) {
4189  LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4190  APInt bestMask;
4191  unsigned bestWidth = 0, bestOffset = 0;
4192  if (Lod->isSimple() && Lod->isUnindexed()) {
4193  unsigned origWidth = N0.getValueSizeInBits();
4194  unsigned maskWidth = origWidth;
4195  // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4196  // 8 bits, but have to be careful...
4197  if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4198  origWidth = Lod->getMemoryVT().getSizeInBits();
4199  const APInt &Mask = N0.getConstantOperandAPInt(1);
4200  for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4201  APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4202  for (unsigned offset=0; offset<origWidth/width; offset++) {
4203  if (Mask.isSubsetOf(newMask)) {
4204  if (Layout.isLittleEndian())
4205  bestOffset = (uint64_t)offset * (width/8);
4206  else
4207  bestOffset = (origWidth/width - offset - 1) * (width/8);
4208  bestMask = Mask.lshr(offset * (width/8) * 8);
4209  bestWidth = width;
4210  break;
4211  }
4212  newMask <<= width;
4213  }
4214  }
4215  }
4216  if (bestWidth) {
4217  EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4218  if (newVT.isRound() &&
4219  shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4220  SDValue Ptr = Lod->getBasePtr();
4221  if (bestOffset != 0)
4222  Ptr =
4223  DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4224  SDValue NewLoad =
4225  DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4226  Lod->getPointerInfo().getWithOffset(bestOffset),
4227  Lod->getOriginalAlign());
4228  return DAG.getSetCC(dl, VT,
4229  DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4230  DAG.getConstant(bestMask.trunc(bestWidth),
4231  dl, newVT)),
4232  DAG.getConstant(0LL, dl, newVT), Cond);
4233  }
4234  }
4235  }
4236 
4237  // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4238  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4239  unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4240 
4241  // If the comparison constant has bits in the upper part, the
4242  // zero-extended value could never match.
4243  if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4244  C1.getBitWidth() - InSize))) {
4245  switch (Cond) {
4246  case ISD::SETUGT:
4247  case ISD::SETUGE:
4248  case ISD::SETEQ:
4249  return DAG.getConstant(0, dl, VT);
4250  case ISD::SETULT:
4251  case ISD::SETULE:
4252  case ISD::SETNE:
4253  return DAG.getConstant(1, dl, VT);
4254  case ISD::SETGT:
4255  case ISD::SETGE:
4256  // True if the sign bit of C1 is set.
4257  return DAG.getConstant(C1.isNegative(), dl, VT);
4258  case ISD::SETLT:
4259  case ISD::SETLE:
4260  // True if the sign bit of C1 isn't set.
4261  return DAG.getConstant(C1.isNonNegative(), dl, VT);
4262  default:
4263  break;
4264  }
4265  }
4266 
4267  // Otherwise, we can perform the comparison with the low bits.
4268  switch (Cond) {
4269  case ISD::SETEQ:
4270  case ISD::SETNE:
4271  case ISD::SETUGT:
4272  case ISD::SETUGE:
4273  case ISD::SETULT:
4274  case ISD::SETULE: {
4275  EVT newVT = N0.getOperand(0).getValueType();
4276  if (DCI.isBeforeLegalizeOps() ||
4277  (isOperationLegal(ISD::SETCC, newVT) &&
4278  isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4279  EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4280  SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4281 
4282  SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4283  NewConst, Cond);
4284  return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4285  }
4286  break;
4287  }
4288  default:
4289  break; // TODO: be more careful with signed comparisons.
4290  }
4291  } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4292  (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4293  !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4294  OpVT)) {
4295  EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4296  unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4297  EVT ExtDstTy = N0.getValueType();
4298  unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4299 
4300  // If the constant doesn't fit into the number of bits for the source of
4301  // the sign extension, it is impossible for both sides to be equal.
4302  if (C1.getMinSignedBits() > ExtSrcTyBits)
4303  return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4304 
4305  assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4306  ExtDstTy != ExtSrcTy && "Unexpected types!");
4307  APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4308  SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4309  DAG.getConstant(Imm, dl, ExtDstTy));
4310  if (!DCI.isCalledByLegalizer())
4311  DCI.AddToWorklist(ZextOp.getNode());
4312  // Otherwise, make this a use of a zext.
4313  return DAG.getSetCC(dl, VT, ZextOp,
4314  DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4315  } else if ((N1C->isZero() || N1C->isOne()) &&
4316  (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4317  // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
4318  if (N0.getOpcode() == ISD::SETCC &&
4319  isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4320  (N0.getValueType() == MVT::i1 ||
4321  getBooleanContents(N0.getOperand(0).getValueType()) ==
4322  ZeroOrOneBooleanContent)) {
4323  bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4324  if (TrueWhenTrue)
4325  return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4326  // Invert the condition.
4327  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4328  CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4329  if (DCI.isBeforeLegalizeOps() ||
4330  isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4331  return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4332  }
4333 
4334  if ((N0.getOpcode() == ISD::XOR ||
4335  (N0.getOpcode() == ISD::AND &&
4336  N0.getOperand(0).getOpcode() == ISD::XOR &&
4337  N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4338  isOneConstant(N0.getOperand(1))) {
4339  // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4340  // can only do this if the top bits are known zero.
4341  unsigned BitWidth = N0.getValueSizeInBits();
4342  if (DAG.MaskedValueIsZero(N0,
4343  APInt::getHighBitsSet(BitWidth,
4344  BitWidth-1))) {
4345  // Okay, get the un-inverted input value.
4346  SDValue Val;
4347  if (N0.getOpcode() == ISD::XOR) {
4348  Val = N0.getOperand(0);
4349  } else {
4350  assert(N0.getOpcode() == ISD::AND &&
4351  N0.getOperand(0).getOpcode() == ISD::XOR);
4352  // ((X^1)&1)^1 -> X & 1
4353  Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4354  N0.getOperand(0).getOperand(0),
4355  N0.getOperand(1));
4356  }
4357 
4358  return DAG.getSetCC(dl, VT, Val, N1,
4359  ISD::getSetCCInverse(Cond, N1.getValueType()));
4360  }
4361  } else if (N1C->isOne()) {
4362  SDValue Op0 = N0;
4363  if (Op0.getOpcode() == ISD::TRUNCATE)
4364  Op0 = Op0.getOperand(0);
4365 
4366  if ((Op0.getOpcode() == ISD::XOR) &&
4367  Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4368  Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4369  SDValue XorLHS = Op0.getOperand(0);
4370  SDValue XorRHS = Op0.getOperand(1);
4371  // Ensure that the input setccs return an i1 type or 0/1 value.
4372  if (Op0.getValueType() == MVT::i1 ||
4373  (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4374  ZeroOrOneBooleanContent &&
4375  getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4376  ZeroOrOneBooleanContent)) {
4377  // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4378  Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4379  return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4380  }
4381  }
4382  if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4383  // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4384  if (Op0.getValueType().bitsGT(VT))
4385  Op0 = DAG.getNode(ISD::AND, dl, VT,
4386  DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4387  DAG.getConstant(1, dl, VT));
4388  else if (Op0.getValueType().bitsLT(VT))
4389  Op0 = DAG.getNode(ISD::AND, dl, VT,
4390  DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4391  DAG.getConstant(1, dl, VT));
4392 
4393  return DAG.getSetCC(dl, VT, Op0,
4394  DAG.getConstant(0, dl, Op0.getValueType()),
4395  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4396  }
4397  if (Op0.getOpcode() == ISD::AssertZext &&
4398  cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4399  return DAG.getSetCC(dl, VT, Op0,
4400  DAG.getConstant(0, dl, Op0.getValueType()),
4401  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4402  }
4403  }
4404 
4405  // Given:
4406  // icmp eq/ne (urem %x, %y), 0
4407  // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4408  // icmp eq/ne %x, 0
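// (Every divisor of a power of two is itself a power of two, so a %y with
// two or more bits set cannot evenly divide a nonzero %x that has at most
// one bit set.)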
4409  if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4410  (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4411  KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4412  KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4413  if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4414  return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4415  }
4416 
4417  // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4418  // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4419  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4420  N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4421  N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4422  N1C && N1C->isAllOnes()) {
4423  return DAG.getSetCC(dl, VT, N0.getOperand(0),
4424  DAG.getConstant(0, dl, OpVT),
4425  Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4426  }
4427 
4428  if (SDValue V =
4429  optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4430  return V;
4431  }
4432 
4433  // These simplifications apply to splat vectors as well.
4434  // TODO: Handle more splat vector cases.
4435  if (auto *N1C = isConstOrConstSplat(N1)) {
4436  const APInt &C1 = N1C->getAPIntValue();
4437 
4438  APInt MinVal, MaxVal;
4439  unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4440  if (ISD::isSignedIntSetCC(Cond)) {
4441  MinVal = APInt::getSignedMinValue(OperandBitSize);
4442  MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4443  } else {
4444  MinVal = APInt::getMinValue(OperandBitSize);
4445  MaxVal = APInt::getMaxValue(OperandBitSize);
4446  }
4447 
4448  // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4449  if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4450  // X >= MIN --> true
4451  if (C1 == MinVal)
4452  return DAG.getBoolConstant(true, dl, VT, OpVT);
4453 
4454  if (!VT.isVector()) { // TODO: Support this for vectors.
4455  // X >= C0 --> X > (C0 - 1)
4456  APInt C = C1 - 1;