//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
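
// Illustrative note (a sketch, not part of the original source): a function
// carrying the IR attribute below makes isInTailCallPosition return false
// for every call it contains, so no tail calls are formed:
//
//   define i32 @f() "disable-tail-calls"="true" {
//     %r = tail call i32 @g()   ; still lowered as a normal call
//     ret i32 %r
//   }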

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
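
// Illustrative behavior (a sketch, not from the original source): for a call
//   call void @take(ptr byval(%struct.S) align 8 %p)
// setAttributes(Call, /*ArgIdx=*/0) sets IsByVal = true and IndirectType to
// %struct.S; with no stack-alignment attribute present, Alignment falls back
// to the parameter alignment Align(8).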

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
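
// Illustrative usage (a sketch, not from the original source): a target's
// custom lowering could soften an f32 division into a libcall like this;
// `TLI`, `LHS`, `RHS`, and `dl` are hypothetical locals.
//
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       TLI.makeLibCall(DAG, RTLIB::DIV_F32, MVT::f32, Ops, CallOptions, dl);
//   // Res.first is the call result, Res.second the output chain.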

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
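
// Worked example (illustrative, not from the original source): on a target
// whose widest legal integer type is i64 and which permits fast overlapping
// unaligned accesses, a 15-byte memcpy typically yields MemOps = {i64, i64},
// the second access overlapping the first by one byte; without overlap it
// would instead decompose as {i64, i32, i16, i8}.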

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
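
// Worked example (illustrative, not from the original source): softening
// (setcc f32 %a, %b, setueq) picks LC1 = RTLIB::UO_F32 and
// LC2 = RTLIB::OEQ_F32, i.e. it materializes
//   __unordsf2(a, b) != 0  ||  __eqsf2(a, b) == 0
// with the two integer setcc results joined by the ISD::OR built above.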

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
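
// Illustrative consequence (an assumption-level sketch, not from the original
// source): for a dso-local global in non-PIC code, an access to @g + 8 can be
// folded into one GlobalAddress node carrying the offset; under PIC the
// offset must stay a separate add applied after the base address is formed.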

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
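
// Worked example (illustrative, not from the original source): for
//   (and X, 0xFF)  with  DemandedBits = 0x0F
// the constant 0xFF is not a subset of the demanded bits, so the node is
// rebuilt as (and X, 0x0F), which later folds may simplify further.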

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
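
// Worked example (illustrative, not from the original source): for an i32
//   (add X, Y)  where only the low 8 bits are demanded,
// a target with free i32 <-> i8 casts rewrites the node as
//   (any_extend (add (trunc X), (trunc Y)))
// performing the addition in i8.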

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::FREEZE: {
    SDValue N0 = Op.getOperand(0);
    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
                                             /*PoisonOnly=*/false))
      return N0;
    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
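
// Worked example (illustrative, not from the original source): for
//   (and X, Y)  where Y has ones in every demanded bit,
// the AND contributes nothing to the demanded bits, so X is returned
// directly; the multi-use AND node itself is left in place for its
// other users.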

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
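
// Worked example (illustrative, not from the original source): with i8 values
// A and B zero-extended to i32,
//   (srl (add (zext A), (zext B)), 1)
// has 24 known leading zero bits, so KnownBits = 24 and MinWidth = 8; if
// AVGFLOORU is legal for i8 the whole expression becomes
//   (zext (avgflooru A, B)).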

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }

      unsigned Op0Opcode = Op0.getOpcode();
      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
        if (ConstantSDNode *ShiftC =
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
          // Don't crash on an oversized shift. We cannot guarantee that a
          // bogus shift has been simplified to undef.
          if (ShiftC->getAPIntValue().ult(BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            APInt Ones = APInt::getAllOnes(BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
                                         : Ones.lshr(ShiftAmt);
            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
                isDesirableToCommuteXorWithShift(Op.getNode())) {
              // If the xor constant is a demanded mask, do a 'not' before the
              // shift:
              // xor (X << ShiftC), XorC --> (not X) << ShiftC
              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
              SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
              return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
                                                       Op0.getOperand(1)));
            }
          }
        }
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known ^= Known2;
    break;
  }
1687 case ISD::SELECT:
1688 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1689 Known, TLO, Depth + 1))
1690 return true;
1691 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1692 Known2, TLO, Depth + 1))
1693 return true;
1694
1695 // If the operands are constants, see if we can simplify them.
1696 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1697 return true;
1698
1699 // Only known if known in both the LHS and RHS.
1700 Known = Known.intersectWith(Known2);
1701 break;
1702 case ISD::VSELECT:
1703 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1704 Known, TLO, Depth + 1))
1705 return true;
1706 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1707 Known2, TLO, Depth + 1))
1708 return true;
1709
1710 // Only known if known in both the LHS and RHS.
1711 Known = Known.intersectWith(Known2);
1712 break;
1713 case ISD::SELECT_CC:
1714 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1715 Known, TLO, Depth + 1))
1716 return true;
1717 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1718 Known2, TLO, Depth + 1))
1719 return true;
1720
1721 // If the operands are constants, see if we can simplify them.
1722 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1723 return true;
1724
1725 // Only known if known in both the LHS and RHS.
1726 Known = Known.intersectWith(Known2);
1727 break;
1728 case ISD::SETCC: {
1729 SDValue Op0 = Op.getOperand(0);
1730 SDValue Op1 = Op.getOperand(1);
1731 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1732 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1733 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1734 // -1, we may be able to bypass the setcc.
1735 if (DemandedBits.isSignMask() &&
1736 Op0.getScalarValueSizeInBits() == BitWidth &&
1737 getBooleanContents(Op0.getValueType()) ==
1738 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1739 // If we're testing X < 0, then this compare isn't needed - just use X!
1740 // FIXME: We're limiting to integer types here, but this should also work
1741 // if we don't care about FP signed-zero. The use of SETLT with FP means
1742 // that we don't care about NaNs.
1743 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1744 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1745 return TLO.CombineTo(Op, Op0);
1746
1747 // TODO: Should we check for other forms of sign-bit comparisons?
1748 // Examples: X <= -1, X >= 0
1749 }
1750 if (getBooleanContents(Op0.getValueType()) ==
1751 TargetLowering::ZeroOrOneBooleanContent &&
1752 BitWidth > 1)
1753 Known.Zero.setBitsFrom(1);
1754 break;
1755 }
1756 case ISD::SHL: {
1757 SDValue Op0 = Op.getOperand(0);
1758 SDValue Op1 = Op.getOperand(1);
1759 EVT ShiftVT = Op1.getValueType();
1760
1761 if (std::optional<uint64_t> KnownSA =
1762 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1763 unsigned ShAmt = *KnownSA;
1764 if (ShAmt == 0)
1765 return TLO.CombineTo(Op, Op0);
1766
1767 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1768 // single shift. We can do this if the bottom bits (which are shifted
1769 // out) are never demanded.
1770 // TODO - support non-uniform vector amounts.
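// Illustrative example: (shl (srl x, 2), 5) --> (shl x, 3) when the low 5
// bits are not demanded; Diff = 5 - 2 = 3 selects a single SHL.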
1771 if (Op0.getOpcode() == ISD::SRL) {
1772 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1773 if (std::optional<uint64_t> InnerSA =
1774 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1775 unsigned C1 = *InnerSA;
1776 unsigned Opc = ISD::SHL;
1777 int Diff = ShAmt - C1;
1778 if (Diff < 0) {
1779 Diff = -Diff;
1780 Opc = ISD::SRL;
1781 }
1782 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1783 return TLO.CombineTo(
1784 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1785 }
1786 }
1787 }
1788
1789 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1790 // are not demanded. This will likely allow the anyext to be folded away.
1791 // TODO - support non-uniform vector amounts.
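// Illustrative example: if only the low 8 bits are demanded,
// (shl (any_extend i8:x to i32), 2) --> (any_extend (shl i8:x, 2)).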
1792 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1793 SDValue InnerOp = Op0.getOperand(0);
1794 EVT InnerVT = InnerOp.getValueType();
1795 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1796 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1797 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1798 SDValue NarrowShl = TLO.DAG.getNode(
1799 ISD::SHL, dl, InnerVT, InnerOp,
1800 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1801 return TLO.CombineTo(
1802 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1803 }
1804
1805 // Repeat the SHL optimization above in cases where an extension
1806 // intervenes: (shl (anyext (shr x, c1)), c2) to
1807 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1808 // aren't demanded (as above) and that the shifted upper c1 bits of
1809 // x aren't demanded.
1810 // TODO - support non-uniform vector amounts.
1811 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1812 InnerOp.hasOneUse()) {
1813 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1814 InnerOp, DemandedElts, Depth + 2)) {
1815 unsigned InnerShAmt = *SA2;
1816 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1817 DemandedBits.getActiveBits() <=
1818 (InnerBits - InnerShAmt + ShAmt) &&
1819 DemandedBits.countr_zero() >= ShAmt) {
1820 SDValue NewSA =
1821 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1822 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1823 InnerOp.getOperand(0));
1824 return TLO.CombineTo(
1825 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1826 }
1827 }
1828 }
1829 }
1830
1831 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1832 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1833 Depth + 1)) {
1834 // Disable the nsw and nuw flags. We can no longer guarantee that we
1835 // won't wrap after simplification.
1836 Op->dropFlags(SDNodeFlags::NoWrap);
1837 return true;
1838 }
1839 Known.Zero <<= ShAmt;
1840 Known.One <<= ShAmt;
1841 // low bits known zero.
1842 Known.Zero.setLowBits(ShAmt);
1843
1844 // Attempt to avoid multi-use ops if we don't need anything from them.
1845 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1846 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1847 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1848 if (DemandedOp0) {
1849 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1850 return TLO.CombineTo(Op, NewOp);
1851 }
1852 }
1853
1854 // TODO: Can we merge this fold with the one below?
1855 // Try shrinking the operation as long as the shift amount will still be
1856 // in range.
1857 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1858 Op.getNode()->hasOneUse()) {
1859 // Search for the smallest integer type with free casts to and from
1860 // Op's type. For expedience, just check power-of-2 integer types.
1861 unsigned DemandedSize = DemandedBits.getActiveBits();
1862 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1863 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1864 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1865 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1866 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1867 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1868 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1869 assert(DemandedSize <= SmallVTBits &&
1870 "Narrowed below demanded bits?");
1871 // We found a type with free casts.
1872 SDValue NarrowShl = TLO.DAG.getNode(
1873 ISD::SHL, dl, SmallVT,
1874 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1875 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1876 return TLO.CombineTo(
1877 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1878 }
1879 }
1880 }
1881
1882 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1883 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1884 // Only do this if we demand the upper half so the knownbits are correct.
1885 unsigned HalfWidth = BitWidth / 2;
1886 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1887 DemandedBits.countLeadingOnes() >= HalfWidth) {
1888 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1889 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1890 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1891 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1892 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1893 // If we're demanding the upper bits at all, we must ensure
1894 // that the upper bits of the shift result are known to be zero,
1895 // which is equivalent to the narrow shift being NUW.
1896 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1897 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1898 SDNodeFlags Flags;
1899 Flags.setNoSignedWrap(IsNSW);
1900 Flags.setNoUnsignedWrap(IsNUW);
1901 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1902 SDValue NewShiftAmt =
1903 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1904 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1905 NewShiftAmt, Flags);
1906 SDValue NewExt =
1907 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1908 return TLO.CombineTo(Op, NewExt);
1909 }
1910 }
1911 }
1912 } else {
1913 // This is a variable shift, so we can't shift the demand mask by a known
1914 // amount. But if we are not demanding high bits, then we are not
1915 // demanding those bits from the pre-shifted operand either.
1916 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1917 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1918 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1919 Depth + 1)) {
1920 // Disable the nsw and nuw flags. We can no longer guarantee that we
1921 // won't wrap after simplification.
1922 Op->dropFlags(SDNodeFlags::NoWrap);
1923 return true;
1924 }
1925 Known.resetAll();
1926 }
1927 }
1928
1929 // If we are only demanding sign bits then we can use the shift source
1930 // directly.
1931 if (std::optional<uint64_t> MaxSA =
1932 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1933 unsigned ShAmt = *MaxSA;
1934 unsigned NumSignBits =
1935 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1936 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1937 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1938 return TLO.CombineTo(Op, Op0);
1939 }
1940 break;
1941 }
1942 case ISD::SRL: {
1943 SDValue Op0 = Op.getOperand(0);
1944 SDValue Op1 = Op.getOperand(1);
1945 EVT ShiftVT = Op1.getValueType();
1946
1947 if (std::optional<uint64_t> KnownSA =
1948 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1949 unsigned ShAmt = *KnownSA;
1950 if (ShAmt == 0)
1951 return TLO.CombineTo(Op, Op0);
1952
1953 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1954 // single shift. We can do this if the top bits (which are shifted out)
1955 // are never demanded.
1956 // TODO - support non-uniform vector amounts.
1957 if (Op0.getOpcode() == ISD::SHL) {
1958 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1959 if (std::optional<uint64_t> InnerSA =
1960 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1961 unsigned C1 = *InnerSA;
1962 unsigned Opc = ISD::SRL;
1963 int Diff = ShAmt - C1;
1964 if (Diff < 0) {
1965 Diff = -Diff;
1966 Opc = ISD::SHL;
1967 }
1968 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1969 return TLO.CombineTo(
1970 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1971 }
1972 }
1973 }
1974
1975 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1976 // single sra. We can do this if the top bits are never demanded.
1977 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1978 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1979 if (std::optional<uint64_t> InnerSA =
1980 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1981 unsigned C1 = *InnerSA;
1982 // Clamp the combined shift amount if it exceeds the bit width.
1983 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1984 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1985 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1986 Op0.getOperand(0), NewSA));
1987 }
1988 }
1989 }
1990
1991 APInt InDemandedMask = (DemandedBits << ShAmt);
1992
1993 // If the shift is exact, then it does demand the low bits (and knows that
1994 // they are zero).
1995 if (Op->getFlags().hasExact())
1996 InDemandedMask.setLowBits(ShAmt);
1997
1998 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1999 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2000 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2001 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2002 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2003 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2004 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2005 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2006 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2007 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2008 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2009 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2010 SDValue NewShiftAmt =
2011 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2012 SDValue NewShift =
2013 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2014 return TLO.CombineTo(
2015 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2016 }
2017 }
2018
2019 // Compute the new bits that are at the top now.
2020 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2021 Depth + 1))
2022 return true;
2023 Known.Zero.lshrInPlace(ShAmt);
2024 Known.One.lshrInPlace(ShAmt);
2025 // High bits known zero.
2026 Known.Zero.setHighBits(ShAmt);
2027
2028 // Attempt to avoid multi-use ops if we don't need anything from them.
2029 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2030 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2031 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2032 if (DemandedOp0) {
2033 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2034 return TLO.CombineTo(Op, NewOp);
2035 }
2036 }
2037 } else {
2038 // Use generic knownbits computation as it has support for non-uniform
2039 // shift amounts.
2040 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2041 }
2042
2043 // If we are only demanding sign bits then we can use the shift source
2044 // directly.
2045 if (std::optional<uint64_t> MaxSA =
2046 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2047 unsigned ShAmt = *MaxSA;
2048 // Must already be signbits in DemandedBits bounds, and can't demand any
2049 // shifted in zeroes.
2050 if (DemandedBits.countl_zero() >= ShAmt) {
2051 unsigned NumSignBits =
2052 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2053 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2054 return TLO.CombineTo(Op, Op0);
2055 }
2056 }
2057
2058 // Try to match AVG patterns (after shift simplification).
2059 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2060 DemandedElts, Depth + 1))
2061 return TLO.CombineTo(Op, AVG);
2062
2063 break;
2064 }
2065 case ISD::SRA: {
2066 SDValue Op0 = Op.getOperand(0);
2067 SDValue Op1 = Op.getOperand(1);
2068 EVT ShiftVT = Op1.getValueType();
2069
2070 // If we only want bits that already match the signbit then we don't need
2071 // to shift.
2072 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2073 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2074 NumHiDemandedBits)
2075 return TLO.CombineTo(Op, Op0);
2076
2077 // If this is an arithmetic shift right and only the low-bit is set, we can
2078 // always convert this into a logical shr, even if the shift amount is
2079 // variable. The low bit of the shift cannot be an input sign bit unless
2080 // the shift amount is >= the size of the datatype, which is undefined.
2081 if (DemandedBits.isOne())
2082 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2083
2084 if (std::optional<uint64_t> KnownSA =
2085 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2086 unsigned ShAmt = *KnownSA;
2087 if (ShAmt == 0)
2088 return TLO.CombineTo(Op, Op0);
2089
2090 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2091 // supports sext_inreg.
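// Illustrative example: i32 (sra (shl x, 24), 24) --> sign_extend_inreg x
// from i8, since LowBits = 32 - 24 = 8.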
2092 if (Op0.getOpcode() == ISD::SHL) {
2093 if (std::optional<uint64_t> InnerSA =
2094 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2095 unsigned LowBits = BitWidth - ShAmt;
2096 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2097 if (VT.isVector())
2098 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2099 VT.getVectorElementCount());
2100
2101 if (*InnerSA == ShAmt) {
2102 if (!TLO.LegalOperations() ||
2103 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2104 return TLO.CombineTo(
2105 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2106 Op0.getOperand(0),
2107 TLO.DAG.getValueType(ExtVT)));
2108
2109 // Even if we can't convert to sext_inreg, we might be able to
2110 // remove this shift pair if the input is already sign extended.
2111 unsigned NumSignBits =
2112 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2113 if (NumSignBits > ShAmt)
2114 return TLO.CombineTo(Op, Op0.getOperand(0));
2115 }
2116 }
2117 }
2118
2119 APInt InDemandedMask = (DemandedBits << ShAmt);
2120
2121 // If the shift is exact, then it does demand the low bits (and knows that
2122 // they are zero).
2123 if (Op->getFlags().hasExact())
2124 InDemandedMask.setLowBits(ShAmt);
2125
2126 // If any of the demanded bits are produced by the sign extension, we also
2127 // demand the input sign bit.
2128 if (DemandedBits.countl_zero() < ShAmt)
2129 InDemandedMask.setSignBit();
2130
2131 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2132 Depth + 1))
2133 return true;
2134 Known.Zero.lshrInPlace(ShAmt);
2135 Known.One.lshrInPlace(ShAmt);
2136
2137 // If the input sign bit is known to be zero, or if none of the top bits
2138 // are demanded, turn this into an unsigned shift right.
2139 if (Known.Zero[BitWidth - ShAmt - 1] ||
2140 DemandedBits.countl_zero() >= ShAmt) {
2141 SDNodeFlags Flags;
2142 Flags.setExact(Op->getFlags().hasExact());
2143 return TLO.CombineTo(
2144 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2145 }
2146
2147 int Log2 = DemandedBits.exactLogBase2();
2148 if (Log2 >= 0) {
2149 // The bit must come from the sign.
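// Illustrative example: i32 (sra x, 28) with only bit 30 demanded - that
// bit is a copy of the sign bit, so (srl x, 31 - 30) recreates it.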
2150 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2151 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2152 }
2153
2154 if (Known.One[BitWidth - ShAmt - 1])
2155 // New bits are known one.
2156 Known.One.setHighBits(ShAmt);
2157
2158 // Attempt to avoid multi-use ops if we don't need anything from them.
2159 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2160 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2161 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2162 if (DemandedOp0) {
2163 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2164 return TLO.CombineTo(Op, NewOp);
2165 }
2166 }
2167 }
2168
2169 // Try to match AVG patterns (after shift simplification).
2170 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2171 DemandedElts, Depth + 1))
2172 return TLO.CombineTo(Op, AVG);
2173
2174 break;
2175 }
2176 case ISD::FSHL:
2177 case ISD::FSHR: {
2178 SDValue Op0 = Op.getOperand(0);
2179 SDValue Op1 = Op.getOperand(1);
2180 SDValue Op2 = Op.getOperand(2);
2181 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2182
2183 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2184 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2185
2186 // For fshl, 0-shift returns the 1st arg.
2187 // For fshr, 0-shift returns the 2nd arg.
2188 if (Amt == 0) {
2189 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2190 Known, TLO, Depth + 1))
2191 return true;
2192 break;
2193 }
2194
2195 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2196 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
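// Illustrative example: fshl i8 with Amt == 3 computes
// (Op0 << 3) | (Op1 >> 5), so Demanded0 = DemandedBits.lshr(3) and
// Demanded1 = DemandedBits << 5.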
2197 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2198 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2199 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2200 Depth + 1))
2201 return true;
2202 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2203 Depth + 1))
2204 return true;
2205
2206 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2207 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2209 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210 Known = Known.unionWith(Known2);
2211
2212 // Attempt to avoid multi-use ops if we don't need anything from them.
2213 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2214 !DemandedElts.isAllOnes()) {
2215 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2216 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2217 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2218 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2219 if (DemandedOp0 || DemandedOp1) {
2220 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2221 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2222 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2223 DemandedOp1, Op2);
2224 return TLO.CombineTo(Op, NewOp);
2225 }
2226 }
2227 }
2228
2229 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2230 if (isPowerOf2_32(BitWidth)) {
2231 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2232 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2233 Known2, TLO, Depth + 1))
2234 return true;
2235 }
2236 break;
2237 }
2238 case ISD::ROTL:
2239 case ISD::ROTR: {
2240 SDValue Op0 = Op.getOperand(0);
2241 SDValue Op1 = Op.getOperand(1);
2242 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2243
2244 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2245 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2246 return TLO.CombineTo(Op, Op0);
2247
2248 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2249 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2250 unsigned RevAmt = BitWidth - Amt;
2251
2252 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2253 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
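// Illustrative example: rotl i8 with Amt == 2 takes result bit i from Op0
// bit ((i - 2) mod 8), so Demanded0 = DemandedBits.rotr(2).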
2254 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2255 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2256 Depth + 1))
2257 return true;
2258
2259 // rot*(x, 0) --> x
2260 if (Amt == 0)
2261 return TLO.CombineTo(Op, Op0);
2262
2263 // See if we don't demand either half of the rotated bits.
2264 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2265 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2266 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2267 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2268 }
2269 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2270 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2271 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2272 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2273 }
2274 }
2275
2276 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2277 if (isPowerOf2_32(BitWidth)) {
2278 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2279 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2280 Depth + 1))
2281 return true;
2282 }
2283 break;
2284 }
2285 case ISD::SMIN:
2286 case ISD::SMAX:
2287 case ISD::UMIN:
2288 case ISD::UMAX: {
2289 unsigned Opc = Op.getOpcode();
2290 SDValue Op0 = Op.getOperand(0);
2291 SDValue Op1 = Op.getOperand(1);
2292
2293 // If we're only demanding signbits, then we can simplify to OR/AND node.
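// Sketch of why this holds: when all demanded bits are sign bits, each
// operand acts as 0 or -1 on those bits; smin/umax select -1 whenever
// either input is -1 (an OR), and smax/umin select 0 whenever either
// input is 0 (an AND).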
2294 unsigned BitOp =
2295 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2296 unsigned NumSignBits =
2297 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2298 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2299 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2300 if (NumSignBits >= NumDemandedUpperBits)
2301 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2302
2303 // Check if one arg is always less/greater than (or equal) to the other arg.
2304 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2305 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2306 switch (Opc) {
2307 case ISD::SMIN:
2308 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2309 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2310 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2311 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2312 Known = KnownBits::smin(Known0, Known1);
2313 break;
2314 case ISD::SMAX:
2315 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2316 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2317 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2318 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2319 Known = KnownBits::smax(Known0, Known1);
2320 break;
2321 case ISD::UMIN:
2322 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2323 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2324 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2325 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2326 Known = KnownBits::umin(Known0, Known1);
2327 break;
2328 case ISD::UMAX:
2329 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2330 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2331 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2332 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2333 Known = KnownBits::umax(Known0, Known1);
2334 break;
2335 }
2336 break;
2337 }
2338 case ISD::BITREVERSE: {
2339 SDValue Src = Op.getOperand(0);
2340 APInt DemandedSrcBits = DemandedBits.reverseBits();
2341 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2342 Depth + 1))
2343 return true;
2344 Known.One = Known2.One.reverseBits();
2345 Known.Zero = Known2.Zero.reverseBits();
2346 break;
2347 }
2348 case ISD::BSWAP: {
2349 SDValue Src = Op.getOperand(0);
2350
2351 // If the only bits demanded come from one byte of the bswap result,
2352 // just shift the input byte into position to eliminate the bswap.
2353 unsigned NLZ = DemandedBits.countl_zero();
2354 unsigned NTZ = DemandedBits.countr_zero();
2355
2356 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2357 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2358 // have 14 leading zeros, round to 8.
2359 NLZ = alignDown(NLZ, 8);
2360 NTZ = alignDown(NTZ, 8);
2361 // If we need exactly one byte, we can do this transformation.
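// Illustrative example: i32 DemandedBits == 0x0000FF00 gives NLZ = 16 and
// NTZ = 8, so exactly one byte is live and (bswap x) --> (srl x, 8).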
2362 if (BitWidth - NLZ - NTZ == 8) {
2363 // Replace this with either a left or right shift to get the byte into
2364 // the right place.
2365 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2366 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2367 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2368 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2369 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2370 return TLO.CombineTo(Op, NewOp);
2371 }
2372 }
2373
2374 APInt DemandedSrcBits = DemandedBits.byteSwap();
2375 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2376 Depth + 1))
2377 return true;
2378 Known.One = Known2.One.byteSwap();
2379 Known.Zero = Known2.Zero.byteSwap();
2380 break;
2381 }
2382 case ISD::CTPOP: {
2383 // If only 1 bit is demanded, replace with PARITY as long as we're before
2384 // op legalization.
2385 // FIXME: Limit to scalars for now.
2386 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2387 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2388 Op.getOperand(0)));
2389
2390 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2391 break;
2392 }
2393 case ISD::SIGN_EXTEND_INREG: {
2394 SDValue Op0 = Op.getOperand(0);
2395 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2396 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2397
2398 // If we only care about the highest bit, don't bother shifting right.
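// Illustrative example: i32 sign_extend_inreg(x, i8) with only the sign
// bit demanded - bit 31 of the result is bit 7 of x, so (shl x, 24)
// suffices.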
2399 if (DemandedBits.isSignMask()) {
2400 unsigned MinSignedBits =
2401 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2402 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2403 // However if the input is already sign extended we expect the sign
2404 // extension to be dropped altogether later and do not simplify.
2405 if (!AlreadySignExtended) {
2406 // Compute the correct shift amount type, which must be getShiftAmountTy
2407 // for scalar types after legalization.
2408 SDValue ShiftAmt =
2409 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2410 return TLO.CombineTo(Op,
2411 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2412 }
2413 }
2414
2415 // If none of the extended bits are demanded, eliminate the sextinreg.
2416 if (DemandedBits.getActiveBits() <= ExVTBits)
2417 return TLO.CombineTo(Op, Op0);
2418
2419 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2420
2421 // Since the sign extended bits are demanded, we know that the sign
2422 // bit is demanded.
2423 InputDemandedBits.setBit(ExVTBits - 1);
2424
2425 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2426 Depth + 1))
2427 return true;
2428
2429 // If the sign bit of the input is known set or clear, then we know the
2430 // top bits of the result.
2431
2432 // If the input sign bit is known zero, convert this into a zero extension.
2433 if (Known.Zero[ExVTBits - 1])
2434 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2435
2436 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2437 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2438 Known.One.setBitsFrom(ExVTBits);
2439 Known.Zero &= Mask;
2440 } else { // Input sign bit unknown
2441 Known.Zero &= Mask;
2442 Known.One &= Mask;
2443 }
2444 break;
2445 }
2446 case ISD::BUILD_PAIR: {
2447 EVT HalfVT = Op.getOperand(0).getValueType();
2448 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2449
2450 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2451 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2452
2453 KnownBits KnownLo, KnownHi;
2454
2455 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2456 return true;
2457
2458 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2459 return true;
2460
2461 Known = KnownHi.concat(KnownLo);
2462 break;
2463 }
2464 case ISD::ZERO_EXTEND_VECTOR_INREG:
2465 if (VT.isScalableVector())
2466 return false;
2467 [[fallthrough]];
2468 case ISD::ZERO_EXTEND: {
2469 SDValue Src = Op.getOperand(0);
2470 EVT SrcVT = Src.getValueType();
2471 unsigned InBits = SrcVT.getScalarSizeInBits();
2472 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2474
2475 // If none of the top bits are demanded, convert this into an any_extend.
2476 if (DemandedBits.getActiveBits() <= InBits) {
2477 // If we only need the non-extended bits of the bottom element
2478 // then we can just bitcast to the result.
2479 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2480 VT.getSizeInBits() == SrcVT.getSizeInBits())
2481 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2482
2483 unsigned Opc =
2484 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2485 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2486 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2487 }
2488
2489 APInt InDemandedBits = DemandedBits.trunc(InBits);
2490 APInt InDemandedElts = DemandedElts.zext(InElts);
2491 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2492 Depth + 1)) {
2493 Op->dropFlags(SDNodeFlags::NonNeg);
2494 return true;
2495 }
2496 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2497 Known = Known.zext(BitWidth);
2498
2499 // Attempt to avoid multi-use ops if we don't need anything from them.
2500 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2501 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2502 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2503 break;
2504 }
2505 case ISD::SIGN_EXTEND_VECTOR_INREG:
2506 if (VT.isScalableVector())
2507 return false;
2508 [[fallthrough]];
2509 case ISD::SIGN_EXTEND: {
2510 SDValue Src = Op.getOperand(0);
2511 EVT SrcVT = Src.getValueType();
2512 unsigned InBits = SrcVT.getScalarSizeInBits();
2513 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2514 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2515
2516 APInt InDemandedElts = DemandedElts.zext(InElts);
2517 APInt InDemandedBits = DemandedBits.trunc(InBits);
2518
2519 // Since some of the sign extended bits are demanded, we know that the sign
2520 // bit is demanded.
2521 InDemandedBits.setBit(InBits - 1);
2522
2523 // If none of the top bits are demanded, convert this into an any_extend.
2524 if (DemandedBits.getActiveBits() <= InBits) {
2525 // If we only need the non-extended bits of the bottom element
2526 // then we can just bitcast to the result.
2527 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2528 VT.getSizeInBits() == SrcVT.getSizeInBits())
2529 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2530
2531 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2532 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2533 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2534 InBits) {
2535 unsigned Opc =
2536 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2537 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2538 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2539 }
2540 }
2541
2542 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2543 Depth + 1))
2544 return true;
2545 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2546
2547 // If the sign bit is known one, the top bits match.
2548 Known = Known.sext(BitWidth);
2549
2550 // If the sign bit is known zero, convert this to a zero extend.
2551 if (Known.isNonNegative()) {
2552 unsigned Opc =
2553 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2554 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2555 SDNodeFlags Flags;
2556 if (!IsVecInReg)
2557 Flags |= SDNodeFlags::NonNeg;
2558 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2559 }
2560 }
2561
2562 // Attempt to avoid multi-use ops if we don't need anything from them.
2563 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2564 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2565 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2566 break;
2567 }
2568 case ISD::ANY_EXTEND_VECTOR_INREG:
2569 if (VT.isScalableVector())
2570 return false;
2571 [[fallthrough]];
2572 case ISD::ANY_EXTEND: {
2573 SDValue Src = Op.getOperand(0);
2574 EVT SrcVT = Src.getValueType();
2575 unsigned InBits = SrcVT.getScalarSizeInBits();
2576 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2577 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2578
2579 // If we only need the bottom element then we can just bitcast.
2580 // TODO: Handle ANY_EXTEND?
2581 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2582 VT.getSizeInBits() == SrcVT.getSizeInBits())
2583 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2584
2585 APInt InDemandedBits = DemandedBits.trunc(InBits);
2586 APInt InDemandedElts = DemandedElts.zext(InElts);
2587 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2588 Depth + 1))
2589 return true;
2590 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2591 Known = Known.anyext(BitWidth);
2592
2593 // Attempt to avoid multi-use ops if we don't need anything from them.
2594 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2595 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2596 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2597 break;
2598 }
2599 case ISD::TRUNCATE: {
2600 SDValue Src = Op.getOperand(0);
2601
2602 // Simplify the input, using demanded bit information, and compute the known
2603 // zero/one bits live out.
2604 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2605 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2606 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2607 Depth + 1)) {
2608 // Disable the nsw and nuw flags. We can no longer guarantee that we
2609 // won't wrap after simplification.
2610 Op->dropFlags(SDNodeFlags::NoWrap);
2611 return true;
2612 }
2613 Known = Known.trunc(BitWidth);
2614
2615 // Attempt to avoid multi-use ops if we don't need anything from them.
2616 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2617 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2618 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2619
2620 // If the input is only used by this truncate, see if we can shrink it based
2621 // on the known demanded bits.
2622 switch (Src.getOpcode()) {
2623 default:
2624 break;
2625 case ISD::SRL:
2626 // Shrink SRL by a constant if none of the high bits shifted in are
2627 // demanded.
2628 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2629 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2630 // undesirable.
2631 break;
2632
2633 if (Src.getNode()->hasOneUse()) {
2634 if (isTruncateFree(Src, VT) &&
2635 !isTruncateFree(Src.getValueType(), VT)) {
2636 // If truncate is only free at trunc(srl), do not turn it into
2637 // srl(trunc). The check first verifies that the truncate is free at
2638 // Src's opcode (srl), then verifies that the truncate is not done by
2639 // referencing a sub-register. In testing, if both trunc(srl)'s and
2640 // srl(trunc)'s trunc are free, srl(trunc) performs better; if only
2641 // trunc(srl)'s trunc is free, trunc(srl) is better.
2642 break;
2643 }
2644
2645 std::optional<uint64_t> ShAmtC =
2646 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2647 if (!ShAmtC || *ShAmtC >= BitWidth)
2648 break;
2649 uint64_t ShVal = *ShAmtC;
2650
2651 APInt HighBits =
2652 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2653 HighBits.lshrInPlace(ShVal);
2654 HighBits = HighBits.trunc(BitWidth);
2655 if (!(HighBits & DemandedBits)) {
2656 // None of the shifted in bits are needed. Add a truncate of the
2657 // shift input, then shift it.
2658 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2659 SDValue NewTrunc =
2660 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2661 return TLO.CombineTo(
2662 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2663 }
2664 }
2665 break;
2666 }
2667
2668 break;
2669 }
2670 case ISD::AssertZext: {
2671 // AssertZext demands all of the high bits, plus any of the low bits
2672 // demanded by its users.
2673 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2674 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2675 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2676 TLO, Depth + 1))
2677 return true;
2678
2679 Known.Zero |= ~InMask;
2680 Known.One &= (~Known.Zero);
2681 break;
2682 }
2683 case ISD::EXTRACT_VECTOR_ELT: {
2684 SDValue Src = Op.getOperand(0);
2685 SDValue Idx = Op.getOperand(1);
2686 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2687 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2688
2689 if (SrcEltCnt.isScalable())
2690 return false;
2691
2692 // Demand the bits from every vector element without a constant index.
2693 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2694 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2695 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2696 if (CIdx->getAPIntValue().ult(NumSrcElts))
2697 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2698
2699 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2700 // anything about the extended bits.
2701 APInt DemandedSrcBits = DemandedBits;
2702 if (BitWidth > EltBitWidth)
2703 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2704
2705 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2706 Depth + 1))
2707 return true;
2708
2709 // Attempt to avoid multi-use ops if we don't need anything from them.
2710 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2711 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2712 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2713 SDValue NewOp =
2714 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2715 return TLO.CombineTo(Op, NewOp);
2716 }
2717 }
2718
2719 Known = Known2;
2720 if (BitWidth > EltBitWidth)
2721 Known = Known.anyext(BitWidth);
2722 break;
2723 }
2724 case ISD::BITCAST: {
2725 if (VT.isScalableVector())
2726 return false;
2727 SDValue Src = Op.getOperand(0);
2728 EVT SrcVT = Src.getValueType();
2729 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2730
2731 // If this is an FP->Int bitcast and if the sign bit is the only
2732 // thing demanded, turn this into a FGETSIGN.
2733 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2734 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2735 SrcVT.isFloatingPoint()) {
2736 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2737 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2738 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2739 SrcVT != MVT::f128) {
2740 // Cannot eliminate/lower SHL for f128 yet.
2741 EVT Ty = OpVTLegal ? VT : MVT::i32;
2742 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2743 // place. We expect the SHL to be eliminated by other optimizations.
2744 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2745 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2746 if (!OpVTLegal && OpVTSizeInBits > 32)
2747 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2748 unsigned ShVal = Op.getValueSizeInBits() - 1;
2749 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2750 return TLO.CombineTo(Op,
2751 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2752 }
2753 }
2754
2755 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2756 // Demand the elt/bit if any of the original elts/bits are demanded.
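// Illustrative example: (i64 bitcast (v2i32 x)) on a little-endian target
// with i64 bits [32,64) demanded only demands element 1 of x.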
2757 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2758 unsigned Scale = BitWidth / NumSrcEltBits;
2759 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2760 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2761 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2762 for (unsigned i = 0; i != Scale; ++i) {
2763 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2764 unsigned BitOffset = EltOffset * NumSrcEltBits;
2765 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2766 if (!Sub.isZero()) {
2767 DemandedSrcBits |= Sub;
2768 for (unsigned j = 0; j != NumElts; ++j)
2769 if (DemandedElts[j])
2770 DemandedSrcElts.setBit((j * Scale) + i);
2771 }
2772 }
2773
2774 APInt KnownSrcUndef, KnownSrcZero;
2775 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2776 KnownSrcZero, TLO, Depth + 1))
2777 return true;
2778
2779 KnownBits KnownSrcBits;
2780 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2781 KnownSrcBits, TLO, Depth + 1))
2782 return true;
2783 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2784 // TODO - bigendian once we have test coverage.
2785 unsigned Scale = NumSrcEltBits / BitWidth;
2786 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2787 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2788 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2789 for (unsigned i = 0; i != NumElts; ++i)
2790 if (DemandedElts[i]) {
2791 unsigned Offset = (i % Scale) * BitWidth;
2792 DemandedSrcBits.insertBits(DemandedBits, Offset);
2793 DemandedSrcElts.setBit(i / Scale);
2794 }
2795
2796 if (SrcVT.isVector()) {
2797 APInt KnownSrcUndef, KnownSrcZero;
2798 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2799 KnownSrcZero, TLO, Depth + 1))
2800 return true;
2801 }
2802
2803 KnownBits KnownSrcBits;
2804 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2805 KnownSrcBits, TLO, Depth + 1))
2806 return true;
2807
2808 // Attempt to avoid multi-use ops if we don't need anything from them.
2809 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2810 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2811 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2812 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2813 return TLO.CombineTo(Op, NewOp);
2814 }
2815 }
2816 }
2817
2818 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2819 // recursive call where Known may be useful to the caller.
2820 if (Depth > 0) {
2821 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2822 return false;
2823 }
2824 break;
2825 }
2826 case ISD::MUL:
2827 if (DemandedBits.isPowerOf2()) {
2828 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2829 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2830 // odd (has LSB set), then the left-shifted low bit of X is the answer.
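// Illustrative example: demanding only bit 4 of (mul x, 48) - 48 == 3 << 4
// with 3 odd, so bit 4 of the product is bit 0 of x, i.e. (shl x, 4).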
2831 unsigned CTZ = DemandedBits.countr_zero();
2832 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2833 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2834 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2835 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2836 return TLO.CombineTo(Op, Shl);
2837 }
2838 }
2839 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2840 // X * X is odd iff X is odd.
2841 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
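// Sketch: writing X = 2a + b with b = X[0] gives X*X = 4a(a + b) + b, so
// bit 1 of the square is always 0 and bit 0 equals b.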
2842 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2843 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2844 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2845 return TLO.CombineTo(Op, And1);
2846 }
2847 [[fallthrough]];
2848 case ISD::ADD:
2849 case ISD::SUB: {
2850 // Add, Sub, and Mul don't demand any bits in positions beyond that
2851 // of the highest bit demanded of them.
2852 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2853 SDNodeFlags Flags = Op.getNode()->getFlags();
2854 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2855 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2856 KnownBits KnownOp0, KnownOp1;
2857 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2858 const KnownBits &KnownRHS) {
2859 if (Op.getOpcode() == ISD::MUL)
2860 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2861 return Demanded;
2862 };
2863 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2864 Depth + 1) ||
2865 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2866 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2867 // See if the operation should be performed at a smaller bit width.
2868 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2869 // Disable the nsw and nuw flags. We can no longer guarantee that we
2870 // won't wrap after simplification.
2871 Op->dropFlags(SDNodeFlags::NoWrap);
2872 return true;
2873 }
2874
2875 // neg x with only low bit demanded is simply x.
2876 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2877 isNullConstant(Op0))
2878 return TLO.CombineTo(Op, Op1);
2879
2880 // Attempt to avoid multi-use ops if we don't need anything from them.
2881 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2882 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2883 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2884 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2885 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2886 if (DemandedOp0 || DemandedOp1) {
2887 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2888 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2889 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2890 Flags & ~SDNodeFlags::NoWrap);
2891 return TLO.CombineTo(Op, NewOp);
2892 }
2893 }
2894
2895 // If we have a constant operand, we may be able to turn it into -1 if we
2896 // do not demand the high bits. This can make the constant smaller to
2897 // encode, allow more general folding, or match specialized instruction
2898 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2899 // is probably not useful (and could be detrimental).
2900 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2901 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2902 if (C && !C->isAllOnes() && !C->isOne() &&
2903 (C->getAPIntValue() | HighMask).isAllOnes()) {
2904 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2905 // Disable the nsw and nuw flags. We can no longer guarantee that we
2906 // won't wrap after simplification.
2907 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2908 Flags & ~SDNodeFlags::NoWrap);
2909 return TLO.CombineTo(Op, NewOp);
2910 }
2911
2912 // Match a multiply with a disguised negated-power-of-2 and convert to
2913 // an equivalent shift-left amount.
2914 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2915 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2916 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2917 return 0;
2918
2919 // Don't touch opaque constants. Also, ignore zero and power-of-2
2920 // multiplies. Those will get folded later.
2921 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2922 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2923 !MulC->getAPIntValue().isPowerOf2()) {
2924 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2925 if (UnmaskedC.isNegatedPowerOf2())
2926 return (-UnmaskedC).logBase2();
2927 }
2928 return 0;
2929 };
2930
2931 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2932 unsigned ShlAmt) {
2933 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2934 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2935 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2936 return TLO.CombineTo(Op, Res);
2937 };
2938
2939 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2940 if (Op.getOpcode() == ISD::ADD) {
2941 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2942 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2943 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2944 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2945 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2946 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2947 }
2948 if (Op.getOpcode() == ISD::SUB) {
2949 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2950 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2951 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2952 }
2953 }
2954
2955 if (Op.getOpcode() == ISD::MUL) {
2956 Known = KnownBits::mul(KnownOp0, KnownOp1);
2957 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2958 Known = KnownBits::computeForAddSub(
2959 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2960 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2961 }
2962 break;
2963 }
2964 default:
2965 // We also ask the target about intrinsics (which could be specific to it).
2966 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2967 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2968 // TODO: Probably okay to remove after audit; here to reduce change size
2969 // in initial enablement patch for scalable vectors
2970 if (Op.getValueType().isScalableVector())
2971 break;
2972 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2973 Known, TLO, Depth))
2974 return true;
2975 break;
2976 }
2977
2978 // Just use computeKnownBits to compute output bits.
2979 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2980 break;
2981 }
2982
2983 // If we know the value of all of the demanded bits, return this as a
2984 // constant.
2985 if (!isTargetCanonicalConstantNode(Op) &&
2986 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2987 // Avoid folding to a constant if any OpaqueConstant is involved.
2988 if (llvm::any_of(Op->ops(), [](SDValue V) {
2989 auto *C = dyn_cast<ConstantSDNode>(V);
2990 return C && C->isOpaque();
2991 }))
2992 return false;
2993 if (VT.isInteger())
2994 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2995 if (VT.isFloatingPoint())
2996 return TLO.CombineTo(
2997 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2998 dl, VT));
2999 }
3000
3001 // A multi-use 'all demanded elts' simplify failed to find any knownbits.
3002 // Try again just for the original demanded elts.
3003 // Ensure we do this AFTER constant folding above.
3004 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3005 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3006
3007 return false;
3008}
3009
3010 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3011 const APInt &DemandedElts,
3012 DAGCombinerInfo &DCI) const {
3013 SelectionDAG &DAG = DCI.DAG;
3014 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3015 !DCI.isBeforeLegalizeOps());
3016
3017 APInt KnownUndef, KnownZero;
3018 bool Simplified =
3019 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3020 if (Simplified) {
3021 DCI.AddToWorklist(Op.getNode());
3022 DCI.CommitTargetLoweringOpt(TLO);
3023 }
3024
3025 return Simplified;
3026}
3027
3028/// Given a vector binary operation and known undefined elements for each input
3029 /// operand, compute whether each element of the output is undefined.
3030 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3031 const APInt &UndefOp0,
3032 const APInt &UndefOp1) {
3033 EVT VT = BO.getValueType();
3034 assert(VT.isVector() && BO.getNumOperands() == 2 &&
3035 "Vector binop only");
3036
3037 EVT EltVT = VT.getVectorElementType();
3038 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3039 assert(UndefOp0.getBitWidth() == NumElts &&
3040 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3041
3042 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3043 const APInt &UndefVals) {
3044 if (UndefVals[Index])
3045 return DAG.getUNDEF(EltVT);
3046
3047 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3048 // Try hard to make sure that the getNode() call is not creating temporary
3049 // nodes. Ignore opaque integers because they do not constant fold.
3050 SDValue Elt = BV->getOperand(Index);
3051 auto *C = dyn_cast<ConstantSDNode>(Elt);
3052 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3053 return Elt;
3054 }
3055
3056 return SDValue();
3057 };
3058
3059 APInt KnownUndef = APInt::getZero(NumElts);
3060 for (unsigned i = 0; i != NumElts; ++i) {
3061 // If both inputs for this element are either constant or undef and match
3062 // the element type, compute the constant/undef result for this element of
3063 // the vector.
3064 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3065 // not handle FP constants. The code within getNode() should be refactored
3066 // to avoid the danger of creating a bogus temporary node here.
3067 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3068 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3069 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3070 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3071 KnownUndef.setBit(i);
3072 }
3073 return KnownUndef;
3074}
3075
3076 bool TargetLowering::SimplifyDemandedVectorElts(
3077 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3078 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3079 bool AssumeSingleUse) const {
3080 EVT VT = Op.getValueType();
3081 unsigned Opcode = Op.getOpcode();
3082 APInt DemandedElts = OriginalDemandedElts;
3083 unsigned NumElts = DemandedElts.getBitWidth();
3084 assert(VT.isVector() && "Expected vector op");
3085
3086 KnownUndef = KnownZero = APInt::getZero(NumElts);
3087
3088 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3089 return false;
3090
3091 // TODO: For now we assume we know nothing about scalable vectors.
3092 if (VT.isScalableVector())
3093 return false;
3094
3095 assert(VT.getVectorNumElements() == NumElts &&
3096 "Mask size mismatches value type element count!");
3097
3098 // Undef operand.
3099 if (Op.isUndef()) {
3100 KnownUndef.setAllBits();
3101 return false;
3102 }
3103
3104 // If Op has other users, assume that all elements are needed.
3105 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3106 DemandedElts.setAllBits();
3107
3108 // Not demanding any elements from Op.
3109 if (DemandedElts == 0) {
3110 KnownUndef.setAllBits();
3111 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3112 }
3113
3114 // Limit search depth.
3115 if (Depth >= SelectionDAG::MaxRecursionDepth)
3116 return false;
3117
3118 SDLoc DL(Op);
3119 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3120 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3121
3122 // Helper for demanding the specified elements and all the bits of both binary
3123 // operands.
3124 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3125 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3126 TLO.DAG, Depth + 1);
3127 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3128 TLO.DAG, Depth + 1);
3129 if (NewOp0 || NewOp1) {
3130 SDValue NewOp =
3131 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3132 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3133 return TLO.CombineTo(Op, NewOp);
3134 }
3135 return false;
3136 };
3137
3138 switch (Opcode) {
3139 case ISD::SCALAR_TO_VECTOR: {
3140 if (!DemandedElts[0]) {
3141 KnownUndef.setAllBits();
3142 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3143 }
3144 SDValue ScalarSrc = Op.getOperand(0);
3145 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3146 SDValue Src = ScalarSrc.getOperand(0);
3147 SDValue Idx = ScalarSrc.getOperand(1);
3148 EVT SrcVT = Src.getValueType();
3149
3150 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3151
3152 if (SrcEltCnt.isScalable())
3153 return false;
3154
3155 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3156 if (isNullConstant(Idx)) {
3157 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3158 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3159 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3160 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3161 TLO, Depth + 1))
3162 return true;
3163 }
3164 }
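// Only lane 0 of a SCALAR_TO_VECTOR is defined, e.g. (v4i32 scalar_to_vector
// x) is <x, undef, undef, undef>, so every upper lane is known undef.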
3165 KnownUndef.setHighBits(NumElts - 1);
3166 break;
3167 }
3168 case ISD::BITCAST: {
3169 SDValue Src = Op.getOperand(0);
3170 EVT SrcVT = Src.getValueType();
3171
3172 // We only handle vectors here.
3173 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3174 if (!SrcVT.isVector())
3175 break;
3176
3177 // Fast handling of 'identity' bitcasts.
3178 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3179 if (NumSrcElts == NumElts)
3180 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3181 KnownZero, TLO, Depth + 1);
3182
3183 APInt SrcDemandedElts, SrcZero, SrcUndef;
3184
3185 // When bitcasting from a 'large element' src vector to a 'small element'
3186 // vector, we must demand a source element if any DemandedElt maps to it.
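// Illustrative example: bitcasting v2i64 -> v4i32 gives Scale = 2, so
// demanding output elements {2,3} demands only source element 1.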
3187 if ((NumElts % NumSrcElts) == 0) {
3188 unsigned Scale = NumElts / NumSrcElts;
3189 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3190 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3191 TLO, Depth + 1))
3192 return true;
3193
3194 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3195 // of the large element.
3196 // TODO - bigendian once we have test coverage.
3197 if (IsLE) {
3198 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3199 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3200 for (unsigned i = 0; i != NumElts; ++i)
3201 if (DemandedElts[i]) {
3202 unsigned Ofs = (i % Scale) * EltSizeInBits;
3203 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3204 }
3205
3206 KnownBits Known;
3207 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3208 TLO, Depth + 1))
3209 return true;
3210
3211 // The bitcast has split each wide element into a number of
3212 // narrow subelements. We have just computed the Known bits
3213 // for wide elements. See if element splitting results in
3214 // some subelements being zero. Only for demanded elements!
3215 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3216 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3217 .isAllOnes())
3218 continue;
3219 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3220 unsigned Elt = Scale * SrcElt + SubElt;
3221 if (DemandedElts[Elt])
3222 KnownZero.setBit(Elt);
3223 }
3224 }
3225 }
3226
3227 // If the src element is zero/undef then all the output elements will be as
3228 // well - only demanded elements are guaranteed to be correct.
3229 for (unsigned i = 0; i != NumSrcElts; ++i) {
3230 if (SrcDemandedElts[i]) {
3231 if (SrcZero[i])
3232 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3233 if (SrcUndef[i])
3234 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3235 }
3236 }
3237 }
3238
3239 // When bitcasting from a 'small element' src vector to a 'large element'
3240 // vector, we demand all the smaller source elements covered by the larger
3241 // demanded element of this vector.
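// Illustrative example: bitcasting v4i32 -> v2i64 gives Scale = 2, so
// demanding output element 1 demands source elements {2,3}.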
3242 if ((NumSrcElts % NumElts) == 0) {
3243 unsigned Scale = NumSrcElts / NumElts;
3244 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3245 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3246 TLO, Depth + 1))
3247 return true;
3248
3249 // If all the src elements covering an output element are zero/undef, then
3250 // the output element will be as well, assuming it was demanded.
3251 for (unsigned i = 0; i != NumElts; ++i) {
3252 if (DemandedElts[i]) {
3253 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3254 KnownZero.setBit(i);
3255 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3256 KnownUndef.setBit(i);
3257 }
3258 }
3259 }
3260 break;
3261 }
3262 case ISD::FREEZE: {
3263 SDValue N0 = Op.getOperand(0);
3264 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3265 /*PoisonOnly=*/false))
3266 return TLO.CombineTo(Op, N0);
3267
3268 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3269 // freeze(op(x, ...)) -> op(freeze(x), ...).
3270 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3271 return TLO.CombineTo(
3272 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3273 TLO.DAG.getFreeze(N0.getOperand(0))));
3274 break;
3275 }
3276 case ISD::BUILD_VECTOR: {
3277 // Check all elements and simplify any unused elements with UNDEF.
3278 if (!DemandedElts.isAllOnes()) {
3279 // Don't simplify BROADCASTS.
3280 if (llvm::any_of(Op->op_values(),
3281 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3282 SmallVector<SDValue, 32> Ops(Op->ops());
3283 bool Updated = false;
3284 for (unsigned i = 0; i != NumElts; ++i) {
3285 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3286 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3287 KnownUndef.setBit(i);
3288 Updated = true;
3289 }
3290 }
3291 if (Updated)
3292 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3293 }
3294 }
3295 for (unsigned i = 0; i != NumElts; ++i) {
3296 SDValue SrcOp = Op.getOperand(i);
3297 if (SrcOp.isUndef()) {
3298 KnownUndef.setBit(i);
3299 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3300 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3301 KnownZero.setBit(i);
3302 }
3303 }
3304 break;
3305 }
3306 case ISD::CONCAT_VECTORS: {
3307 EVT SubVT = Op.getOperand(0).getValueType();
3308 unsigned NumSubVecs = Op.getNumOperands();
3309 unsigned NumSubElts = SubVT.getVectorNumElements();
3310 for (unsigned i = 0; i != NumSubVecs; ++i) {
3311 SDValue SubOp = Op.getOperand(i);
3312 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3313 APInt SubUndef, SubZero;
3314 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3315 Depth + 1))
3316 return true;
3317 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3318 KnownZero.insertBits(SubZero, i * NumSubElts);
3319 }
3320
3321 // Attempt to avoid multi-use ops if we don't need anything from them.
3322 if (!DemandedElts.isAllOnes()) {
3323 bool FoundNewSub = false;
3324 SmallVector<SDValue, 2> DemandedSubOps;
3325 for (unsigned i = 0; i != NumSubVecs; ++i) {
3326 SDValue SubOp = Op.getOperand(i);
3327 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3328 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3329 SubOp, SubElts, TLO.DAG, Depth + 1);
3330 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3331 FoundNewSub = NewSubOp ? true : FoundNewSub;
3332 }
3333 if (FoundNewSub) {
3334 SDValue NewOp =
3335 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3336 return TLO.CombineTo(Op, NewOp);
3337 }
3338 }
3339 break;
3340 }
3341 case ISD::INSERT_SUBVECTOR: {
3342 // Demand any elements from the subvector and the remainder from the src it's
3343 // inserted into.
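// Illustrative example: inserting a v2i32 subvector at index 2 of a v4i32
// means lanes {2,3} are demanded from the subvector and lanes {0,1} from the
// source vector it is inserted into.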
3344 SDValue Src = Op.getOperand(0);
3345 SDValue Sub = Op.getOperand(1);
3346 uint64_t Idx = Op.getConstantOperandVal(2);
3347 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3348 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3349 APInt DemandedSrcElts = DemandedElts;
3350 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3351
3352 APInt SubUndef, SubZero;
3353 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3354 Depth + 1))
3355 return true;
3356
3357 // If none of the src operand elements are demanded, replace it with undef.
3358 if (!DemandedSrcElts && !Src.isUndef())
3359 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3360 TLO.DAG.getUNDEF(VT), Sub,
3361 Op.getOperand(2)));
3362
3363 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3364 TLO, Depth + 1))
3365 return true;
3366 KnownUndef.insertBits(SubUndef, Idx);
3367 KnownZero.insertBits(SubZero, Idx);
3368
3369 // Attempt to avoid multi-use ops if we don't need anything from them.
3370 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3371 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3372 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3373 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3374 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3375 if (NewSrc || NewSub) {
3376 NewSrc = NewSrc ? NewSrc : Src;
3377 NewSub = NewSub ? NewSub : Sub;
3378 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3379 NewSub, Op.getOperand(2));
3380 return TLO.CombineTo(Op, NewOp);
3381 }
3382 }
3383 break;
3384 }
3386 // Offset the demanded elts by the subvector index.
3387 SDValue Src = Op.getOperand(0);
3388 if (Src.getValueType().isScalableVector())
3389 break;
3390 uint64_t Idx = Op.getConstantOperandVal(1);
3391 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3392 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3393
3394 APInt SrcUndef, SrcZero;
3395 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3396 Depth + 1))
3397 return true;
3398 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3399 KnownZero = SrcZero.extractBits(NumElts, Idx);
3400
3401 // Attempt to avoid multi-use ops if we don't need anything from them.
3402 if (!DemandedElts.isAllOnes()) {
3403 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3404 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3405 if (NewSrc) {
3406 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3407 Op.getOperand(1));
3408 return TLO.CombineTo(Op, NewOp);
3409 }
3410 }
3411 break;
3412 }
3413 case ISD::INSERT_VECTOR_ELT: {
3414 SDValue Vec = Op.getOperand(0);
3415 SDValue Scl = Op.getOperand(1);
3416 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3417
3418 // For a legal, constant insertion index, if we don't need this insertion
3419 // then strip it, else remove it from the demanded elts.
3420 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3421 unsigned Idx = CIdx->getZExtValue();
3422 if (!DemandedElts[Idx])
3423 return TLO.CombineTo(Op, Vec);
3424
3425 APInt DemandedVecElts(DemandedElts);
3426 DemandedVecElts.clearBit(Idx);
3427 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3428 KnownZero, TLO, Depth + 1))
3429 return true;
3430
3431 KnownUndef.setBitVal(Idx, Scl.isUndef());
3432
3433 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3434 break;
3435 }
3436
3437 APInt VecUndef, VecZero;
3438 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3439 Depth + 1))
3440 return true;
3441 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3442 break;
3443 }
3444 case ISD::VSELECT: {
3445 SDValue Sel = Op.getOperand(0);
3446 SDValue LHS = Op.getOperand(1);
3447 SDValue RHS = Op.getOperand(2);
3448
3449 // Try to transform the select condition based on the current demanded
3450 // elements.
3451 APInt UndefSel, ZeroSel;
3452 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3453 Depth + 1))
3454 return true;
3455
3456 // See if we can simplify either vselect operand.
3457 APInt DemandedLHS(DemandedElts);
3458 APInt DemandedRHS(DemandedElts);
3459 APInt UndefLHS, ZeroLHS;
3460 APInt UndefRHS, ZeroRHS;
3461 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3462 Depth + 1))
3463 return true;
3464 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3465 Depth + 1))
3466 return true;
3467
3468 KnownUndef = UndefLHS & UndefRHS;
3469 KnownZero = ZeroLHS & ZeroRHS;
3470
3471 // If we know that the selected element is always zero, we don't need the
3472 // select value element.
3473 APInt DemandedSel = DemandedElts & ~KnownZero;
3474 if (DemandedSel != DemandedElts)
3475 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3476 Depth + 1))
3477 return true;
3478
3479 break;
3480 }
3481 case ISD::VECTOR_SHUFFLE: {
3482 SDValue LHS = Op.getOperand(0);
3483 SDValue RHS = Op.getOperand(1);
3484 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3485
3486 // Collect demanded elements from shuffle operands.
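// Illustrative example: for NumElts = 4 and mask <0, 5, 2, 7> with all output
// lanes demanded, the LHS provides lanes {0,2} and the RHS (mask index minus
// NumElts) provides lanes {1,3}.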
3487 APInt DemandedLHS(NumElts, 0);
3488 APInt DemandedRHS(NumElts, 0);
3489 for (unsigned i = 0; i != NumElts; ++i) {
3490 int M = ShuffleMask[i];
3491 if (M < 0 || !DemandedElts[i])
3492 continue;
3493 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3494 if (M < (int)NumElts)
3495 DemandedLHS.setBit(M);
3496 else
3497 DemandedRHS.setBit(M - NumElts);
3498 }
3499
3500 // See if we can simplify either shuffle operand.
3501 APInt UndefLHS, ZeroLHS;
3502 APInt UndefRHS, ZeroRHS;
3503 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3504 Depth + 1))
3505 return true;
3506 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3507 Depth + 1))
3508 return true;
3509
3510 // Simplify mask using undef elements from LHS/RHS.
3511 bool Updated = false;
3512 bool IdentityLHS = true, IdentityRHS = true;
3513 SmallVector<int, 32> NewMask(ShuffleMask);
3514 for (unsigned i = 0; i != NumElts; ++i) {
3515 int &M = NewMask[i];
3516 if (M < 0)
3517 continue;
3518 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3519 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3520 Updated = true;
3521 M = -1;
3522 }
3523 IdentityLHS &= (M < 0) || (M == (int)i);
3524 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3525 }
3526
3527 // Update legal shuffle masks based on demanded elements, but only if the
3528 // result won't reduce to an identity mask, which can be removed prematurely.
3529 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3530 SDValue LegalShuffle =
3531 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3532 if (LegalShuffle)
3533 return TLO.CombineTo(Op, LegalShuffle);
3534 }
3535
3536 // Propagate undef/zero elements from LHS/RHS.
3537 for (unsigned i = 0; i != NumElts; ++i) {
3538 int M = ShuffleMask[i];
3539 if (M < 0) {
3540 KnownUndef.setBit(i);
3541 } else if (M < (int)NumElts) {
3542 if (UndefLHS[M])
3543 KnownUndef.setBit(i);
3544 if (ZeroLHS[M])
3545 KnownZero.setBit(i);
3546 } else {
3547 if (UndefRHS[M - NumElts])
3548 KnownUndef.setBit(i);
3549 if (ZeroRHS[M - NumElts])
3550 KnownZero.setBit(i);
3551 }
3552 }
3553 break;
3554 }
3555 case ISD::ANY_EXTEND_VECTOR_INREG:
3556 case ISD::SIGN_EXTEND_VECTOR_INREG:
3557 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3558 APInt SrcUndef, SrcZero;
3559 SDValue Src = Op.getOperand(0);
3560 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3561 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3562 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3563 Depth + 1))
3564 return true;
3565 KnownZero = SrcZero.zextOrTrunc(NumElts);
3566 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3567
3568 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3569 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3570 DemandedSrcElts == 1) {
3571 // aext - if we just need the bottom element then we can bitcast.
3572 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3573 }
3574
3575 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3576 // zext(undef) upper bits are guaranteed to be zero.
3577 if (DemandedElts.isSubsetOf(KnownUndef))
3578 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3579 KnownUndef.clearAllBits();
3580
3581 // zext - if we just need the bottom element then we can mask:
3582 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
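// Illustrative example (little-endian): when only the bottom element is
// demanded, folding the upper lanes of C to zero first lets the whole
// zero-extend become a plain bitcast of the narrower 'and'.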
3583 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3584 Op->isOnlyUserOf(Src.getNode()) &&
3585 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3586 SDLoc DL(Op);
3587 EVT SrcVT = Src.getValueType();
3588 EVT SrcSVT = SrcVT.getScalarType();
3589 SmallVector<SDValue> MaskElts;
3590 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3591 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3592 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3593 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3594 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3595 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3596 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3597 }
3598 }
3599 }
3600 break;
3601 }
3602
3603 // TODO: There are more binop opcodes that could be handled here - MIN,
3604 // MAX, saturated math, etc.
3605 case ISD::ADD: {
3606 SDValue Op0 = Op.getOperand(0);
3607 SDValue Op1 = Op.getOperand(1);
3608 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3609 APInt UndefLHS, ZeroLHS;
3610 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3611 Depth + 1, /*AssumeSingleUse*/ true))
3612 return true;
3613 }
3614 [[fallthrough]];
3615 }
3616 case ISD::AVGCEILS:
3617 case ISD::AVGCEILU:
3618 case ISD::AVGFLOORS:
3619 case ISD::AVGFLOORU:
3620 case ISD::OR:
3621 case ISD::XOR:
3622 case ISD::SUB:
3623 case ISD::FADD:
3624 case ISD::FSUB:
3625 case ISD::FMUL:
3626 case ISD::FDIV:
3627 case ISD::FREM: {
3628 SDValue Op0 = Op.getOperand(0);
3629 SDValue Op1 = Op.getOperand(1);
3630
3631 APInt UndefRHS, ZeroRHS;
3632 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3633 Depth + 1))
3634 return true;
3635 APInt UndefLHS, ZeroLHS;
3636 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3637 Depth + 1))
3638 return true;
3639
3640 KnownZero = ZeroLHS & ZeroRHS;
3641 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3642
3643 // Attempt to avoid multi-use ops if we don't need anything from them.
3644 // TODO - use KnownUndef to relax the demandedelts?
3645 if (!DemandedElts.isAllOnes())
3646 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3647 return true;
3648 break;
3649 }
3650 case ISD::SHL:
3651 case ISD::SRL:
3652 case ISD::SRA:
3653 case ISD::ROTL:
3654 case ISD::ROTR: {
3655 SDValue Op0 = Op.getOperand(0);
3656 SDValue Op1 = Op.getOperand(1);
3657
3658 APInt UndefRHS, ZeroRHS;
3659 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3660 Depth + 1))
3661 return true;
3662 APInt UndefLHS, ZeroLHS;
3663 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3664 Depth + 1))
3665 return true;
3666
3667 KnownZero = ZeroLHS;
3668 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3669
3670 // Attempt to avoid multi-use ops if we don't need anything from them.
3671 // TODO - use KnownUndef to relax the demandedelts?
3672 if (!DemandedElts.isAllOnes())
3673 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3674 return true;
3675 break;
3676 }
3677 case ISD::MUL:
3678 case ISD::MULHU:
3679 case ISD::MULHS:
3680 case ISD::AND: {
3681 SDValue Op0 = Op.getOperand(0);
3682 SDValue Op1 = Op.getOperand(1);
3683
3684 APInt SrcUndef, SrcZero;
3685 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3686 Depth + 1))
3687 return true;
3688 // If we know that a demanded element was zero in Op1 we don't need to
3689 // demand it in Op0 - it's guaranteed to be zero.
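// Illustrative example: for (and X, <0, -1>), result element 0 is 0 whatever
// X contains, so element 0 of X need not be demanded.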
3690 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3691 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3692 TLO, Depth + 1))
3693 return true;
3694
3695 KnownUndef &= DemandedElts0;
3696 KnownZero &= DemandedElts0;
3697
3698 // If every element pair has a zero/undef then just fold to zero.
3699 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3700 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3701 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3702 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3703
3704 // If either side has a zero element, then the result element is zero, even
3705 // if the other is an UNDEF.
3706 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3707 // and then handle 'and' nodes with the rest of the binop opcodes.
3708 KnownZero |= SrcZero;
3709 KnownUndef &= SrcUndef;
3710 KnownUndef &= ~KnownZero;
3711
3712 // Attempt to avoid multi-use ops if we don't need anything from them.
3713 if (!DemandedElts.isAllOnes())
3714 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3715 return true;
3716 break;
3717 }
3718 case ISD::TRUNCATE:
3719 case ISD::SIGN_EXTEND:
3720 case ISD::ZERO_EXTEND:
3721 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3722 KnownZero, TLO, Depth + 1))
3723 return true;
3724
3725 if (!DemandedElts.isAllOnes())
3726 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3727 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3728 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3729
3730 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3731 // zext(undef) upper bits are guaranteed to be zero.
3732 if (DemandedElts.isSubsetOf(KnownUndef))
3733 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3734 KnownUndef.clearAllBits();
3735 }
3736 break;
3737 case ISD::SINT_TO_FP:
3738 case ISD::UINT_TO_FP:
3739 case ISD::FP_TO_SINT:
3740 case ISD::FP_TO_UINT:
3741 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3742 KnownZero, TLO, Depth + 1))
3743 return true;
3744 // Don't fall through to generic undef -> undef handling.
3745 return false;
3746 default: {
3747 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3748 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3749 KnownZero, TLO, Depth))
3750 return true;
3751 } else {
3752 KnownBits Known;
3753 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3754 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3755 TLO, Depth, AssumeSingleUse))
3756 return true;
3757 }
3758 break;
3759 }
3760 }
3761 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3762
3763 // Constant fold all undef cases.
3764 // TODO: Handle zero cases as well.
3765 if (DemandedElts.isSubsetOf(KnownUndef))
3766 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3767
3768 return false;
3769}
3770
3771/// Determine which of the bits specified in Mask are known to be either zero or
3772/// one and return them in the Known.
3773void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3774 KnownBits &Known,
3775 const APInt &DemandedElts,
3776 const SelectionDAG &DAG,
3777 unsigned Depth) const {
3778 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3779 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3780 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3781 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3782 "Should use MaskedValueIsZero if you don't know whether Op"
3783 " is a target node!");
3784 Known.resetAll();
3785}
3786
3787void TargetLowering::computeKnownBitsForTargetInstr(
3788 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3789 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3790 unsigned Depth) const {
3791 Known.resetAll();
3792}
3793
3794void TargetLowering::computeKnownBitsForFrameIndex(
3795 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3796 // The low bits are known zero if the pointer is aligned.
3797 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3798}
3799
3800Align TargetLowering::computeKnownAlignForTargetInstr(
3801 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3802 unsigned Depth) const {
3803 return Align(1);
3804}
3805
3806/// This method can be implemented by targets that want to expose additional
3807/// information about sign bits to the DAG Combiner.
3808unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3809 const APInt &,
3810 const SelectionDAG &,
3811 unsigned Depth) const {
3812 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3813 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3814 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3815 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3816 "Should use ComputeNumSignBits if you don't know whether Op"
3817 " is a target node!");
3818 return 1;
3819}
3820
3821unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3822 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3823 const MachineRegisterInfo &MRI, unsigned Depth) const {
3824 return 1;
3825}
3826
3827bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3828 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3829 TargetLoweringOpt &TLO, unsigned Depth) const {
3830 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3831 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3832 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3833 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3834 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3835 " is a target node!");
3836 return false;
3837}
3838
3839bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3840 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3841 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3842 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3843 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3844 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3845 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3846 "Should use SimplifyDemandedBits if you don't know whether Op"
3847 " is a target node!");
3848 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3849 return false;
3850}
3851
3852SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3853 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3854 SelectionDAG &DAG, unsigned Depth) const {
3855 assert(
3856 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3857 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3858 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3859 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3860 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3861 " is a target node!");
3862 return SDValue();
3863}
3864
3865SDValue
3866TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3867 SDValue N1, MutableArrayRef<int> Mask,
3868 SelectionDAG &DAG) const {
3869 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3870 if (!LegalMask) {
3871 std::swap(N0, N1);
3872 ShuffleVectorSDNode::commuteMask(Mask);
3873 LegalMask = isShuffleMaskLegal(Mask, VT);
3874 }
3875
3876 if (!LegalMask)
3877 return SDValue();
3878
3879 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3880}
3881
3882const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3883 return nullptr;
3884}
3885
3886bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3887 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3888 bool PoisonOnly, unsigned Depth) const {
3889 assert(
3890 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3891 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3892 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3893 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3894 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3895 " is a target node!");
3896
3897 // If Op can't create undef/poison and none of its operands are undef/poison
3898 // then Op is never undef/poison.
3899 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3900 /*ConsiderFlags*/ true, Depth) &&
3901 all_of(Op->ops(), [&](SDValue V) {
3902 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3903 Depth + 1);
3904 });
3905}
3906
3907bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3908 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3909 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3910 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3911 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3912 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3913 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3914 "Should use canCreateUndefOrPoison if you don't know whether Op"
3915 " is a target node!");
3916 // Be conservative and return true.
3917 return true;
3918}
3919
3920bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3921 const SelectionDAG &DAG,
3922 bool SNaN,
3923 unsigned Depth) const {
3924 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3925 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3926 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3927 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3928 "Should use isKnownNeverNaN if you don't know whether Op"
3929 " is a target node!");
3930 return false;
3931}
3932
3933bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3934 const APInt &DemandedElts,
3935 APInt &UndefElts,
3936 const SelectionDAG &DAG,
3937 unsigned Depth) const {
3938 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3939 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3940 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3941 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3942 "Should use isSplatValue if you don't know whether Op"
3943 " is a target node!");
3944 return false;
3945}
3946
3947// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3948// work with truncating build vectors and vectors with elements of less than
3949// 8 bits.
3950bool TargetLowering::isConstTrueVal(SDValue N) const {
3951 if (!N)
3952 return false;
3953
3954 unsigned EltWidth;
3955 APInt CVal;
3956 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3957 /*AllowTruncation=*/true)) {
3958 CVal = CN->getAPIntValue();
3959 EltWidth = N.getValueType().getScalarSizeInBits();
3960 } else
3961 return false;
3962
3963 // If this is a truncating splat, truncate the splat value.
3964 // Otherwise, we may fail to match the expected values below.
3965 if (EltWidth < CVal.getBitWidth())
3966 CVal = CVal.trunc(EltWidth);
3967
3968 switch (getBooleanContents(N.getValueType())) {
3969 case UndefinedBooleanContent:
3970 return CVal[0];
3971 case ZeroOrOneBooleanContent:
3972 return CVal.isOne();
3973 case ZeroOrNegativeOneBooleanContent:
3974 return CVal.isAllOnes();
3975 }
3976
3977 llvm_unreachable("Invalid boolean contents");
3978}
3979
3980bool TargetLowering::isConstFalseVal(SDValue N) const {
3981 if (!N)
3982 return false;
3983
3984 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3985 if (!CN) {
3986 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3987 if (!BV)
3988 return false;
3989
3990 // Only interested in constant splats; we don't care about undef
3991 // elements in identifying boolean constants, and getConstantSplatNode
3992 // returns NULL if all ops are undef.
3993 CN = BV->getConstantSplatNode();
3994 if (!CN)
3995 return false;
3996 }
3997
3998 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3999 return !CN->getAPIntValue()[0];
4000
4001 return CN->isZero();
4002}
4003
4004bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4005 bool SExt) const {
4006 if (VT == MVT::i1)
4007 return N->isOne();
4008
4009 TargetLoweringBase::BooleanContent Cnt = getBooleanContents(VT);
4010 switch (Cnt) {
4011 case TargetLoweringBase::UndefinedBooleanContent:
4012 // An extended value of 1 is always true, unless its original type is i1,
4013 // in which case it will be sign extended to -1.
4014 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4015 case TargetLoweringBase::ZeroOrOneBooleanContent:
4016 case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
4017 return N->isAllOnes() && SExt;
4018 }
4019 llvm_unreachable("Unexpected enumeration.");
4020}
4021
4022/// This helper function of SimplifySetCC tries to optimize the comparison when
4023/// either operand of the SetCC node is a bitwise-and instruction.
4024SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4025 ISD::CondCode Cond, const SDLoc &DL,
4026 DAGCombinerInfo &DCI) const {
4027 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4028 std::swap(N0, N1);
4029
4030 SelectionDAG &DAG = DCI.DAG;
4031 EVT OpVT = N0.getValueType();
4032 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4033 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4034 return SDValue();
4035
4036 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4037 // iff everything but LSB is known zero:
4038 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4039 getBooleanContents(OpVT) ==
4040 TargetLowering::ZeroOrOneBooleanContent) {
4041 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4042 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4043 if (DAG.MaskedValueIsZero(N0, UpperBits))
4044 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4045 }
4046
4047 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4048 // test in a narrow type that we can truncate to with no cost. Examples:
4049 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4050 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4051 // TODO: This conservatively checks for type legality on the source and
4052 // destination types. That may inhibit optimizations, but it also
4053 // allows setcc->shift transforms that may be more beneficial.
4054 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4055 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4056 isTypeLegal(OpVT) && N0.hasOneUse()) {
4057 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4058 AndC->getAPIntValue().getActiveBits());
4059 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4060 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4061 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4062 return DAG.getSetCC(DL, VT, Trunc, Zero,
4063 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4064 }
4065 }
4066
4067 // Match these patterns in any of their permutations:
4068 // (X & Y) == Y
4069 // (X & Y) != Y
4070 SDValue X, Y;
4071 if (N0.getOperand(0) == N1) {
4072 X = N0.getOperand(1);
4073 Y = N0.getOperand(0);
4074 } else if (N0.getOperand(1) == N1) {
4075 X = N0.getOperand(0);
4076 Y = N0.getOperand(1);
4077 } else {
4078 return SDValue();
4079 }
4080
4081 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4082 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4083 // it's liable to create an infinite loop.
4084 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4085 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4086 valueHasExactlyOneBitSet(Y, DAG)) {
4087 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4088 // Note that where Y is variable and is known to have at most one bit set
4089 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4090 // equivalent when Y == 0.
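// Illustrative example: with Y = 0b0100, (X & Y) == Y holds iff bit 2 of X
// is set, which is exactly (X & Y) != 0; if Y could be 0, the forms disagree,
// since (X & 0) == 0 is true while (X & 0) != 0 is false.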
4091 assert(OpVT.isInteger());
4092 Cond = ISD::getSetCCInverse(Cond, OpVT);
4093 if (DCI.isBeforeLegalizeOps() ||
4094 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4095 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4096 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4097 // If the target supports an 'and-not' or 'and-complement' logic operation,
4098 // try to use that to make a comparison operation more efficient.
4099 // But don't do this transform if the mask is a single bit because there are
4100 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4101 // 'rlwinm' on PPC).
4102
4103 // Bail out if the compare operand that we want to turn into a zero is
4104 // already a zero (otherwise, infinite loop).
4105 if (isNullConstant(Y))
4106 return SDValue();
4107
4108 // Transform this into: ~X & Y == 0.
4109 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4110 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4111 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4112 }
4113
4114 return SDValue();
4115}
4116
4117/// There are multiple IR patterns that could be checking whether certain
4118/// truncation of a signed number would be lossy or not. The pattern which is
4119/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4120/// We are looking for the following pattern: (KeptBits is a constant)
4121/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4122/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4123/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4124/// We will unfold it into the natural trunc+sext pattern:
4125/// ((%x << C) a>> C) dstcond %x
4126/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
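/// Illustrative example: for i16 %x and KeptBits = 8, the input pattern is
/// (add %x, 128) u< 256, which holds iff %x is in [-128, 127], i.e. iff
/// ((%x << 8) a>> 8) seteq %x.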
4127SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4128 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4129 const SDLoc &DL) const {
4130 // We must be comparing with a constant.
4131 ConstantSDNode *C1;
4132 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4133 return SDValue();
4134
4135 // N0 should be: add %x, (1 << (KeptBits-1))
4136 if (N0->getOpcode() != ISD::ADD)
4137 return SDValue();
4138
4139 // And we must be 'add'ing a constant.
4140 ConstantSDNode *C01;
4141 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4142 return SDValue();
4143
4144 SDValue X = N0->getOperand(0);
4145 EVT XVT = X.getValueType();
4146
4147 // Validate constants ...
4148
4149 APInt I1 = C1->getAPIntValue();
4150
4151 ISD::CondCode NewCond;
4152 if (Cond == ISD::CondCode::SETULT) {
4153 NewCond = ISD::CondCode::SETEQ;
4154 } else if (Cond == ISD::CondCode::SETULE) {
4155 NewCond = ISD::CondCode::SETEQ;
4156 // But need to 'canonicalize' the constant.
4157 I1 += 1;
4158 } else if (Cond == ISD::CondCode::SETUGT) {
4159 NewCond = ISD::CondCode::SETNE;
4160 // But need to 'canonicalize' the constant.
4161 I1 += 1;
4162 } else if (Cond == ISD::CondCode::SETUGE) {
4163 NewCond = ISD::CondCode::SETNE;
4164 } else
4165 return SDValue();
4166
4167 APInt I01 = C01->getAPIntValue();
4168
4169 auto checkConstants = [&I1, &I01]() -> bool {
4170 // Both of them must be power-of-two, and the constant from setcc is bigger.
4171 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4172 };
4173
4174 if (checkConstants()) {
4175 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4176 } else {
4177 // What if we invert constants? (and the target predicate)
4178 I1.negate();
4179 I01.negate();
4180 assert(XVT.isInteger());
4181 NewCond = getSetCCInverse(NewCond, XVT);
4182 if (!checkConstants())
4183 return SDValue();
4184 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4185 }
4186
4187 // They are power-of-two, so which bit is set?
4188 const unsigned KeptBits = I1.logBase2();
4189 const unsigned KeptBitsMinusOne = I01.logBase2();
4190
4191 // Magic!
4192 if (KeptBits != (KeptBitsMinusOne + 1))
4193 return SDValue();
4194 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4195
4196 // We don't want to do this in every single case.
4197 SelectionDAG &DAG = DCI.DAG;
4198 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4199 return SDValue();
4200
4201 // Unfold into: sext_inreg(%x) cond %x
4202 // Where 'cond' will be either 'eq' or 'ne'.
4203 SDValue SExtInReg = DAG.getNode(
4204 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4205 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4206 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4207}
4208
4209// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4210SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4211 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4212 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4213 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4214 "Should be a comparison with 0.");
4215 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4216 "Valid only for [in]equality comparisons.");
4217
4218 unsigned NewShiftOpcode;
4219 SDValue X, C, Y;
4220
4221 SelectionDAG &DAG = DCI.DAG;
4222
4223 // Look for '(C l>>/<< Y)'.
4224 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4225 // The shift should be one-use.
4226 if (!V.hasOneUse())
4227 return false;
4228 unsigned OldShiftOpcode = V.getOpcode();
4229 switch (OldShiftOpcode) {
4230 case ISD::SHL:
4231 NewShiftOpcode = ISD::SRL;
4232 break;
4233 case ISD::SRL:
4234 NewShiftOpcode = ISD::SHL;
4235 break;
4236 default:
4237 return false; // must be a logical shift.
4238 }
4239 // We should be shifting a constant.
4240 // FIXME: best to use isConstantOrConstantVector().
4241 C = V.getOperand(0);
4242 ConstantSDNode *CC =
4243 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4244 if (!CC)
4245 return false;
4246 Y = V.getOperand(1);
4247
4248 ConstantSDNode *XC =
4249 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4251 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4252 };
4253
4254 // LHS of comparison should be an one-use 'and'.
4255 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4256 return SDValue();
4257
4258 X = N0.getOperand(0);
4259 SDValue Mask = N0.getOperand(1);
4260
4261 // 'and' is commutative!
4262 if (!Match(Mask)) {
4263 std::swap(X, Mask);
4264 if (!Match(Mask))
4265 return SDValue();
4266 }
4267
4268 EVT VT = X.getValueType();
4269
4270 // Produce:
4271 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4272 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4273 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4274 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4275 return T2;
4276}
4277
4278/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4279/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4280/// handle the commuted versions of these patterns.
4281SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4282 ISD::CondCode Cond, const SDLoc &DL,
4283 DAGCombinerInfo &DCI) const {
4284 unsigned BOpcode = N0.getOpcode();
4285 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4286 "Unexpected binop");
4287 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4288
4289 // (X + Y) == X --> Y == 0
4290 // (X - Y) == X --> Y == 0
4291 // (X ^ Y) == X --> Y == 0
4292 SelectionDAG &DAG = DCI.DAG;
4293 EVT OpVT = N0.getValueType();
4294 SDValue X = N0.getOperand(0);
4295 SDValue Y = N0.getOperand(1);
4296 if (X == N1)
4297 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4298
4299 if (Y != N1)
4300 return SDValue();
4301
4302 // (X + Y) == Y --> X == 0
4303 // (X ^ Y) == Y --> X == 0
4304 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4305 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4306
4307 // The shift would not be valid if the operands are boolean (i1).
4308 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4309 return SDValue();
4310
4311 // (X - Y) == Y --> X == Y << 1
4312 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4313 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4314 if (!DCI.isCalledByLegalizer())
4315 DCI.AddToWorklist(YShl1.getNode());
4316 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4317}
4318
4319static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4320 SDValue N0, const APInt &C1,
4321 ISD::CondCode Cond, const SDLoc &dl,
4322 SelectionDAG &DAG) {
4323 // Look through truncs that don't change the value of a ctpop.
4324 // FIXME: Add vector support? Need to be careful with setcc result type below.
4325 SDValue CTPOP = N0;
4326 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4327 N0.getOperand(0).getOpcode() == ISD::CTPOP)
4328 CTPOP = N0.getOperand(0);
4329
4330 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4331 return SDValue();
4332
4333 EVT CTVT = CTPOP.getValueType();
4334 SDValue CTOp = CTPOP.getOperand(0);
4335
4336 // Expand a power-of-2-or-zero comparison based on ctpop:
4337 // (ctpop x) u< 2 -> (x & x-1) == 0
4338 // (ctpop x) u> 1 -> (x & x-1) != 0
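// Illustrative example: x = 0b1100 gives x & (x - 1) = 0b1000 != 0 (at least
// two bits set), while x = 0b1000 gives x & (x - 1) = 0 (at most one bit set).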
4339 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4340 // Keep the CTPOP if it is a cheap vector op.
4341 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4342 return SDValue();
4343
4344 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4345 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4346 return SDValue();
4347 if (C1 == 0 && (Cond == ISD::SETULT))
4348 return SDValue(); // This is handled elsewhere.
4349
4350 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4351
4352 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4353 SDValue Result = CTOp;
4354 for (unsigned i = 0; i < Passes; i++) {
4355 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4356 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4357 }
4358 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4359 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4360 }
4361
4362 // Expand a power-of-2 comparison based on ctpop
4363 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4364 // Keep the CTPOP if it is cheap.
4365 if (TLI.isCtpopFast(CTVT))
4366 return SDValue();
4367
4368 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4369 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4370 assert(CTVT.isInteger());
4371 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4372
4373 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4374 // check before emitting a potentially unnecessary op.
4375 if (DAG.isKnownNeverZero(CTOp)) {
4376 // (ctpop x) == 1 --> (x & x-1) == 0
4377 // (ctpop x) != 1 --> (x & x-1) != 0
4378 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4379 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4380 return RHS;
4381 }
4382
4383 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4384 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
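// Illustrative example: x = 0b0100 gives x ^ (x - 1) = 0b0111 u> 0b0011, so
// exactly one bit is set; x = 0b0110 gives x ^ (x - 1) = 0b0011, which is
// not u> x - 1 = 0b0101.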
4385 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4386 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4387 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4388 }
4389
4390 return SDValue();
4391}
4392
4393static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4394 ISD::CondCode Cond, const SDLoc &dl,
4395 SelectionDAG &DAG) {
4396 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4397 return SDValue();
4398
4399 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4400 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4401 return SDValue();
4402
4403 auto getRotateSource = [](SDValue X) {
4404 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4405 return X.getOperand(0);
4406 return SDValue();
4407 };
4408
4409 // Peek through a rotated value compared against 0 or -1:
4410 // (rot X, Y) == 0/-1 --> X == 0/-1
4411 // (rot X, Y) != 0/-1 --> X != 0/-1
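// A rotate only permutes bits and never discards any, so the result is
// all-zeros or all-ones exactly when its input is.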
4412 if (SDValue R = getRotateSource(N0))
4413 return DAG.getSetCC(dl, VT, R, N1, Cond);
4414
4415 // Peek through an 'or' of a rotated value compared against 0:
4416 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4417 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4418 //
4419 // TODO: Add the 'and' with -1 sibling.
4420 // TODO: Recurse through a series of 'or' ops to find the rotate.
4421 EVT OpVT = N0.getValueType();
4422 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4423 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4424 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4425 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4426 }
4427 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4428 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4429 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4430 }
4431 }
4432
4433 return SDValue();
4434}
4435
4436static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4437 ISD::CondCode Cond, const SDLoc &dl,
4438 SelectionDAG &DAG) {
4439 // If we are testing for all-bits-clear, we might be able to do that with
4440 // less shifting since bit-order does not matter.
4441 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4442 return SDValue();
4443
4444 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4445 if (!C1 || !C1->isZero())
4446 return SDValue();
4447
4448 if (!N0.hasOneUse() ||
4449 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4450 return SDValue();
4451
4452 unsigned BitWidth = N0.getScalarValueSizeInBits();
4453 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4454 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4455 return SDValue();
4456
4457 // Canonicalize fshr as fshl to reduce pattern-matching.
4458 unsigned ShAmt = ShAmtC->getZExtValue();
4459 if (N0.getOpcode() == ISD::FSHR)
4460 ShAmt = BitWidth - ShAmt;
4461
4462 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4463 SDValue X, Y;
4464 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4465 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4466 return false;
4467 if (Or.getOperand(0) == Other) {
4468 X = Or.getOperand(0);
4469 Y = Or.getOperand(1);
4470 return true;
4471 }
4472 if (Or.getOperand(1) == Other) {
4473 X = Or.getOperand(1);
4474 Y = Or.getOperand(0);
4475 return true;
4476 }
4477 return false;
4478 };
4479
4480 EVT OpVT = N0.getValueType();
4481 EVT ShAmtVT = N0.getOperand(2).getValueType();
4482 SDValue F0 = N0.getOperand(0);
4483 SDValue F1 = N0.getOperand(1);
4484 if (matchOr(F0, F1)) {
4485 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4486 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4487 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4488 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4489 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4490 }
4491 if (matchOr(F1, F0)) {
4492 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4493 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4494 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4495 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4496 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4497 }
4498
4499 return SDValue();
4500}
4501
4502/// Try to simplify a setcc built with the specified operands and cc. If it is
4503/// unable to simplify it, return a null SDValue.
4504SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4505 ISD::CondCode Cond, bool foldBooleans,
4506 DAGCombinerInfo &DCI,
4507 const SDLoc &dl) const {
4508 SelectionDAG &DAG = DCI.DAG;
4509 const DataLayout &Layout = DAG.getDataLayout();
4510 EVT OpVT = N0.getValueType();
4511 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4512
4513 // Constant fold or commute setcc.
4514 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4515 return Fold;
4516
4517 bool N0ConstOrSplat =
4518 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4519 bool N1ConstOrSplat =
4520 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4521
4522 // Canonicalize toward having the constant on the RHS.
4523 // TODO: Handle non-splat vector constants. All undef causes trouble.
4524 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4525 // infinite loop here when we encounter one.
4526 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4527 if (N0ConstOrSplat && !N1ConstOrSplat &&
4528 (DCI.isBeforeLegalizeOps() ||
4529 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4530 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4531
4532 // If we have a subtract with the same 2 non-constant operands as this setcc
4533 // -- but in reverse order -- then try to commute the operands of this setcc
4534 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4535 // instruction on some targets.
4536 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4537 (DCI.isBeforeLegalizeOps() ||
4538 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4539 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4540 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4541 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4542
4543 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4544 return V;
4545
4546 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4547 return V;
4548
4549 if (auto *N1C = isConstOrConstSplat(N1)) {
4550 const APInt &C1 = N1C->getAPIntValue();
4551
4552 // Optimize some CTPOP cases.
4553 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4554 return V;
4555
4556 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4557 // X * Y == 0 --> (X == 0) || (Y == 0)
4558 // X * Y != 0 --> (X != 0) && (Y != 0)
4559 // TODO: This bails out if minsize is set, but if the target doesn't have a
4560 // single instruction multiply for this type, it would likely be
4561 // smaller to decompose.
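// The no-wrap flags are what make this sound: e.g. in i8, 4 * 64 wraps to 0
// even though neither operand is zero, but nuw/nsw rules such products out.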
4562 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4563 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4564 (N0->getFlags().hasNoUnsignedWrap() ||
4565 N0->getFlags().hasNoSignedWrap()) &&
4566 !Attr.hasFnAttr(Attribute::MinSize)) {
4567 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4568 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4569 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4570 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4571 }
4572
4573 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4574 // equality comparison, then we're just comparing whether X itself is
4575 // zero.
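// Illustrative example (i32): (srl (ctlz x), 5) == 1 iff ctlz(x) == 32, and
// ctlz only reaches the full bit width when x == 0.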
4576 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4577 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4578 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4579 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4580 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4581 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4582 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4583 // (srl (ctlz x), 5) == 0 -> X != 0
4584 // (srl (ctlz x), 5) != 1 -> X != 0
4585 Cond = ISD::SETNE;
4586 } else {
4587 // (srl (ctlz x), 5) != 0 -> X == 0
4588 // (srl (ctlz x), 5) == 1 -> X == 0
4589 Cond = ISD::SETEQ;
4590 }
4591 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4592 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4593 Cond);
4594 }
4595 }
4596 }
4597 }
4598
4599 // FIXME: Support vectors.
4600 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4601 const APInt &C1 = N1C->getAPIntValue();
4602
4603 // (zext x) == C --> x == (trunc C)
4604 // (sext x) == C --> x == (trunc C)
4605 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4606 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4607 unsigned MinBits = N0.getValueSizeInBits();
4608 SDValue PreExt;
4609 bool Signed = false;
4610 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4611 // ZExt
4612 MinBits = N0->getOperand(0).getValueSizeInBits();
4613 PreExt = N0->getOperand(0);
4614 } else if (N0->getOpcode() == ISD::AND) {
4615 // DAGCombine turns costly ZExts into ANDs
4616 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4617 if ((C->getAPIntValue()+1).isPowerOf2()) {
4618 MinBits = C->getAPIntValue().countr_one();
4619 PreExt = N0->getOperand(0);
4620 }
4621 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4622 // SExt
4623 MinBits = N0->getOperand(0).getValueSizeInBits();
4624 PreExt = N0->getOperand(0);
4625 Signed = true;
4626 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4627 // ZEXTLOAD / SEXTLOAD
4628 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4629 MinBits = LN0->getMemoryVT().getSizeInBits();
4630 PreExt = N0;
4631 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4632 Signed = true;
4633 MinBits = LN0->getMemoryVT().getSizeInBits();
4634 PreExt = N0;
4635 }
4636 }
4637
4638 // Figure out how many bits we need to preserve this constant.
4639 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4640
4641 // Make sure we're not losing bits from the constant.
4642 if (MinBits > 0 &&
4643 MinBits < C1.getBitWidth() &&
4644 MinBits >= ReqdBits) {
4645 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4646 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4647 // Will get folded away.
4648 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4649 if (MinBits == 1 && C1 == 1)
4650 // Invert the condition.
4651 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4652 ISD::getSetCCInverse(Cond, MVT::i1));
4653 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4654 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4655 }
4656
4657 // If truncating the setcc operands is not desirable, we can still
4658 // simplify the expression in some cases:
4659 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4660 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4661 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4662 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4663 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4664 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4665 SDValue TopSetCC = N0->getOperand(0);
4666 unsigned N0Opc = N0->getOpcode();
4667 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4668 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4669 TopSetCC.getOpcode() == ISD::SETCC &&
4670 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4671 (isConstFalseVal(N1) ||
4672 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4673
4674 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4675 (!N1C->isZero() && Cond == ISD::SETNE);
4676
4677 if (!Inverse)
4678 return TopSetCC;
4679
4680 ISD::CondCode InvCond = ISD::getSetCCInverse(
4681 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4682 TopSetCC.getOperand(0).getValueType());
4683 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4684 TopSetCC.getOperand(1),
4685 InvCond);
4686 }
4687 }
4688 }
4689
4690 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4691 // equality or unsigned, and all 1 bits of the const are in the same
4692 // partial word, see if we can shorten the load.
4693 if (DCI.isBeforeLegalize() &&
4694 !ISD::isSignedIntSetCC(Cond) &&
4695 N0.getOpcode() == ISD::AND && C1 == 0 &&
4696 N0.getNode()->hasOneUse() &&
4697 isa<LoadSDNode>(N0.getOperand(0)) &&
4698 N0.getOperand(0).getNode()->hasOneUse() &&
4699 isa<ConstantSDNode>(N0.getOperand(1))) {
4700 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4701 APInt bestMask;
4702 unsigned bestWidth = 0, bestOffset = 0;
4703 if (Lod->isSimple() && Lod->isUnindexed() &&
4704 (Lod->getMemoryVT().isByteSized() ||
4705 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4706 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4707 unsigned origWidth = N0.getValueSizeInBits();
4708 unsigned maskWidth = origWidth;
4709 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4710 // 8 bits, but have to be careful...
4711 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4712 origWidth = Lod->getMemoryVT().getSizeInBits();
4713 const APInt &Mask = N0.getConstantOperandAPInt(1);
4714 // Only consider power-of-2 widths (and at least one byte) as candidates
4715 // for the narrowed load.
4716 for (unsigned width = 8; width < origWidth; width *= 2) {
4717 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4718 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4719 continue;
4720 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4721 // Avoid accessing any padding here for now (we could use memWidth
4722 // instead of origWidth here otherwise).
4723 unsigned maxOffset = origWidth - width;
4724 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4725 if (Mask.isSubsetOf(newMask)) {
4726 unsigned ptrOffset =
4727 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4728 unsigned IsFast = 0;
4729 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4730 if (allowsMemoryAccess(
4731 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4732 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4733 IsFast) {
4734 bestOffset = ptrOffset / 8;
4735 bestMask = Mask.lshr(offset);
4736 bestWidth = width;
4737 break;
4738 }
4739 }
4740 newMask <<= 8;
4741 }
4742 if (bestWidth)
4743 break;
4744 }
4745 }
4746 if (bestWidth) {
4747 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4748 SDValue Ptr = Lod->getBasePtr();
4749 if (bestOffset != 0)
4750 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4751 SDValue NewLoad =
4752 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4753 Lod->getPointerInfo().getWithOffset(bestOffset),
4754 Lod->getOriginalAlign());
4755 SDValue And =
4756 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4757 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4758 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4759 }
4760 }
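// Illustrative walk-through of the narrowing above, assuming a little-endian
// target where an 8-bit load of the tested byte is legal and fast:
//   (and (i32 load p), 0x00FF0000) == 0
// keeps all mask bits within byte 2, so it becomes
//   (and (i8 load p+2), 0xFF) == 0
// with bestWidth = 8, bestOffset = 2 and bestMask = (0x00FF0000 >> 16).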
4761
4762 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4763 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4764 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4765
4766 // If the comparison constant has bits in the upper part, the
4767 // zero-extended value could never match.
4768 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4769 C1.getBitWidth() - InSize))) {
4770 switch (Cond) {
4771 case ISD::SETUGT:
4772 case ISD::SETUGE:
4773 case ISD::SETEQ:
4774 return DAG.getConstant(0, dl, VT);
4775 case ISD::SETULT:
4776 case ISD::SETULE:
4777 case ISD::SETNE:
4778 return DAG.getConstant(1, dl, VT);
4779 case ISD::SETGT:
4780 case ISD::SETGE:
4781 // True if the sign bit of C1 is set.
4782 return DAG.getConstant(C1.isNegative(), dl, VT);
4783 case ISD::SETLT:
4784 case ISD::SETLE:
4785 // True if the sign bit of C1 isn't set.
4786 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4787 default:
4788 break;
4789 }
4790 }
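// For example, with (zext i8 X to i32) compared against 300: bit 8 of 300 is
// set, so no zero-extended i8 value can equal it; setugt folds to the
// constant 0 above, and setne would fold to the constant 1.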
4791
4792 // Otherwise, we can perform the comparison with the low bits.
4793 switch (Cond) {
4794 case ISD::SETEQ:
4795 case ISD::SETNE:
4796 case ISD::SETUGT:
4797 case ISD::SETUGE:
4798 case ISD::SETULT:
4799 case ISD::SETULE: {
4800 EVT newVT = N0.getOperand(0).getValueType();
4801 // FIXME: Should use isNarrowingProfitable.
4802 if (DCI.isBeforeLegalizeOps() ||
4803 (isOperationLegal(ISD::SETCC, newVT) &&
4804 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4805 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4806 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4807 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4808
4809 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4810 NewConst, Cond);
4811 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4812 }
4813 break;
4814 }
4815 default:
4816 break; // todo, be more careful with signed comparisons
4817 }
4818 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4819 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4820 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4821 OpVT)) {
4822 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4823 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4824 EVT ExtDstTy = N0.getValueType();
4825 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4826
4827 // If the constant doesn't fit into the number of bits for the source of
4828 // the sign extension, it is impossible for both sides to be equal.
4829 if (C1.getSignificantBits() > ExtSrcTyBits)
4830 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4831
4832 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4833 ExtDstTy != ExtSrcTy && "Unexpected types!");
4834 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4835 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4836 DAG.getConstant(Imm, dl, ExtDstTy));
4837 if (!DCI.isCalledByLegalizer())
4838 DCI.AddToWorklist(ZextOp.getNode());
4839 // Otherwise, make this a use of a zext.
4840 return DAG.getSetCC(dl, VT, ZextOp,
4841 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4842 } else if ((N1C->isZero() || N1C->isOne()) &&
4843 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4844 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4845 // excluded as they are handled below whilst checking for foldBooleans.
4846 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4847 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4848 (N0.getValueType() == MVT::i1 ||
4849 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4850 DAG.MaskedValueIsZero(
4851 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4852 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4853 if (TrueWhenTrue)
4854 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4855 // Invert the condition.
4856 if (N0.getOpcode() == ISD::SETCC) {
4857 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4858 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4859 if (DCI.isBeforeLegalizeOps() ||
4860 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4861 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4862 }
4863 }
4864
4865 if ((N0.getOpcode() == ISD::XOR ||
4866 (N0.getOpcode() == ISD::AND &&
4867 N0.getOperand(0).getOpcode() == ISD::XOR &&
4868 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4869 isOneConstant(N0.getOperand(1))) {
4870 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4871 // can only do this if the top bits are known zero.
4872 unsigned BitWidth = N0.getValueSizeInBits();
4873 if (DAG.MaskedValueIsZero(N0,
4874 APInt::getHighBitsSet(BitWidth,
4875 BitWidth-1))) {
4876 // Okay, get the un-inverted input value.
4877 SDValue Val;
4878 if (N0.getOpcode() == ISD::XOR) {
4879 Val = N0.getOperand(0);
4880 } else {
4881 assert(N0.getOpcode() == ISD::AND &&
4882 N0.getOperand(0).getOpcode() == ISD::XOR);
4883 // ((X^1)&1)^1 -> X & 1
4884 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4885 N0.getOperand(0).getOperand(0),
4886 N0.getOperand(1));
4887 }
4888
4889 return DAG.getSetCC(dl, VT, Val, N1,
4890 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4891 }
4892 } else if (N1C->isOne()) {
4893 SDValue Op0 = N0;
4894 if (Op0.getOpcode() == ISD::TRUNCATE)
4895 Op0 = Op0.getOperand(0);
4896
4897 if ((Op0.getOpcode() == ISD::XOR) &&
4898 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4899 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4900 SDValue XorLHS = Op0.getOperand(0);
4901 SDValue XorRHS = Op0.getOperand(1);
4902 // Ensure that the input setccs return an i1 type or 0/1 value.
4903 if (Op0.getValueType() == MVT::i1 ||
4904 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4905 ZeroOrOneBooleanContent &&
4906 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4907 ZeroOrOneBooleanContent)) {
4908 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4909 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4910 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4911 }
4912 }
4913 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4914 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4915 if (Op0.getValueType().bitsGT(VT))
4916 Op0 = DAG.getNode(ISD::AND, dl, VT,
4917 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4918 DAG.getConstant(1, dl, VT));
4919 else if (Op0.getValueType().bitsLT(VT))
4920 Op0 = DAG.getNode(ISD::AND, dl, VT,
4921 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4922 DAG.getConstant(1, dl, VT));
4923
4924 return DAG.getSetCC(dl, VT, Op0,
4925 DAG.getConstant(0, dl, Op0.getValueType()),
4926 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4927 }
4928 if (Op0.getOpcode() == ISD::AssertZext &&
4929 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4930 return DAG.getSetCC(dl, VT, Op0,
4931 DAG.getConstant(0, dl, Op0.getValueType()),
4932 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4933 }
4934 }
4935
4936 // Given:
4937 // icmp eq/ne (urem %x, %y), 0
4938 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4939 // icmp eq/ne %x, 0
4940 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4941 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4942 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4943 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4944 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4945 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4946 }
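// The urem fold above is sound because every divisor of a power of two is
// itself a power of two: a %y with two or more bits set can never divide a
// nonzero %x with at most one bit set, so (urem %x, %y) is 0 iff %x is 0.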
4947
4948 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4949 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4950 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4951 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4952 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4953 N1C->isAllOnes()) {
4954 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4955 DAG.getConstant(0, dl, OpVT),
4956 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4957 }
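// e.g. for i32 X: (ashr X, 31) is -1 exactly when the sign bit of X is set,
// so the seteq form becomes X <s 0 and the setne form becomes X >=s 0.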
4958
4959 if (SDValue V =
4960 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4961 return V;
4962 }
4963
4964 // These simplifications apply to splat vectors as well.
4965 // TODO: Handle more splat vector cases.
4966 if (auto *N1C = isConstOrConstSplat(N1)) {
4967 const APInt &C1 = N1C->getAPIntValue();
4968
4969 APInt MinVal, MaxVal;
4970 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4971 if (ISD::isSignedIntSetCC(Cond)) {
4972 MinVal = APInt::getSignedMinValue(OperandBitSize);
4973 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4974 } else {
4975 MinVal = APInt::getMinValue(OperandBitSize);
4976 MaxVal = APInt::getMaxValue(OperandBitSize);
4977 }
4978
4979 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4980 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4981 // X >= MIN --> true
4982 if (C1 == MinVal)
4983 return DAG.getBoolConstant(true, dl, VT, OpVT);
4984
4985 if (!VT.isVector()) { // TODO: Support this for vectors.
4986 // X >= C0 --> X > (C0 - 1)
4987 APInt C = C1 - 1;
4988 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4989 if ((DCI.isBeforeLegalizeOps() ||
4990 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4991 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4992 isLegalICmpImmediate(C.getSExtValue())))) {
4993 return DAG.getSetCC(dl, VT, N0,
4994 DAG.getConstant(C, dl, N1.getValueType()),
4995 NewCC);
4996 }
4997 }
4998 }
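// e.g. (setge X, 5) becomes (setgt X, 4) and (setuge X, 5) becomes
// (setugt X, 4); the C1 == MinVal case was already folded to 'true' above.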
4999
5000 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5001 // X <= MAX --> true
5002 if (C1 == MaxVal)
5003 return DAG.getBoolConstant(true, dl, VT, OpVT);
5004
5005 // X <= C0 --> X < (C0 + 1)
5006 if (!VT.isVector()) { // TODO: Support this for vectors.
5007 APInt C = C1 + 1;
5008 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5009 if ((DCI.isBeforeLegalizeOps() ||
5010 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5011 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5012 isLegalICmpImmediate(C.getSExtValue())))) {
5013 return DAG.getSetCC(dl, VT, N0,
5014 DAG.getConstant(C, dl, N1.getValueType()),
5015 NewCC);
5016 }
5017 }
5018 }
5019
5020 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5021 if (C1 == MinVal)
5022 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5023
5024 // TODO: Support this for vectors after legalize ops.
5025 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5026 // Canonicalize setlt X, Max --> setne X, Max
5027 if (C1 == MaxVal)
5028 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5029
5030 // If we have setult X, 1, turn it into seteq X, 0
5031 if (C1 == MinVal+1)
5032 return DAG.getSetCC(dl, VT, N0,
5033 DAG.getConstant(MinVal, dl, N0.getValueType()),
5034 ISD::SETEQ);
5035 }
5036 }
5037
5038 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5039 if (C1 == MaxVal)
5040 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5041
5042 // TODO: Support this for vectors after legalize ops.
5043 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5044 // Canonicalize setgt X, Min --> setne X, Min
5045 if (C1 == MinVal)
5046 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5047
5048 // If we have setugt X, Max-1, turn it into seteq X, Max
5049 if (C1 == MaxVal-1)
5050 return DAG.getSetCC(dl, VT, N0,
5051 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5052 ISD::SETEQ);
5053 }
5054 }
5055
5056 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5057 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5058 if (C1.isZero())
5059 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5060 VT, N0, N1, Cond, DCI, dl))
5061 return CC;
5062
5063 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5064 // For example, when high 32-bits of i64 X are known clear:
5065 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5066 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5067 bool CmpZero = N1C->isZero();
5068 bool CmpNegOne = N1C->isAllOnes();
5069 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5070 // Match or(lo,shl(hi,bw/2)) pattern.
5071 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5072 unsigned EltBits = V.getScalarValueSizeInBits();
5073 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5074 return false;
5075 SDValue LHS = V.getOperand(0);
5076 SDValue RHS = V.getOperand(1);
5077 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5078 // Unshifted element must have zero upperbits.
5079 if (RHS.getOpcode() == ISD::SHL &&
5080 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5081 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5082 DAG.MaskedValueIsZero(LHS, HiBits)) {
5083 Lo = LHS;
5084 Hi = RHS.getOperand(0);
5085 return true;
5086 }
5087 if (LHS.getOpcode() == ISD::SHL &&
5088 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5089 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5090 DAG.MaskedValueIsZero(RHS, HiBits)) {
5091 Lo = RHS;
5092 Hi = LHS.getOperand(0);
5093 return true;
5094 }
5095 return false;
5096 };
5097
5098 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5099 unsigned EltBits = N0.getScalarValueSizeInBits();
5100 unsigned HalfBits = EltBits / 2;
5101 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5102 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5103 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5104 SDValue NewN0 =
5105 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5106 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5107 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5108 };
5109
5110 SDValue Lo, Hi;
5111 if (IsConcat(N0, Lo, Hi))
5112 return MergeConcat(Lo, Hi);
5113
5114 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5115 SDValue Lo0, Lo1, Hi0, Hi1;
5116 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5117 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5118 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5119 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5120 }
5121 }
5122 }
5123 }
5124
5125 // If we have "setcc X, C0", check to see if we can shrink the immediate
5126 // by changing cc.
5127 // TODO: Support this for vectors after legalize ops.
5128 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5129 // SETUGT X, SINTMAX -> SETLT X, 0
5130 // SETUGE X, SINTMIN -> SETLT X, 0
5131 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5132 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5133 return DAG.getSetCC(dl, VT, N0,
5134 DAG.getConstant(0, dl, N1.getValueType()),
5135 ISD::SETLT);
5136
5137 // SETULT X, SINTMIN -> SETGT X, -1
5138 // SETULE X, SINTMAX -> SETGT X, -1
5139 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5140 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5141 return DAG.getSetCC(dl, VT, N0,
5142 DAG.getAllOnesConstant(dl, N1.getValueType()),
5143 ISD::SETGT);
5144 }
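// e.g. for i8: (setugt X, 0x7f) becomes (setlt X, 0), since an unsigned
// value above the signed maximum is exactly one with its sign bit set.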
5145 }
5146
5147 // Back to non-vector simplifications.
5148 // TODO: Can we do these for vector splats?
5149 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5150 const APInt &C1 = N1C->getAPIntValue();
5151 EVT ShValTy = N0.getValueType();
5152
5153 // Fold bit comparisons when we can. This will result in an
5154 // incorrect value when boolean false is negative one, unless
5155 // the bitsize is 1 in which case the false value is the same
5156 // in practice regardless of the representation.
5157 if ((VT.getSizeInBits() == 1 ||
5158 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5159 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5160 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5161 N0.getOpcode() == ISD::AND) {
5162 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5163 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5164 // Perform the xform if the AND RHS is a single bit.
5165 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5166 if (AndRHS->getAPIntValue().isPowerOf2() &&
5167 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5168 return DAG.getNode(
5169 ISD::TRUNCATE, dl, VT,
5170 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5171 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5172 }
5173 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5174 // (X & 8) == 8 --> (X & 8) >> 3
5175 // Perform the xform if C1 is a single bit.
5176 unsigned ShCt = C1.logBase2();
5177 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5178 return DAG.getNode(
5179 ISD::TRUNCATE, dl, VT,
5180 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5181 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5182 }
5183 }
5184 }
5185 }
5186
5187 if (C1.getSignificantBits() <= 64 &&
5188 !isLegalICmpImmediate(C1.getSExtValue())) {
5189 // (X & -256) == 256 -> (X >> 8) == 1
5190 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5191 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5192 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5193 const APInt &AndRHSC = AndRHS->getAPIntValue();
5194 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5195 unsigned ShiftBits = AndRHSC.countr_zero();
5196 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5197 SDValue Shift = DAG.getNode(
5198 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5199 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5200 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5201 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5202 }
5203 }
5204 }
5205 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5206 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5207 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5208 // X < 0x100000000 -> (X >> 32) < 1
5209 // X >= 0x100000000 -> (X >> 32) >= 1
5210 // X <= 0x0ffffffff -> (X >> 32) < 1
5211 // X > 0x0ffffffff -> (X >> 32) >= 1
5212 unsigned ShiftBits;
5213 APInt NewC = C1;
5214 ISD::CondCode NewCond = Cond;
5215 if (AdjOne) {
5216 ShiftBits = C1.countr_one();
5217 NewC = NewC + 1;
5218 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5219 } else {
5220 ShiftBits = C1.countr_zero();
5221 }
5222 NewC.lshrInPlace(ShiftBits);
5223 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5224 isLegalICmpImmediate(NewC.getSExtValue()) &&
5225 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5226 SDValue Shift =
5227 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5228 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5229 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5230 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5231 }
5232 }
5233 }
5234 }
5235
5236 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5237 auto *CFP = cast<ConstantFPSDNode>(N1);
5238 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5239
5240 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5241 // constant if knowing that the operand is non-nan is enough. We prefer to
5242 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5243 // materialize 0.0.
5244 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5245 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5246
5247 // setcc (fneg x), C -> setcc swap(pred) x, -C
5248 if (N0.getOpcode() == ISD::FNEG) {
5249 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5250 if (DCI.isBeforeLegalizeOps() ||
5251 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5252 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5253 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5254 }
5255 }
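// e.g. (setolt (fneg X), 2.0) becomes (setogt X, -2.0): negating both sides
// of the comparison swaps its direction.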
5256
5257 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5258 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5259 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5260 bool IsFabs = N0.getOpcode() == ISD::FABS;
5261 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5262 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5263 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5264 : (IsFabs ? fcInf : fcPosInf);
5265 if (Cond == ISD::SETUEQ)
5266 Flag |= fcNan;
5267 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5268 DAG.getTargetConstant(Flag, dl, MVT::i32));
5269 }
5270 }
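// e.g. (setoeq (fabs X), +Inf) becomes (is_fpclass X, fcInf), matching both
// infinities; the unordered setueq form ORs in fcNan so NaN also passes.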
5271
5272 // If the condition is not legal, see if we can find an equivalent one
5273 // which is legal.
5274 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5275 // If the comparison was an awkward floating-point == or != and one of
5276 // the comparison operands is infinity or negative infinity, convert the
5277 // condition to a less-awkward <= or >=.
5278 if (CFP->getValueAPF().isInfinity()) {
5279 bool IsNegInf = CFP->getValueAPF().isNegative();
5280 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5281 switch (Cond) {
5282 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5283 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5284 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5285 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5286 default: break;
5287 }
5288 if (NewCond != ISD::SETCC_INVALID &&
5289 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5290 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5291 }
5292 }
5293 }
5294
5295 if (N0 == N1) {
5296 // The sext(setcc()) => setcc() optimization relies on the appropriate
5297 // constant being emitted.
5298 assert(!N0.getValueType().isInteger() &&
5299 "Integer types should be handled by FoldSetCC");
5300
5301 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5302 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5303 if (UOF == 2) // FP operators that are undefined on NaNs.
5304 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5305 if (UOF == unsigned(EqTrue))
5306 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5307 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5308 // if it is not already.
5309 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5310 if (NewCond != Cond &&
5311 (DCI.isBeforeLegalizeOps() ||
5312 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5313 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5314 }
5315
5316 // ~X > ~Y --> Y > X
5317 // ~X < ~Y --> Y < X
5318 // ~X < C --> X > ~C
5319 // ~X > C --> X < ~C
5320 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5321 N0.getValueType().isInteger()) {
5322 if (isBitwiseNot(N0)) {
5323 if (isBitwiseNot(N1))
5324 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5325
5326 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5327 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5328 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5329 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5330 }
5331 }
5332 }
5333
5334 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5335 N0.getValueType().isInteger()) {
5336 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5337 N0.getOpcode() == ISD::XOR) {
5338 // Simplify (X+Y) == (X+Z) --> Y == Z
5339 if (N0.getOpcode() == N1.getOpcode()) {
5340 if (N0.getOperand(0) == N1.getOperand(0))
5341 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5342 if (N0.getOperand(1) == N1.getOperand(1))
5343 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5344 if (isCommutativeBinOp(N0.getOpcode())) {
5345 // If X op Y == Y op X, try other combinations.
5346 if (N0.getOperand(0) == N1.getOperand(1))
5347 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5348 Cond);
5349 if (N0.getOperand(1) == N1.getOperand(0))
5350 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5351 Cond);
5352 }
5353 }
5354
5355 // If RHS is a legal immediate value for a compare instruction, we need
5356 // to be careful about increasing register pressure needlessly.
5357 bool LegalRHSImm = false;
5358
5359 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5360 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5361 // Turn (X+C1) == C2 --> X == C2-C1
5362 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5363 return DAG.getSetCC(
5364 dl, VT, N0.getOperand(0),
5365 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5366 dl, N0.getValueType()),
5367 Cond);
5368
5369 // Turn (X^C1) == C2 --> X == C1^C2
5370 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5371 return DAG.getSetCC(
5372 dl, VT, N0.getOperand(0),
5373 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5374 dl, N0.getValueType()),
5375 Cond);
5376 }
5377
5378 // Turn (C1-X) == C2 --> X == C1-C2
5379 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5380 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5381 return DAG.getSetCC(
5382 dl, VT, N0.getOperand(1),
5383 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5384 dl, N0.getValueType()),
5385 Cond);
5386
5387 // Could RHSC fold directly into a compare?
5388 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5389 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5390 }
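// e.g. (X + 3) == 10 folds to X == 7, (X ^ 5) == 1 folds to X == 4, and
// (7 - X) == 2 folds to X == 5; each requires the binop to have one use.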
5391
5392 // (X+Y) == X --> Y == 0 and similar folds.
5393 // Don't do this if X is an immediate that can fold into a cmp
5394 // instruction and X+Y has other uses. It could be an induction variable
5395 // chain, and the transform would increase register pressure.
5396 if (!LegalRHSImm || N0.hasOneUse())
5397 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5398 return V;
5399 }
5400
5401 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5402 N1.getOpcode() == ISD::XOR)
5403 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5404 return V;
5405
5406 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5407 return V;
5408 }
5409
5410 // Fold remainder of division by a constant.
5411 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5412 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5413 // When division is cheap or optimizing for minimum size,
5414 // fall through to DIVREM creation by skipping this fold.
5415 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5416 if (N0.getOpcode() == ISD::UREM) {
5417 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5418 return Folded;
5419 } else if (N0.getOpcode() == ISD::SREM) {
5420 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5421 return Folded;
5422 }
5423 }
5424 }
5425
5426 // Fold away ALL boolean setcc's.
5427 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5428 SDValue Temp;
5429 switch (Cond) {
5430 default: llvm_unreachable("Unknown integer setcc!");
5431 case ISD::SETEQ: // X == Y -> ~(X^Y)
5432 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5433 N0 = DAG.getNOT(dl, Temp, OpVT);
5434 if (!DCI.isCalledByLegalizer())
5435 DCI.AddToWorklist(Temp.getNode());
5436 break;
5437 case ISD::SETNE: // X != Y --> (X^Y)
5438 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5439 break;
5440 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5441 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5442 Temp = DAG.getNOT(dl, N0, OpVT);
5443 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5444 if (!DCI.isCalledByLegalizer())
5445 DCI.AddToWorklist(Temp.getNode());
5446 break;
5447 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5448 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5449 Temp = DAG.getNOT(dl, N1, OpVT);
5450 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5451 if (!DCI.isCalledByLegalizer())
5452 DCI.AddToWorklist(Temp.getNode());
5453 break;
5454 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5455 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5456 Temp = DAG.getNOT(dl, N0, OpVT);
5457 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5458 if (!DCI.isCalledByLegalizer())
5459 DCI.AddToWorklist(Temp.getNode());
5460 break;
5461 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5462 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5463 Temp = DAG.getNOT(dl, N1, OpVT);
5464 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5465 break;
5466 }
5467 if (VT.getScalarType() != MVT::i1) {
5468 if (!DCI.isCalledByLegalizer())
5469 DCI.AddToWorklist(N0.getNode());
5470 // FIXME: If running after legalize, we probably can't do this.
5471 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5472 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5473 }
5474 return N0;
5475 }
5476
5477 // Could not fold it.
5478 return SDValue();
5479}
5480
5481/// Returns true (and the GlobalValue and the offset) if the node is a
5482 /// GlobalAddress + offset.
5483 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5484 int64_t &Offset) const {
5485
5486 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5487
5488 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5489 GA = GASD->getGlobal();
5490 Offset += GASD->getOffset();
5491 return true;
5492 }
5493
5494 if (N->getOpcode() == ISD::ADD) {
5495 SDValue N1 = N->getOperand(0);
5496 SDValue N2 = N->getOperand(1);
5497 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5498 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5499 Offset += V->getSExtValue();
5500 return true;
5501 }
5502 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5503 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5504 Offset += V->getSExtValue();
5505 return true;
5506 }
5507 }
5508 }
5509
5510 return false;
5511}
5512
5513 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5514 DAGCombinerInfo &DCI) const {
5515 // Default implementation: no optimization.
5516 return SDValue();
5517}
5518
5519//===----------------------------------------------------------------------===//
5520// Inline Assembler Implementation Methods
5521//===----------------------------------------------------------------------===//
5522
5523 TargetLowering::ConstraintType
5524 TargetLowering::getConstraintType(StringRef Constraint) const {
5525 unsigned S = Constraint.size();
5526
5527 if (S == 1) {
5528 switch (Constraint[0]) {
5529 default: break;
5530 case 'r':
5531 return C_RegisterClass;
5532 case 'm': // memory
5533 case 'o': // offsetable
5534 case 'V': // not offsetable
5535 return C_Memory;
5536 case 'p': // Address.
5537 return C_Address;
5538 case 'n': // Simple Integer
5539 case 'E': // Floating Point Constant
5540 case 'F': // Floating Point Constant
5541 return C_Immediate;
5542 case 'i': // Simple Integer or Relocatable Constant
5543 case 's': // Relocatable Constant
5544 case 'X': // Allow ANY value.
5545 case 'I': // Target registers.
5546 case 'J':
5547 case 'K':
5548 case 'L':
5549 case 'M':
5550 case 'N':
5551 case 'O':
5552 case 'P':
5553 case '<':
5554 case '>':
5555 return C_Other;
5556 }
5557 }
5558
5559 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5560 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5561 return C_Memory;
5562 return C_Register;
5563 }
5564 return C_Unknown;
5565}
5566
5567/// Try to replace an X constraint, which matches anything, with another that
5568/// has more specific requirements based on the type of the corresponding
5569/// operand.
5570const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5571 if (ConstraintVT.isInteger())
5572 return "r";
5573 if (ConstraintVT.isFloatingPoint())
5574 return "f"; // works for many targets
5575 return nullptr;
5576}
5577
5578 SDValue TargetLowering::LowerAsmOutputForConstraint(
5579 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5580 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5581 return SDValue();
5582}
5583
5584/// Lower the specified operand into the Ops vector.
5585 /// If it is invalid, don't add anything to Ops.
5586 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5587 StringRef Constraint,
5588 std::vector<SDValue> &Ops,
5589 SelectionDAG &DAG) const {
5590
5591 if (Constraint.size() > 1)
5592 return;
5593
5594 char ConstraintLetter = Constraint[0];
5595 switch (ConstraintLetter) {
5596 default: break;
5597 case 'X': // Allows any operand
5598 case 'i': // Simple Integer or Relocatable Constant
5599 case 'n': // Simple Integer
5600 case 's': { // Relocatable Constant
5601
5602 ConstantSDNode *C;
5603 uint64_t Offset = 0;
5604
5605 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5606 // etc., since getelementptr is variadic. We can't use
5607 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5608 // while in this case the GA may be furthest from the root node which is
5609 // likely an ISD::ADD.
5610 while (true) {
5611 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5612 // gcc prints these as sign extended. Sign extend value to 64 bits
5613 // now; without this it would get ZExt'd later in
5614 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5615 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5616 BooleanContent BCont = getBooleanContents(MVT::i64);
5617 ISD::NodeType ExtOpc =
5618 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5619 int64_t ExtVal =
5620 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5621 Ops.push_back(
5622 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5623 return;
5624 }
5625 if (ConstraintLetter != 'n') {
5626 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5627 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5628 GA->getValueType(0),
5629 Offset + GA->getOffset()));
5630 return;
5631 }
5632 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5633 Ops.push_back(DAG.getTargetBlockAddress(
5634 BA->getBlockAddress(), BA->getValueType(0),
5635 Offset + BA->getOffset(), BA->getTargetFlags()));
5636 return;
5637 }
5638 if (isa<BasicBlockSDNode>(Op)) {
5639 Ops.push_back(Op);
5640 return;
5641 }
5642 }
5643 const unsigned OpCode = Op.getOpcode();
5644 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5645 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5646 Op = Op.getOperand(1);
5647 // Subtraction is not commutative.
5648 else if (OpCode == ISD::ADD &&
5649 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5650 Op = Op.getOperand(0);
5651 else
5652 return;
5653 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5654 continue;
5655 }
5656 return;
5657 }
5658 break;
5659 }
5660 }
5661}
5662
5663 void TargetLowering::CollectTargetIntrinsicOperands(
5664 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5665}
5666
5667 std::pair<unsigned, const TargetRegisterClass *>
5668 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5669 StringRef Constraint,
5670 MVT VT) const {
5671 if (!Constraint.starts_with("{"))
5672 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5673 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5674
5675 // Remove the braces from around the name.
5676 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5677
5678 std::pair<unsigned, const TargetRegisterClass *> R =
5679 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5680
5681 // Figure out which register class contains this reg.
5682 for (const TargetRegisterClass *RC : RI->regclasses()) {
5683 // If none of the value types for this register class are valid, we
5684 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5685 if (!isLegalRC(*RI, *RC))
5686 continue;
5687
5688 for (const MCPhysReg &PR : *RC) {
5689 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5690 std::pair<unsigned, const TargetRegisterClass *> S =
5691 std::make_pair(PR, RC);
5692
5693 // If this register class has the requested value type, return it,
5694 // otherwise keep searching and return the first class found
5695 // if no other is found which explicitly has the requested type.
5696 if (RI->isTypeLegalForClass(*RC, VT))
5697 return S;
5698 if (!R.second)
5699 R = S;
5700 }
5701 }
5702 }
5703
5704 return R;
5705}
5706
5707//===----------------------------------------------------------------------===//
5708// Constraint Selection.
5709
5710 /// Return true if this is an input operand that is a matching constraint like
5711 /// "4".
5712 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5713 assert(!ConstraintCode.empty() && "No known constraint!");
5714 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5715}
5716
5717/// If this is an input matching constraint, this method returns the output
5718 /// operand it matches.
5719 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5720 assert(!ConstraintCode.empty() && "No known constraint!");
5721 return atoi(ConstraintCode.c_str());
5722}
5723
5724/// Split up the constraint string from the inline assembly value into the
5725/// specific constraints and their prefixes, and also tie in the associated
5726/// operand values.
5727/// If this returns an empty vector, and if the constraint string itself
5728 /// isn't empty, there was an error parsing.
5729 TargetLowering::AsmOperandInfoVector
5730 TargetLowering::ParseConstraints(const DataLayout &DL,
5731 const TargetRegisterInfo *TRI,
5732 const CallBase &Call) const {
5733 /// Information about all of the constraints.
5734 AsmOperandInfoVector ConstraintOperands;
5735 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5736 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5737
5738 // Do a prepass over the constraints, canonicalizing them, and building up the
5739 // ConstraintOperands list.
5740 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5741 unsigned ResNo = 0; // ResNo - The result number of the next output.
5742 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5743
5744 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5745 ConstraintOperands.emplace_back(std::move(CI));
5746 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5747
5748 // Update multiple alternative constraint count.
5749 if (OpInfo.multipleAlternatives.size() > maCount)
5750 maCount = OpInfo.multipleAlternatives.size();
5751
5752 OpInfo.ConstraintVT = MVT::Other;
5753
5754 // Compute the value type for each operand.
5755 switch (OpInfo.Type) {
5756 case InlineAsm::isOutput:
5757 // Indirect outputs just consume an argument.
5758 if (OpInfo.isIndirect) {
5759 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5760 break;
5761 }
5762
5763 // The return value of the call is this value. As such, there is no
5764 // corresponding argument.
5765 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5766 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5767 OpInfo.ConstraintVT =
5768 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5769 .getSimpleVT();
5770 } else {
5771 assert(ResNo == 0 && "Asm only has one result!");
5772 OpInfo.ConstraintVT =
5773 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5774 }
5775 ++ResNo;
5776 break;
5777 case InlineAsm::isInput:
5778 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5779 break;
5780 case InlineAsm::isLabel:
5781 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5782 ++LabelNo;
5783 continue;
5784 case InlineAsm::isClobber:
5785 // Nothing to do.
5786 break;
5787 }
5788
5789 if (OpInfo.CallOperandVal) {
5790 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5791 if (OpInfo.isIndirect) {
5792 OpTy = Call.getParamElementType(ArgNo);
5793 assert(OpTy && "Indirect operand must have elementtype attribute");
5794 }
5795
5796 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5797 if (StructType *STy = dyn_cast<StructType>(OpTy))
5798 if (STy->getNumElements() == 1)
5799 OpTy = STy->getElementType(0);
5800
5801 // If OpTy is not a single value, it may be a struct/union that we
5802 // can tile with integers.
5803 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5804 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5805 switch (BitSize) {
5806 default: break;
5807 case 1:
5808 case 8:
5809 case 16:
5810 case 32:
5811 case 64:
5812 case 128:
5813 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5814 break;
5815 }
5816 }
5817
5818 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5819 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5820 ArgNo++;
5821 }
5822 }
5823
5824 // If we have multiple alternative constraints, select the best alternative.
5825 if (!ConstraintOperands.empty()) {
5826 if (maCount) {
5827 unsigned bestMAIndex = 0;
5828 int bestWeight = -1;
5829 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5830 int weight = -1;
5831 unsigned maIndex;
5832 // Compute the sums of the weights for each alternative, keeping track
5833 // of the best (highest weight) one so far.
5834 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5835 int weightSum = 0;
5836 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5837 cIndex != eIndex; ++cIndex) {
5838 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5839 if (OpInfo.Type == InlineAsm::isClobber)
5840 continue;
5841
5842 // If this is an output operand with a matching input operand,
5843 // look up the matching input. If their types mismatch, e.g. one
5844 // is an integer, the other is floating point, or their sizes are
5845 // different, flag it as an maCantMatch.
5846 if (OpInfo.hasMatchingInput()) {
5847 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5848 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5849 if ((OpInfo.ConstraintVT.isInteger() !=
5850 Input.ConstraintVT.isInteger()) ||
5851 (OpInfo.ConstraintVT.getSizeInBits() !=
5852 Input.ConstraintVT.getSizeInBits())) {
5853 weightSum = -1; // Can't match.
5854 break;
5855 }
5856 }
5857 }
5858 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5859 if (weight == -1) {
5860 weightSum = -1;
5861 break;
5862 }
5863 weightSum += weight;
5864 }
5865 // Update best.
5866 if (weightSum > bestWeight) {
5867 bestWeight = weightSum;
5868 bestMAIndex = maIndex;
5869 }
5870 }
5871
5872 // Now select chosen alternative in each constraint.
5873 for (AsmOperandInfo &cInfo : ConstraintOperands)
5874 if (cInfo.Type != InlineAsm::isClobber)
5875 cInfo.selectAlternative(bestMAIndex);
5876 }
5877 }
5878
5879 // Check and hook up tied operands, choose constraint code to use.
5880 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5881 cIndex != eIndex; ++cIndex) {
5882 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5883
5884 // If this is an output operand with a matching input operand, look up the
5885 // matching input. If their types mismatch, e.g. one is an integer, the
5886 // other is floating point, or their sizes are different, flag it as an
5887 // error.
5888 if (OpInfo.hasMatchingInput()) {
5889 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5890
5891 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5892 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5893 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5894 OpInfo.ConstraintVT);
5895 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5896 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5897 Input.ConstraintVT);
5898 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
5899 OpInfo.ConstraintVT.isFloatingPoint();
5900 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
5901 Input.ConstraintVT.isFloatingPoint();
5902 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
5903 (MatchRC.second != InputRC.second)) {
5904 report_fatal_error("Unsupported asm: input constraint"
5905 " with a matching output constraint of"
5906 " incompatible type!");
5907 }
5908 }
5909 }
5910 }
5911
5912 return ConstraintOperands;
5913}
5914
5915 /// Return a number indicating our preference for choosing a type of constraint
5916 /// over another, for the purpose of sorting them. Immediates are almost always
5917 /// preferable (when they can be emitted). A higher return value means a
5918/// stronger preference for one constraint type relative to another.
5919/// FIXME: We should prefer registers over memory but doing so may lead to
5920/// unrecoverable register exhaustion later.
5921/// https://github.com/llvm/llvm-project/issues/20571
5922 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5923 switch (CT) {
5924 case TargetLowering::C_Immediate:
5925 case TargetLowering::C_Other:
5926 return 4;
5927 case TargetLowering::C_Memory:
5928 case TargetLowering::C_Address:
5929 return 3;
5930 case TargetLowering::C_RegisterClass:
5931 return 2;
5932 case TargetLowering::C_Register:
5933 return 1;
5934 case TargetLowering::C_Unknown:
5935 return 0;
5936 }
5937 llvm_unreachable("Invalid constraint type");
5938}
5939
5940/// Examine constraint type and operand type and determine a weight value.
5941/// This object must already have been set up with the operand type
5942/// and the current alternative constraint selected.
5943 TargetLowering::ConstraintWeight
5944 TargetLowering::getMultipleConstraintMatchWeight(
5945 AsmOperandInfo &info, int maIndex) const {
5946 InlineAsm::ConstraintCodeVector *rCodes;
5947 if (maIndex >= (int)info.multipleAlternatives.size())
5948 rCodes = &info.Codes;
5949 else
5950 rCodes = &info.multipleAlternatives[maIndex].Codes;
5951 ConstraintWeight BestWeight = CW_Invalid;
5952
5953 // Loop over the options, keeping track of the most general one.
5954 for (const std::string &rCode : *rCodes) {
5955 ConstraintWeight weight =
5956 getSingleConstraintMatchWeight(info, rCode.c_str());
5957 if (weight > BestWeight)
5958 BestWeight = weight;
5959 }
5960
5961 return BestWeight;
5962}
5963
5964/// Examine constraint type and operand type and determine a weight value.
5965/// This object must already have been set up with the operand type
5966/// and the current alternative constraint selected.
5967 TargetLowering::ConstraintWeight
5968 TargetLowering::getSingleConstraintMatchWeight(
5969 AsmOperandInfo &info, const char *constraint) const {
5970 ConstraintWeight weight = CW_Invalid;
5971 Value *CallOperandVal = info.CallOperandVal;
5972 // If we don't have a value, we can't do a match,
5973 // but allow it at the lowest weight.
5974 if (!CallOperandVal)
5975 return CW_Default;
5976 // Look at the constraint type.
5977 switch (*constraint) {
5978 case 'i': // immediate integer.
5979 case 'n': // immediate integer with a known value.
5980 if (isa<ConstantInt>(CallOperandVal))
5981 weight = CW_Constant;
5982 break;
5983 case 's': // non-explicit integral immediate.
5984 if (isa<GlobalValue>(CallOperandVal))
5985 weight = CW_Constant;
5986 break;
5987 case 'E': // immediate float if host format.
5988 case 'F': // immediate float.
5989 if (isa<ConstantFP>(CallOperandVal))
5990 weight = CW_Constant;
5991 break;
5992 case '<': // memory operand with autodecrement.
5993 case '>': // memory operand with autoincrement.
5994 case 'm': // memory operand.
5995 case 'o': // offsettable memory operand
5996 case 'V': // non-offsettable memory operand
5997 weight = CW_Memory;
5998 break;
5999 case 'r': // general register.
6000 case 'g': // general register, memory operand or immediate integer.
6001 // note: Clang converts "g" to "imr".
6002 if (CallOperandVal->getType()->isIntegerTy())
6003 weight = CW_Register;
6004 break;
6005 case 'X': // any operand.
6006 default:
6007 weight = CW_Default;
6008 break;
6009 }
6010 return weight;
6011}
6012
6013/// If there are multiple different constraints that we could pick for this
6014/// operand (e.g. "imr") try to pick the 'best' one.
6015/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6016/// into seven classes:
6017/// Register -> one specific register
6018/// RegisterClass -> a group of regs
6019/// Memory -> memory
6020/// Address -> a symbolic memory reference
6021/// Immediate -> immediate values
6022/// Other -> magic values (such as "Flag Output Operands")
6023/// Unknown -> something we don't recognize yet and can't handle
6024/// Ideally, we would pick the most specific constraint possible: if we have
6025/// something that fits into a register, we would pick it. The problem here
6026/// is that if we have something that could either be in a register or in
6027 /// memory, then using the register could cause selection of *other*
6028/// operands to fail: they might only succeed if we pick memory. Because of
6029/// this the heuristic we use is:
6030///
6031/// 1) If there is an 'other' constraint, and if the operand is valid for
6032/// that constraint, use it. This makes us take advantage of 'i'
6033/// constraints when available.
6034/// 2) Otherwise, pick the most general constraint present. This prefers
6035/// 'm' over 'r', for example.
6036///
6037 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6038 TargetLowering::AsmOperandInfo &OpInfo) const {
6039 ConstraintGroup Ret;
6040
6041 Ret.reserve(OpInfo.Codes.size());
6042 for (StringRef Code : OpInfo.Codes) {
6043 TargetLowering::ConstraintType CType = getConstraintType(Code);
6044
6045 // Indirect 'other' or 'immediate' constraints are not allowed.
6046 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6047 CType == TargetLowering::C_Register ||
6048 CType == TargetLowering::C_RegisterClass))
6049 continue;
6050
6051 // Things with matching constraints can only be registers, per gcc
6052 // documentation. This mainly affects "g" constraints.
6053 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6054 continue;
6055
6056 Ret.emplace_back(Code, CType);
6057 }
6058
6059 std::stable_sort(
6060 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6061 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6062 });
6063
6064 return Ret;
6065}
6066
6067/// If we have an immediate, see if we can lower it. Return true if we can,
6068/// false otherwise.
6069 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6070 SDValue Op, SelectionDAG *DAG,
6071 const TargetLowering &TLI) {
6072
6073 assert((P.second == TargetLowering::C_Other ||
6074 P.second == TargetLowering::C_Immediate) &&
6075 "need immediate or other");
6076
6077 if (!Op.getNode())
6078 return false;
6079
6080 std::vector<SDValue> ResultOps;
6081 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6082 return !ResultOps.empty();
6083}
6084
6085/// Determines the constraint code and constraint type to use for the specific
6086/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6087 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6088 SDValue Op,
6089 SelectionDAG *DAG) const {
6090 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6091
6092 // Single-letter constraints ('r') are very common.
6093 if (OpInfo.Codes.size() == 1) {
6094 OpInfo.ConstraintCode = OpInfo.Codes[0];
6095 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6096 } else {
6097 ConstraintGroup G = getConstraintPreferences(OpInfo);
6098 if (G.empty())
6099 return;
6100
6101 unsigned BestIdx = 0;
6102 for (const unsigned E = G.size();
6103 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6104 G[BestIdx].second == TargetLowering::C_Immediate);
6105 ++BestIdx) {
6106 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6107 break;
6108 // If we're out of constraints, just pick the first one.
6109 if (BestIdx + 1 == E) {
6110 BestIdx = 0;
6111 break;
6112 }
6113 }
6114
6115 OpInfo.ConstraintCode = G[BestIdx].first;
6116 OpInfo.ConstraintType = G[BestIdx].second;
6117 }
6118
6119 // 'X' matches anything.
6120 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6121 // Constants are handled elsewhere. For Functions, the type here is the
6122 // type of the result, which is not what we want to look at; leave them
6123 // alone.
6124 Value *v = OpInfo.CallOperandVal;
6125 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6126 return;
6127 }
6128
6129 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6130 OpInfo.ConstraintCode = "i";
6131 return;
6132 }
6133
6134 // Otherwise, try to resolve it to something we know about by looking at
6135 // the actual operand type.
6136 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6137 OpInfo.ConstraintCode = Repl;
6138 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6139 }
6140 }
6141}
6142
6143/// Given an exact SDIV by a constant, create a multiplication
6144/// with the multiplicative inverse of the constant.
6145/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6146 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6147 const SDLoc &dl, SelectionDAG &DAG,
6148 SmallVectorImpl<SDNode *> &Created) {
6149 SDValue Op0 = N->getOperand(0);
6150 SDValue Op1 = N->getOperand(1);
6151 EVT VT = N->getValueType(0);
6152 EVT SVT = VT.getScalarType();
6153 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6154 EVT ShSVT = ShVT.getScalarType();
6155
6156 bool UseSRA = false;
6157 SmallVector<SDValue, 16> Shifts, Factors;
6158
6159 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6160 if (C->isZero())
6161 return false;
6162 APInt Divisor = C->getAPIntValue();
6163 unsigned Shift = Divisor.countr_zero();
6164 if (Shift) {
6165 Divisor.ashrInPlace(Shift);
6166 UseSRA = true;
6167 }
6168 APInt Factor = Divisor.multiplicativeInverse();
6169 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6170 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6171 return true;
6172 };
6173
6174 // Collect all magic values from the build vector.
6175 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6176 return SDValue();
6177
6178 SDValue Shift, Factor;
6179 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6180 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6181 Factor = DAG.getBuildVector(VT, dl, Factors);
6182 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6183 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6184 "Expected matchUnaryPredicate to return one element for scalable "
6185 "vectors");
6186 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6187 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6188 } else {
6189 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6190 Shift = Shifts[0];
6191 Factor = Factors[0];
6192 }
6193
6194 SDValue Res = Op0;
6195 if (UseSRA) {
6196 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6197 Created.push_back(Res.getNode());
6198 }
6199
6200 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6201}
6202
6203/// Given an exact UDIV by a constant, create a multiplication
6204/// with the multiplicative inverse of the constant.
6205/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6206 static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6207 const SDLoc &dl, SelectionDAG &DAG,
6208 SmallVectorImpl<SDNode *> &Created) {
6209 EVT VT = N->getValueType(0);
6210 EVT SVT = VT.getScalarType();
6211 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6212 EVT ShSVT = ShVT.getScalarType();
6213
6214 bool UseSRL = false;
6215 SmallVector<SDValue, 16> Shifts, Factors;
6216
6217 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6218 if (C->isZero())
6219 return false;
6220 APInt Divisor = C->getAPIntValue();
6221 unsigned Shift = Divisor.countr_zero();
6222 if (Shift) {
6223 Divisor.lshrInPlace(Shift);
6224 UseSRL = true;
6225 }
6226 // Calculate the multiplicative inverse modulo BW.
6227 APInt Factor = Divisor.multiplicativeInverse();
6228 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6229 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6230 return true;
6231 };
6232
6233 SDValue Op1 = N->getOperand(1);
6234
6235 // Collect all magic values from the build vector.
6236 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6237 return SDValue();
6238
6239 SDValue Shift, Factor;
6240 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6241 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6242 Factor = DAG.getBuildVector(VT, dl, Factors);
6243 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6244 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6245 "Expected matchUnaryPredicate to return one element for scalable "
6246 "vectors");
6247 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6248 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6249 } else {
6250 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6251 Shift = Shifts[0];
6252 Factor = Factors[0];
6253 }
6254
6255 SDValue Res = N->getOperand(0);
6256 if (UseSRL) {
6257 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6258 Created.push_back(Res.getNode());
6259 }
6260
6261 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6262}
6263
6264 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6265 SelectionDAG &DAG,
6266 SmallVectorImpl<SDNode *> &Created) const {
6267 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6268 if (isIntDivCheap(N->getValueType(0), Attr))
6269 return SDValue(N, 0); // Lower SDIV as SDIV
6270 return SDValue();
6271}
6272
6273SDValue
6274 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6275 SelectionDAG &DAG,
6276 SmallVectorImpl<SDNode *> &Created) const {
6277 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6278 if (isIntDivCheap(N->getValueType(0), Attr))
6279 return SDValue(N, 0); // Lower SREM as SREM
6280 return SDValue();
6281}
6282
6283/// Build sdiv by power-of-2 with conditional move instructions
6284/// Ref: "Hacker's Delight" by Henry Warren 10-1
6285/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6286/// bgez x, label
6287/// add x, x, 2**k-1
6288/// label:
6289/// sra res, x, k
6290/// neg res, res (when the divisor is negative)
6291 SDValue TargetLowering::buildSDIVPow2WithCMov(
6292 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6293 SmallVectorImpl<SDNode *> &Created) const {
6294 unsigned Lg2 = Divisor.countr_zero();
6295 EVT VT = N->getValueType(0);
6296
6297 SDLoc DL(N);
6298 SDValue N0 = N->getOperand(0);
6299 SDValue Zero = DAG.getConstant(0, DL, VT);
6300 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6301 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6302
6303 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6304 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6305 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6306 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6307 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6308
6309 Created.push_back(Cmp.getNode());
6310 Created.push_back(Add.getNode());
6311 Created.push_back(CMov.getNode());
6312
6313 // Divide by pow2.
6314 SDValue SRA =
6315 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6316
6317 // If we're dividing by a positive value, we're done. Otherwise, we must
6318 // negate the result.
6319 if (Divisor.isNonNegative())
6320 return SRA;
6321
6322 Created.push_back(SRA.getNode());
6323 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6324}
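// Scalar sketch of the sequence built above (hypothetical helper, i32, k = 3):
//
//   int32_t sdiv_by_8(int32_t x) {
//     int32_t t = x < 0 ? x + 7 : x; // SETLT + ADD + SELECT of (x + 2^k - 1)
//     return t >> 3;                 // SRA by k; a negative divisor would
//   }                                // then negate the result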
6325
6326/// Given an ISD::SDIV node expressing a divide by constant,
6327/// return a DAG expression to select that will generate the same value by
6328/// multiplying by a magic number.
6329/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6330 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6331 bool IsAfterLegalization,
6332 bool IsAfterLegalTypes,
6333 SmallVectorImpl<SDNode *> &Created) const {
6334 SDLoc dl(N);
6335 EVT VT = N->getValueType(0);
6336 EVT SVT = VT.getScalarType();
6337 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6338 EVT ShSVT = ShVT.getScalarType();
6339 unsigned EltBits = VT.getScalarSizeInBits();
6340 EVT MulVT;
6341
6342 // Check to see if we can do this.
6343 // FIXME: We should be more aggressive here.
6344 if (!isTypeLegal(VT)) {
6345 // Limit this to simple scalars for now.
6346 if (VT.isVector() || !VT.isSimple())
6347 return SDValue();
6348
6349 // If this type will be promoted to a large enough type with a legal
6350 // multiply operation, we can go ahead and do this transform.
6351 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6352 return SDValue();
6353
6354 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6355 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6356 !isOperationLegal(ISD::MUL, MulVT))
6357 return SDValue();
6358 }
6359
6360 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6361 if (N->getFlags().hasExact())
6362 return BuildExactSDIV(*this, N, dl, DAG, Created);
6363
6364 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6365
6366 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6367 if (C->isZero())
6368 return false;
6369
6370 const APInt &Divisor = C->getAPIntValue();
6371 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6372 int NumeratorFactor = 0;
6373 int ShiftMask = -1;
6374
6375 if (Divisor.isOne() || Divisor.isAllOnes()) {
6376 // If d is +1/-1, we just multiply the numerator by +1/-1.
6377 NumeratorFactor = Divisor.getSExtValue();
6378 magics.Magic = 0;
6379 magics.ShiftAmount = 0;
6380 ShiftMask = 0;
6381 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6382 // If d > 0 and m < 0, add the numerator.
6383 NumeratorFactor = 1;
6384 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6385 // If d < 0 and m > 0, subtract the numerator.
6386 NumeratorFactor = -1;
6387 }
6388
6389 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6390 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6391 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6392 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6393 return true;
6394 };
6395
6396 SDValue N0 = N->getOperand(0);
6397 SDValue N1 = N->getOperand(1);
6398
6399 // Collect the shifts / magic values from each element.
6400 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6401 return SDValue();
6402
6403 SDValue MagicFactor, Factor, Shift, ShiftMask;
6404 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6405 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6406 Factor = DAG.getBuildVector(VT, dl, Factors);
6407 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6408 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6409 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6410 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6411 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6412 "Expected matchUnaryPredicate to return one element for scalable "
6413 "vectors");
6414 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6415 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6416 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6417 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6418 } else {
6419 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6420 MagicFactor = MagicFactors[0];
6421 Factor = Factors[0];
6422 Shift = Shifts[0];
6423 ShiftMask = ShiftMasks[0];
6424 }
6425
6426 // Multiply the numerator (operand 0) by the magic value.
6427 // FIXME: We should support doing a MUL in a wider type.
6428 auto GetMULHS = [&](SDValue X, SDValue Y) {
6429 // If the type isn't legal, use a wider mul of the type calculated
6430 // earlier.
6431 if (!isTypeLegal(VT)) {
6432 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6433 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6434 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6435 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6436 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6437 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6438 }
6439
6440 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6441 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6442 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6443 SDValue LoHi =
6444 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6445 return SDValue(LoHi.getNode(), 1);
6446 }
6447 // If a type twice as wide is legal, widen and use a mul plus a shift.
6448 unsigned Size = VT.getScalarSizeInBits();
6449 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6450 if (VT.isVector())
6451 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6452 VT.getVectorElementCount());
6453 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6454 // custom lowered. This is very expensive so avoid it at all costs for
6455 // constant divisors.
6456 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6457 isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6458 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6459 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6460 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6461 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6462 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6463 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6464 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6465 }
6466 return SDValue();
6467 };
6468
6469 SDValue Q = GetMULHS(N0, MagicFactor);
6470 if (!Q)
6471 return SDValue();
6472
6473 Created.push_back(Q.getNode());
6474
6475 // (Optionally) Add/subtract the numerator using Factor.
6476 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6477 Created.push_back(Factor.getNode());
6478 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6479 Created.push_back(Q.getNode());
6480
6481 // Shift right algebraic by shift value.
6482 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6483 Created.push_back(Q.getNode());
6484
6485 // Extract the sign bit, mask it and add it to the quotient.
6486 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6487 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6488 Created.push_back(T.getNode());
6489 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6490 Created.push_back(T.getNode());
6491 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6492}
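// Worked i32 instance of the sequence above (illustrative sketch only; for
// d = 7 the magic constant is 0x92492493 with shift amount 2, and since
// d > 0 while the magic is negative, the numerator is added back):
//
//   int32_t sdiv_by_7(int32_t n) {
//     int32_t q = (int32_t)(((int64_t)n * (int32_t)0x92492493) >> 32); // MULHS
//     q += n;                            // NumeratorFactor == +1 here
//     q >>= 2;                           // SRA by the magic shift
//     q += (int32_t)((uint32_t)q >> 31); // add the masked sign bit
//     return q;
//   }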
6493
6494/// Given an ISD::UDIV node expressing a divide by constant,
6495/// return a DAG expression to select that will generate the same value by
6496/// multiplying by a magic number.
6497/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6498 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6499 bool IsAfterLegalization,
6500 bool IsAfterLegalTypes,
6501 SmallVectorImpl<SDNode *> &Created) const {
6502 SDLoc dl(N);
6503 EVT VT = N->getValueType(0);
6504 EVT SVT = VT.getScalarType();
6505 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6506 EVT ShSVT = ShVT.getScalarType();
6507 unsigned EltBits = VT.getScalarSizeInBits();
6508 EVT MulVT;
6509
6510 // Check to see if we can do this.
6511 // FIXME: We should be more aggressive here.
6512 if (!isTypeLegal(VT)) {
6513 // Limit this to simple scalars for now.
6514 if (VT.isVector() || !VT.isSimple())
6515 return SDValue();
6516
6517 // If this type will be promoted to a large enough type with a legal
6518 // multiply operation, we can go ahead and do this transform.
6519 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6520 return SDValue();
6521
6522 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6523 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6524 !isOperationLegal(ISD::MUL, MulVT))
6525 return SDValue();
6526 }
6527
6528 // If the udiv has an 'exact' bit we can use a simpler lowering.
6529 if (N->getFlags().hasExact())
6530 return BuildExactUDIV(*this, N, dl, DAG, Created);
6531
6532 SDValue N0 = N->getOperand(0);
6533 SDValue N1 = N->getOperand(1);
6534
6535 // Try to use leading zeros of the dividend to reduce the multiplier and
6536 // avoid expensive fixups.
6537 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6538
6539 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6540 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6541
6542 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6543 if (C->isZero())
6544 return false;
6545 const APInt& Divisor = C->getAPIntValue();
6546
6547 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6548
6549 // Magic algorithm doesn't work for division by 1. We need to emit a select
6550 // at the end.
6551 if (Divisor.isOne()) {
6552 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6553 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6554 } else {
6555 UnsignedDivisionByConstantInfo magics =
6556 UnsignedDivisionByConstantInfo::get(
6557 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6558
6559 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6560
6561 assert(magics.PreShift < Divisor.getBitWidth() &&
6562 "We shouldn't generate an undefined shift!");
6563 assert(magics.PostShift < Divisor.getBitWidth() &&
6564 "We shouldn't generate an undefined shift!");
6565 assert((!magics.IsAdd || magics.PreShift == 0) &&
6566 "Unexpected pre-shift");
6567 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6568 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6569 NPQFactor = DAG.getConstant(
6570 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6571 : APInt::getZero(EltBits),
6572 dl, SVT);
6573 UseNPQ |= magics.IsAdd;
6574 UsePreShift |= magics.PreShift != 0;
6575 UsePostShift |= magics.PostShift != 0;
6576 }
6577
6578 PreShifts.push_back(PreShift);
6579 MagicFactors.push_back(MagicFactor);
6580 NPQFactors.push_back(NPQFactor);
6581 PostShifts.push_back(PostShift);
6582 return true;
6583 };
6584
6585 // Collect the shifts/magic values from each element.
6586 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6587 return SDValue();
6588
6589 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6590 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6591 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6592 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6593 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6594 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6595 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6596 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6597 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6598 "Expected matchUnaryPredicate to return one for scalable vectors");
6599 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6600 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6601 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6602 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6603 } else {
6604 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6605 PreShift = PreShifts[0];
6606 MagicFactor = MagicFactors[0];
6607 PostShift = PostShifts[0];
6608 }
6609
6610 SDValue Q = N0;
6611 if (UsePreShift) {
6612 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6613 Created.push_back(Q.getNode());
6614 }
6615
6616 // FIXME: We should support doing a MUL in a wider type.
6617 auto GetMULHU = [&](SDValue X, SDValue Y) {
6618 // If the type isn't legal, use a wider mul of the type calculated
6619 // earlier.
6620 if (!isTypeLegal(VT)) {
6621 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6622 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6623 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6624 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6625 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6626 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6627 }
6628
6629 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6630 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6631 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6632 SDValue LoHi =
6633 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6634 return SDValue(LoHi.getNode(), 1);
6635 }
6636 // If a type twice as wide is legal, widen and use a mul plus a shift.
6637 unsigned Size = VT.getScalarSizeInBits();
6638 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6639 if (VT.isVector())
6640 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6641 VT.getVectorElementCount());
6642 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6643 // custom lowered. This is very expensive so avoid it at all costs for
6644 // constant divisors.
6645 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6646 isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6647 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6648 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6649 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6650 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6651 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6652 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6653 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6654 }
6655 return SDValue(); // No mulhu or equivalent
6656 };
6657
6658 // Multiply the numerator (operand 0) by the magic value.
6659 Q = GetMULHU(Q, MagicFactor);
6660 if (!Q)
6661 return SDValue();
6662
6663 Created.push_back(Q.getNode());
6664
6665 if (UseNPQ) {
6666 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6667 Created.push_back(NPQ.getNode());
6668
6669 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6670 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6671 if (VT.isVector())
6672 NPQ = GetMULHU(NPQ, NPQFactor);
6673 else
6674 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6675
6676 Created.push_back(NPQ.getNode());
6677
6678 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6679 Created.push_back(Q.getNode());
6680 }
6681
6682 if (UsePostShift) {
6683 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6684 Created.push_back(Q.getNode());
6685 }
6686
6687 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6688
6689 SDValue One = DAG.getConstant(1, dl, VT);
6690 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6691 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6692}
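// Worked i32 instance (illustrative sketch; for d = 7 the magic constant is
// 0x24924925 with IsAdd set and post-shift 2, so the NPQ fixup path is used):
//
//   uint32_t udiv_by_7(uint32_t n) {
//     uint32_t q = (uint32_t)(((uint64_t)n * 0x24924925u) >> 32); // MULHU
//     uint32_t npq = (n - q) >> 1; // NPQ: recover the bit the magic lost
//     return (npq + q) >> 2;       // post-shift
//   }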
6693
6694 /// If all values in Values that *don't* match the predicate are the same
6695 /// 'splat' value, then replace all values with that splat value.
6695/// value, then replace all values with that splat value.
6696/// Else, if AlternativeReplacement was provided, then replace all values that
6697/// do match predicate with AlternativeReplacement value.
6698static void
6699 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6700 std::function<bool(SDValue)> Predicate,
6701 SDValue AlternativeReplacement = SDValue()) {
6702 SDValue Replacement;
6703 // Is there a value for which the Predicate does *NOT* match? What is it?
6704 auto SplatValue = llvm::find_if_not(Values, Predicate);
6705 if (SplatValue != Values.end()) {
6706 // Does Values consist only of SplatValue's and values matching Predicate?
6707 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6708 return Value == *SplatValue || Predicate(Value);
6709 })) // Then we shall replace values matching predicate with SplatValue.
6710 Replacement = *SplatValue;
6711 }
6712 if (!Replacement) {
6713 // Oops, we did not find the "baseline" splat value.
6714 if (!AlternativeReplacement)
6715 return; // Nothing to do.
6716 // Let's replace with provided value then.
6717 Replacement = AlternativeReplacement;
6718 }
6719 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6720}
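// E.g. for Values = {X, 0, X, 0} with Predicate = isNullConstant the zeros are
// replaced by X, yielding the splat {X, X, X, X}; if no single such X exists,
// the lanes matching the predicate become AlternativeReplacement (when given).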
6721
6722/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6723/// where the divisor is constant and the comparison target is zero,
6724/// return a DAG expression that will generate the same comparison result
6725/// using only multiplications, additions and shifts/rotations.
6726/// Ref: "Hacker's Delight" 10-17.
6727SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6728 SDValue CompTargetNode,
6729 ISD::CondCode Cond,
6730 DAGCombinerInfo &DCI,
6731 const SDLoc &DL) const {
6732 SmallVector<SDNode *, 16> Built;
6733 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6734 DCI, DL, Built)) {
6735 for (SDNode *N : Built)
6736 DCI.AddToWorklist(N);
6737 return Folded;
6738 }
6739
6740 return SDValue();
6741}
6742
6743SDValue
6744TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6745 SDValue CompTargetNode, ISD::CondCode Cond,
6746 DAGCombinerInfo &DCI, const SDLoc &DL,
6747 SmallVectorImpl<SDNode *> &Created) const {
6748 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6749 // - D must be constant, with D = D0 * 2^K where D0 is odd
6750 // - P is the multiplicative inverse of D0 modulo 2^W
6751 // - Q = floor(((2^W) - 1) / D)
6752 // where W is the width of the common type of N and D.
6753 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6754 "Only applicable for (in)equality comparisons.");
6755
6756 SelectionDAG &DAG = DCI.DAG;
6757
6758 EVT VT = REMNode.getValueType();
6759 EVT SVT = VT.getScalarType();
6760 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6761 EVT ShSVT = ShVT.getScalarType();
6762
6763 // If MUL is unavailable, we cannot proceed in any case.
6764 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6765 return SDValue();
6766
6767 bool ComparingWithAllZeros = true;
6768 bool AllComparisonsWithNonZerosAreTautological = true;
6769 bool HadTautologicalLanes = false;
6770 bool AllLanesAreTautological = true;
6771 bool HadEvenDivisor = false;
6772 bool AllDivisorsArePowerOfTwo = true;
6773 bool HadTautologicalInvertedLanes = false;
6774 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6775
6776 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6777 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6778 if (CDiv->isZero())
6779 return false;
6780
6781 const APInt &D = CDiv->getAPIntValue();
6782 const APInt &Cmp = CCmp->getAPIntValue();
6783
6784 ComparingWithAllZeros &= Cmp.isZero();
6785
6786 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6787 // if C2 is not less than C1, the comparison is always false.
6788 // But we will only be able to produce the comparison that will give the
6789 // opposite tautological answer. So this lane would need to be fixed up.
6790 bool TautologicalInvertedLane = D.ule(Cmp);
6791 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6792
6793 // If all lanes are tautological (either all divisors are ones, or divisor
6794 // is not greater than the constant we are comparing with),
6795 // we will prefer to avoid the fold.
6796 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6797 HadTautologicalLanes |= TautologicalLane;
6798 AllLanesAreTautological &= TautologicalLane;
6799
6800 // If we are comparing with non-zero, we'll need to subtract said
6801 // comparison value from the LHS. But there is no point in doing that if
6802 // every lane where we are comparing with non-zero is tautological.
6803 if (!Cmp.isZero())
6804 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6805
6806 // Decompose D into D0 * 2^K
6807 unsigned K = D.countr_zero();
6808 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6809 APInt D0 = D.lshr(K);
6810
6811 // D is even if it has trailing zeros.
6812 HadEvenDivisor |= (K != 0);
6813 // D is a power-of-two if D0 is one.
6814 // If all divisors are power-of-two, we will prefer to avoid the fold.
6815 AllDivisorsArePowerOfTwo &= D0.isOne();
6816
6817 // P = inv(D0, 2^W)
6818 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6819 unsigned W = D.getBitWidth();
6820 APInt P = D0.multiplicativeInverse();
6821 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6822
6823 // Q = floor((2^W - 1) u/ D)
6824 // R = ((2^W - 1) u% D)
6825 APInt Q, R;
6827
6828 // If we are comparing with zero, then that comparison constant is okay,
6829 // else it may need to be one less than that.
6830 if (Cmp.ugt(R))
6831 Q -= 1;
6832
6834 "We are expecting that K is always less than all-ones for ShSVT");
6835
6836 // If the lane is tautological the result can be constant-folded.
6837 if (TautologicalLane) {
6838 // Set the P and K amounts to bogus values so we can try to splat them.
6839 P = 0;
6840 K = -1;
6841 // And ensure that comparison constant is tautological,
6842 // it will always compare true/false.
6843 Q = -1;
6844 }
6845
6846 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6847 KAmts.push_back(
6848 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
6849 /*implicitTrunc=*/true),
6850 DL, ShSVT));
6851 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6852 return true;
6853 };
6854
6855 SDValue N = REMNode.getOperand(0);
6856 SDValue D = REMNode.getOperand(1);
6857
6858 // Collect the values from each element.
6859 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6860 return SDValue();
6861
6862 // If all lanes are tautological, the result can be constant-folded.
6863 if (AllLanesAreTautological)
6864 return SDValue();
6865
6866 // If this is a urem by a power-of-two, avoid the fold since it can be
6867 // best implemented as a bit test.
6868 if (AllDivisorsArePowerOfTwo)
6869 return SDValue();
6870
6871 SDValue PVal, KVal, QVal;
6872 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6873 if (HadTautologicalLanes) {
6874 // Try to turn PAmts into a splat, since we don't care about the values
6875 // that are currently '0'. If we can't, just keep '0's.
6876 turnVectorIntoSplatVector(PAmts, isNullConstant);
6877 // Try to turn KAmts into a splat, since we don't care about the values
6878 // that are currently '-1'. If we can't, change them to '0's.
6879 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6880 DAG.getConstant(0, DL, ShSVT));
6881 }
6882
6883 PVal = DAG.getBuildVector(VT, DL, PAmts);
6884 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6885 QVal = DAG.getBuildVector(VT, DL, QAmts);
6886 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6887 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6888 "Expected matchBinaryPredicate to return one element for "
6889 "SPLAT_VECTORs");
6890 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6891 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6892 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6893 } else {
6894 PVal = PAmts[0];
6895 KVal = KAmts[0];
6896 QVal = QAmts[0];
6897 }
6898
6899 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6900 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6901 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6902 assert(CompTargetNode.getValueType() == N.getValueType() &&
6903 "Expecting that the types on LHS and RHS of comparisons match.");
6904 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6905 }
6906
6907 // (mul N, P)
6908 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6909 Created.push_back(Op0.getNode());
6910
6911 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6912 // divisors as a performance improvement, since rotating by 0 is a no-op.
6913 if (HadEvenDivisor) {
6914 // We need ROTR to do this.
6915 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6916 return SDValue();
6917 // UREM: (rotr (mul N, P), K)
6918 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6919 Created.push_back(Op0.getNode());
6920 }
6921
6922 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6923 SDValue NewCC =
6924 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6925 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6926 if (!HadTautologicalInvertedLanes)
6927 return NewCC;
6928
6929 // If any lanes previously compared always-false, the NewCC will give
6930 // always-true result for them, so we need to fixup those lanes.
6931 // Or the other way around for inequality predicate.
6932 assert(VT.isVector() && "Can/should only get here for vectors.");
6933 Created.push_back(NewCC.getNode());
6934
6935 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6936 // if C2 is not less than C1, the comparison is always false.
6937 // But we have produced the comparison that will give the
6938 // opposite tautological answer. So these lanes would need to be fixed up.
6939 SDValue TautologicalInvertedChannels =
6940 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6941 Created.push_back(TautologicalInvertedChannels.getNode());
6942
6943 // NOTE: we avoid letting illegal types through even if we're before legalize
6944 // ops; legalization has a hard time producing good code for this.
6945 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6946 // If we have a vector select, let's replace the comparison results in the
6947 // affected lanes with the correct tautological result.
6948 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6949 DL, SETCCVT, SETCCVT);
6950 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6951 Replacement, NewCC);
6952 }
6953
6954 // Else, we can just invert the comparison result in the appropriate lanes.
6955 //
6956 // NOTE: see the VSELECT note above.
6957 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6958 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6959 TautologicalInvertedChannels);
6960
6961 return SDValue(); // Don't know how to lower.
6962}
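// Worked i32 instance of the even-divisor case (illustrative sketch with a
// hypothetical helper; for D = 6 = 3 * 2^1: P = inv(3) mod 2^32 = 0xAAAAAAAB,
// K = 1, and Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA):
//
//   bool urem6_is_zero(uint32_t x) { // x % 6 == 0
//     uint32_t t = x * 0xAAAAAAABu;  // (mul N, P)
//     t = (t >> 1) | (t << 31);      // (rotr t, K)
//     return t <= 0x2AAAAAAAu;       // (setule t, Q)
//   }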
6963
6964/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6965/// where the divisor is constant and the comparison target is zero,
6966/// return a DAG expression that will generate the same comparison result
6967/// using only multiplications, additions and shifts/rotations.
6968/// Ref: "Hacker's Delight" 10-17.
6969SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6970 SDValue CompTargetNode,
6971 ISD::CondCode Cond,
6972 DAGCombinerInfo &DCI,
6973 const SDLoc &DL) const {
6974 SmallVector<SDNode *, 16> Built;
6975 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6976 DCI, DL, Built)) {
6977 assert(Built.size() <= 7 && "Max size prediction failed.");
6978 for (SDNode *N : Built)
6979 DCI.AddToWorklist(N);
6980 return Folded;
6981 }
6982
6983 return SDValue();
6984}
6985
6986SDValue
6987TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6988 SDValue CompTargetNode, ISD::CondCode Cond,
6989 DAGCombinerInfo &DCI, const SDLoc &DL,
6990 SmallVectorImpl<SDNode *> &Created) const {
6991 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6992 // Fold:
6993 // (seteq/ne (srem N, D), 0)
6994 // To:
6995 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6996 //
6997 // - D must be constant, with D = D0 * 2^K where D0 is odd
6998 // - P is the multiplicative inverse of D0 modulo 2^W
6999 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7000 // - Q = floor((2 * A) / (2^K))
7001 // where W is the width of the common type of N and D.
7002 //
7003 // When D is a power of two (and thus D0 is 1), the normal
7004 // formula for A and Q don't apply, because the derivation
7005 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7006 // does not apply. This specifically fails when N = INT_MIN.
7007 //
7008 // Instead, for power-of-two D, we use:
7009 // - A = 2^(W-1)
7010 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7011 // - Q = 2^(W-K) - 1
7012 // |-> Test that the top K bits are zero after rotation
7013 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7014 "Only applicable for (in)equality comparisons.");
7015
7016 SelectionDAG &DAG = DCI.DAG;
7017
7018 EVT VT = REMNode.getValueType();
7019 EVT SVT = VT.getScalarType();
7020 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7021 EVT ShSVT = ShVT.getScalarType();
7022
7023 // If we are after ops legalization, and MUL is unavailable, we cannot
7024 // proceed.
7025 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7026 return SDValue();
7027
7028 // TODO: Could support comparing with non-zero too.
7029 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7030 if (!CompTarget || !CompTarget->isZero())
7031 return SDValue();
7032
7033 bool HadIntMinDivisor = false;
7034 bool HadOneDivisor = false;
7035 bool AllDivisorsAreOnes = true;
7036 bool HadEvenDivisor = false;
7037 bool NeedToApplyOffset = false;
7038 bool AllDivisorsArePowerOfTwo = true;
7039 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7040
7041 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7042 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7043 if (C->isZero())
7044 return false;
7045
7046 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7047
7048 // WARNING: this fold is only valid for positive divisors!
7049 APInt D = C->getAPIntValue();
7050 if (D.isNegative())
7051 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7052
7053 HadIntMinDivisor |= D.isMinSignedValue();
7054
7055 // If all divisors are ones, we will prefer to avoid the fold.
7056 HadOneDivisor |= D.isOne();
7057 AllDivisorsAreOnes &= D.isOne();
7058
7059 // Decompose D into D0 * 2^K
7060 unsigned K = D.countr_zero();
7061 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7062 APInt D0 = D.lshr(K);
7063
7064 if (!D.isMinSignedValue()) {
7065 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7066 // we don't care about this lane in this fold, we'll special-handle it.
7067 HadEvenDivisor |= (K != 0);
7068 }
7069
7070 // D is a power-of-two if D0 is one. This includes INT_MIN.
7071 // If all divisors are power-of-two, we will prefer to avoid the fold.
7072 AllDivisorsArePowerOfTwo &= D0.isOne();
7073
7074 // P = inv(D0, 2^W)
7075 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7076 unsigned W = D.getBitWidth();
7077 APInt P = D0.multiplicativeInverse();
7078 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7079
7080 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7081 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7082 A.clearLowBits(K);
7083
7084 if (!D.isMinSignedValue()) {
7085 // If divisor INT_MIN, then we don't care about this lane in this fold,
7086 // we'll special-handle it.
7087 NeedToApplyOffset |= A != 0;
7088 }
7089
7090 // Q = floor((2 * A) / (2^K))
7091 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7092
7094 "We are expecting that A is always less than all-ones for SVT");
7096 "We are expecting that K is always less than all-ones for ShSVT");
7097
7098 // If D was a power of two, apply the alternate constant derivation.
7099 if (D0.isOne()) {
7100 // A = 2^(W-1)
7101 A = APInt::getSignedMinValue(W);
7102 // - Q = 2^(W-K) - 1
7103 Q = APInt::getAllOnes(W - K).zext(W);
7104 }
7105
7106 // If the divisor is 1 the result can be constant-folded. Likewise, we
7107 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7108 if (D.isOne()) {
7109 // Set P, A and K to bogus values so we can try to splat them.
7110 P = 0;
7111 A = -1;
7112 K = -1;
7113
7114 // x ?% 1 == 0 <--> true <--> x u<= -1
7115 Q = -1;
7116 }
7117
7118 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7119 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7120 KAmts.push_back(
7121 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7122 /*implicitTrunc=*/true),
7123 DL, ShSVT));
7124 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7125 return true;
7126 };
7127
7128 SDValue N = REMNode.getOperand(0);
7129 SDValue D = REMNode.getOperand(1);
7130
7131 // Collect the values from each element.
7132 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7133 return SDValue();
7134
7135 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7136 if (AllDivisorsAreOnes)
7137 return SDValue();
7138
7139 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7140 // since it can be best implemented as a bit test.
7141 if (AllDivisorsArePowerOfTwo)
7142 return SDValue();
7143
7144 SDValue PVal, AVal, KVal, QVal;
7145 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7146 if (HadOneDivisor) {
7147 // Try to turn PAmts into a splat, since we don't care about the values
7148 // that are currently '0'. If we can't, just keep '0's.
7149 turnVectorIntoSplatVector(PAmts, isNullConstant);
7150 // Try to turn AAmts into a splat, since we don't care about the
7151 // values that are currently '-1'. If we can't, change them to '0's.
7152 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7153 DAG.getConstant(0, DL, SVT));
7154 // Try to turn KAmts into a splat, since we don't care about the values
7155 // that are currently '-1'. If we can't, change them to '0's.
7156 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7157 DAG.getConstant(0, DL, ShSVT));
7158 }
7159
7160 PVal = DAG.getBuildVector(VT, DL, PAmts);
7161 AVal = DAG.getBuildVector(VT, DL, AAmts);
7162 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7163 QVal = DAG.getBuildVector(VT, DL, QAmts);
7164 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7165 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7166 QAmts.size() == 1 &&
7167 "Expected matchUnaryPredicate to return one element for scalable "
7168 "vectors");
7169 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7170 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7171 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7172 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7173 } else {
7174 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7175 PVal = PAmts[0];
7176 AVal = AAmts[0];
7177 KVal = KAmts[0];
7178 QVal = QAmts[0];
7179 }
7180
7181 // (mul N, P)
7182 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7183 Created.push_back(Op0.getNode());
7184
7185 if (NeedToApplyOffset) {
7186 // We need ADD to do this.
7187 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7188 return SDValue();
7189
7190 // (add (mul N, P), A)
7191 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7192 Created.push_back(Op0.getNode());
7193 }
7194
7195 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7196 // divisors as a performance improvement, since rotating by 0 is a no-op.
7197 if (HadEvenDivisor) {
7198 // We need ROTR to do this.
7199 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7200 return SDValue();
7201 // SREM: (rotr (add (mul N, P), A), K)
7202 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7203 Created.push_back(Op0.getNode());
7204 }
7205
7206 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7207 SDValue Fold =
7208 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7209 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7210
7211 // If we didn't have lanes with INT_MIN divisor, then we're done.
7212 if (!HadIntMinDivisor)
7213 return Fold;
7214
7215 // That fold is only valid for positive divisors. Which effectively means,
7216 // it is invalid for INT_MIN divisors. So if we have such a lane,
7217 // we must fix-up results for said lanes.
7218 assert(VT.isVector() && "Can/should only get here for vectors.");
7219
7220 // NOTE: we avoid letting illegal types through even if we're before legalize
7221 // ops; legalization has a hard time producing good code for the code that
7222 // follows.
7223 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7224 !isOperationLegalOrCustom(ISD::AND, VT) ||
7225 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7226 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7227 return SDValue();
7228
7229 Created.push_back(Fold.getNode());
7230
7231 SDValue IntMin = DAG.getConstant(
7232 APInt::getSignedMinValue(SVT.getSizeInBits()), DL, VT);
7233 SDValue IntMax = DAG.getConstant(
7234 APInt::getSignedMaxValue(SVT.getSizeInBits()), DL, VT);
7235 SDValue Zero =
7236 DAG.getConstant(APInt::getZero(SVT.getSizeInBits()), DL, VT);
7237
7238 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7239 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7240 Created.push_back(DivisorIsIntMin.getNode());
7241
7242 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7243 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7244 Created.push_back(Masked.getNode());
7245 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7246 Created.push_back(MaskedIsZero.getNode());
7247
7248 // To produce final result we need to blend 2 vectors: 'SetCC' and
7249 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7250 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7251 // constant-folded, select can get lowered to a shuffle with constant mask.
7252 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7253 MaskedIsZero, Fold);
7254
7255 return Blended;
7256}
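// Worked i32 instance (illustrative sketch with a hypothetical helper; for
// D = 3: P = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) = 0x2AAAAAAA, K = 0, and
// Q = 2 * A = 0x55555554):
//
//   bool srem3_is_zero(int32_t x) { // x % 3 == 0, valid for negative x too
//     uint32_t t = (uint32_t)x * 0xAAAAAAABu + 0x2AAAAAAAu; // mul + add
//     return t <= 0x55555554u;                              // (setule t, Q)
//   }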
7257
7258 bool TargetLowering::verifyReturnAddressArgumentIsConstant(SDValue Op,
7259 SelectionDAG &DAG) const {
7260 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7261 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7262 "be a constant integer");
7263 return true;
7264 }
7265
7266 return false;
7267}
7268
7269 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7270 const DenormalMode &Mode) const {
7271 SDLoc DL(Op);
7272 EVT VT = Op.getValueType();
7273 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7274 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7275
7276 // This is specifically a check for the handling of denormal inputs, not the
7277 // result.
7278 if (Mode.Input == DenormalMode::PreserveSign ||
7279 Mode.Input == DenormalMode::PositiveZero) {
7280 // Test = X == 0.0
7281 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7282 }
7283
7284 // Test with denormal inputs to avoid a wrong estimate.
7285 //
7286 // Test = fabs(X) < SmallestNormal
7287 const fltSemantics &FltSem = VT.getFltSemantics();
7288 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7289 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7290 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7291 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7292}
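// E.g. for f32 with IEEE denormal inputs this emits
//   fabs(X) < 0x1.0p-126f (the smallest normalized f32),
// while with denormal inputs flushed to zero it degenerates to X == 0.0f.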
7293
7294 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7295 bool LegalOps, bool OptForSize,
7296 NegatibleCost &Cost,
7297 unsigned Depth) const {
7298 // fneg is removable even if it has multiple uses.
7299 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7300 Cost = NegatibleCost::Cheaper;
7301 return Op.getOperand(0);
7302 }
7303
7304 // Don't recurse exponentially.
7305 if (Depth > SelectionDAG::MaxRecursionDepth)
7306 return SDValue();
7307
7308 // Pre-increment recursion depth for use in recursive calls.
7309 ++Depth;
7310 const SDNodeFlags Flags = Op->getFlags();
7311 const TargetOptions &Options = DAG.getTarget().Options;
7312 EVT VT = Op.getValueType();
7313 unsigned Opcode = Op.getOpcode();
7314
7315 // Don't allow anything with multiple uses unless we know it is free.
7316 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7317 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7318 isFPExtFree(VT, Op.getOperand(0).getValueType());
7319 if (!IsFreeExtend)
7320 return SDValue();
7321 }
7322
7323 auto RemoveDeadNode = [&](SDValue N) {
7324 if (N && N.getNode()->use_empty())
7325 DAG.RemoveDeadNode(N.getNode());
7326 };
7327
7328 SDLoc DL(Op);
7329
7330 // Because getNegatedExpression can delete nodes we need a handle to keep
7331 // temporary nodes alive in case the recursion manages to create an identical
7332 // node.
7333 std::list<HandleSDNode> Handles;
7334
7335 switch (Opcode) {
7336 case ISD::ConstantFP: {
7337 // Don't invert constant FP values after legalization unless the target says
7338 // the negated constant is legal.
7339 bool IsOpLegal =
7340 isOperationLegal(ISD::ConstantFP, VT) ||
7341 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7342 OptForSize);
7343
7344 if (LegalOps && !IsOpLegal)
7345 break;
7346
7347 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7348 V.changeSign();
7349 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7350
7351 // If we already have the use of the negated floating constant, it is free
7352 // to negate it even if it has multiple uses.
7353 if (!Op.hasOneUse() && CFP.use_empty())
7354 break;
7355 Cost = NegatibleCost::Neutral;
7356 return CFP;
7357 }
7358 case ISD::BUILD_VECTOR: {
7359 // Only permit BUILD_VECTOR of constants.
7360 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7361 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7362 }))
7363 break;
7364
7365 bool IsOpLegal =
7366 (isOperationLegal(ISD::ConstantFP, VT) &&
7367 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7368 llvm::all_of(Op->op_values(), [&](SDValue N) {
7369 return N.isUndef() ||
7370 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7371 OptForSize);
7372 });
7373
7374 if (LegalOps && !IsOpLegal)
7375 break;
7376
7377 SmallVector<SDValue, 4> Ops;
7378 for (SDValue C : Op->op_values()) {
7379 if (C.isUndef()) {
7380 Ops.push_back(C);
7381 continue;
7382 }
7383 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7384 V.changeSign();
7385 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7386 }
7387 Cost = NegatibleCost::Neutral;
7388 return DAG.getBuildVector(VT, DL, Ops);
7389 }
7390 case ISD::FADD: {
7391 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7392 break;
7393
7394 // After operation legalization, it might not be legal to create new FSUBs.
7395 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7396 break;
7397 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7398
7399 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7400 NegatibleCost CostX = NegatibleCost::Expensive;
7401 SDValue NegX =
7402 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7403 // Prevent this node from being deleted by the next call.
7404 if (NegX)
7405 Handles.emplace_back(NegX);
7406
7407 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7408 NegatibleCost CostY = NegatibleCost::Expensive;
7409 SDValue NegY =
7410 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7411
7412 // We're done with the handles.
7413 Handles.clear();
7414
7415 // Negate X if its cost is less than or equal to Y's.
7416 if (NegX && (CostX <= CostY)) {
7417 Cost = CostX;
7418 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7419 if (NegY != N)
7420 RemoveDeadNode(NegY);
7421 return N;
7422 }
7423
7424 // Negate the Y if it is not expensive.
7425 if (NegY) {
7426 Cost = CostY;
7427 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7428 if (NegX != N)
7429 RemoveDeadNode(NegX);
7430 return N;
7431 }
7432 break;
7433 }
7434 case ISD::FSUB: {
7435 // We can't turn -(A-B) into B-A when we honor signed zeros.
7436 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7437 break;
7438
7439 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7440 // fold (fneg (fsub 0, Y)) -> Y
7441 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7442 if (C->isZero()) {
7443 Cost = NegatibleCost::Cheaper;
7444 return Y;
7445 }
7446
7447 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7448 Cost = NegatibleCost::Neutral;
7449 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7450 }
7451 case ISD::FMUL:
7452 case ISD::FDIV: {
7453 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7454
7455 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7456 NegatibleCost CostX = NegatibleCost::Expensive;
7457 SDValue NegX =
7458 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7459 // Prevent this node from being deleted by the next call.
7460 if (NegX)
7461 Handles.emplace_back(NegX);
7462
7463 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7464 NegatibleCost CostY = NegatibleCost::Expensive;
7465 SDValue NegY =
7466 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7467
7468 // We're done with the handles.
7469 Handles.clear();
7470
7471 // Negate X if its cost is less than or equal to Y's.
7472 if (NegX && (CostX <= CostY)) {
7473 Cost = CostX;
7474 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7475 if (NegY != N)
7476 RemoveDeadNode(NegY);
7477 return N;
7478 }
7479
7480 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7481 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7482 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7483 break;
7484
7485 // Negate the Y if it is not expensive.
7486 if (NegY) {
7487 Cost = CostY;
7488 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7489 if (NegX != N)
7490 RemoveDeadNode(NegX);
7491 return N;
7492 }
7493 break;
7494 }
7495 case ISD::FMA:
7496 case ISD::FMAD: {
7497 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7498 break;
7499
7500 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7501 NegatibleCost CostZ = NegatibleCost::Expensive;
7502 SDValue NegZ =
7503 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7504 // Give up if we fail to negate Z.
7505 if (!NegZ)
7506 break;
7507
7508 // Prevent this node from being deleted by the next two calls.
7509 Handles.emplace_back(NegZ);
7510
7511 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7512 NegatibleCost CostX = NegatibleCost::Expensive;
7513 SDValue NegX =
7514 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7515 // Prevent this node from being deleted by the next call.
7516 if (NegX)
7517 Handles.emplace_back(NegX);
7518
7519 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7520 NegatibleCost CostY = NegatibleCost::Expensive;
7521 SDValue NegY =
7522 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7523
7524 // We're done with the handles.
7525 Handles.clear();
7526
7527 // Negate X if its cost is less than or equal to Y's.
7528 if (NegX && (CostX <= CostY)) {
7529 Cost = std::min(CostX, CostZ);
7530 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7531 if (NegY != N)
7532 RemoveDeadNode(NegY);
7533 return N;
7534 }
7535
7536 // Negate the Y if it is not expensive.
7537 if (NegY) {
7538 Cost = std::min(CostY, CostZ);
7539 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7540 if (NegX != N)
7541 RemoveDeadNode(NegX);
7542 return N;
7543 }
7544 break;
7545 }
7546
7547 case ISD::FP_EXTEND:
7548 case ISD::FSIN:
7549 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7550 OptForSize, Cost, Depth))
7551 return DAG.getNode(Opcode, DL, VT, NegV);
7552 break;
7553 case ISD::FP_ROUND:
7554 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7555 OptForSize, Cost, Depth))
7556 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7557 break;
7558 case ISD::SELECT:
7559 case ISD::VSELECT: {
7560 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7561 // iff at least one cost is cheaper and the other is neutral/cheaper
7562 SDValue LHS = Op.getOperand(1);
7563 NegatibleCost CostLHS = NegatibleCost::Expensive;
7564 SDValue NegLHS =
7565 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7566 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7567 RemoveDeadNode(NegLHS);
7568 break;
7569 }
7570
7571 // Prevent this node from being deleted by the next call.
7572 Handles.emplace_back(NegLHS);
7573
7574 SDValue RHS = Op.getOperand(2);
7575 NegatibleCost CostRHS = NegatibleCost::Expensive;
7576 SDValue NegRHS =
7577 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7578
7579 // We're done with the handles.
7580 Handles.clear();
7581
7582 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7583 (CostLHS != NegatibleCost::Cheaper &&
7584 CostRHS != NegatibleCost::Cheaper)) {
7585 RemoveDeadNode(NegLHS);
7586 RemoveDeadNode(NegRHS);
7587 break;
7588 }
7589
7590 Cost = std::min(CostLHS, CostRHS);
7591 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7592 }
7593 }
7594
7595 return SDValue();
7596}
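// Summary of the folds attempted above (illustrative; nsz-style flags are
// required where noted, and the cheaper operand is the one negated):
//   fneg(fsub X, Y)   -> fsub Y, X                   (nsz)
//   fneg(fadd X, Y)   -> fsub (fneg X), Y            (nsz, NegX cheap)
//   fneg(fmul X, Y)   -> fmul (fneg X), Y
//   fneg(fma X, Y, Z) -> fma (fneg X), Y, (fneg Z)   (nsz)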
7597
7598//===----------------------------------------------------------------------===//
7599// Legalization Utilities
7600//===----------------------------------------------------------------------===//
7601
7602bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7603 SDValue LHS, SDValue RHS,
7604 SmallVectorImpl<SDValue> &Result,
7605 EVT HiLoVT, SelectionDAG &DAG,
7606 MulExpansionKind Kind, SDValue LL,
7607 SDValue LH, SDValue RL, SDValue RH) const {
7608 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7609 Opcode == ISD::SMUL_LOHI);
7610
7611 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7612 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7613 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7614 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7615 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7616 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7617 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7618 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7619
7620 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7621 return false;
7622
7623 unsigned OuterBitSize = VT.getScalarSizeInBits();
7624 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7625
7626 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7627 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7628 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7629
7630 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7631 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7632 bool Signed) -> bool {
7633 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7634 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7635 Hi = SDValue(Lo.getNode(), 1);
7636 return true;
7637 }
7638 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7639 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7640 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7641 return true;
7642 }
7643 return false;
7644 };
7645
7646 SDValue Lo, Hi;
7647
7648 if (!LL.getNode() && !RL.getNode() &&
7649 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7650 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7651 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7652 }
7653
7654 if (!LL.getNode())
7655 return false;
7656
7657 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7658 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7659 DAG.MaskedValueIsZero(RHS, HighMask)) {
7660 // The inputs are both zero-extended.
7661 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7662 Result.push_back(Lo);
7663 Result.push_back(Hi);
7664 if (Opcode != ISD::MUL) {
7665 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7666 Result.push_back(Zero);
7667 Result.push_back(Zero);
7668 }
7669 return true;
7670 }
7671 }
7672
7673 if (!VT.isVector() && Opcode == ISD::MUL &&
7674 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7675 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7676 // The input values are both sign-extended.
7677 // TODO non-MUL case?
7678 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7679 Result.push_back(Lo);
7680 Result.push_back(Hi);
7681 return true;
7682 }
7683 }
7684
7685 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7686 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7687
7688 if (!LH.getNode() && !RH.getNode() &&
7689 isOperationLegalOrCustom(ISD::SRL, VT) &&
7690 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7691 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7692 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7693 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7694 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7695 }
7696
7697 if (!LH.getNode())
7698 return false;
7699
7700 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7701 return false;
7702
7703 Result.push_back(Lo);
7704
7705 if (Opcode == ISD::MUL) {
7706 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7707 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7708 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7709 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7710 Result.push_back(Hi);
7711 return true;
7712 }
7713
7714 // Compute the full width result.
7715 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7716 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7717 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7718 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7719 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7720 };
7721
7722 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7723 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7724 return false;
7725
7726 // This is effectively the add part of a multiply-add of half-sized operands,
7727 // so it cannot overflow.
7728 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7729
7730 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7731 return false;
7732
7733 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7734 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7735
7736 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7737 isOperationLegalOrCustom(ISD::ADDE, VT));
7738 if (UseGlue)
7739 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7740 Merge(Lo, Hi));
7741 else
7742 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7743 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7744
7745 SDValue Carry = Next.getValue(1);
7746 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7747 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7748
7749 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7750 return false;
7751
7752 if (UseGlue)
7753 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7754 Carry);
7755 else
7756 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7757 Zero, Carry);
7758
7759 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7760
7761 if (Opcode == ISD::SMUL_LOHI) {
7762 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7763 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7764 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7765
7766 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7767 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7768 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7769 }
7770
7771 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7772 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7773 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7774 return true;
7775}
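// The expansion above is grade-school multiplication on base-2^k digits: with
// k = InnerBitSize, L = LH * 2^k + LL and R = RH * 2^k + RL,
//   L * R = LL*RL + (LL*RH + LH*RL) * 2^k + LH*RH * 2^(2k)
// and the UADDO_CARRY/ADDE chain propagates the carries between the partial
// products that straddle the digit boundary.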
7776
7777 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7778 SelectionDAG &DAG, MulExpansionKind Kind,
7779 SDValue LL, SDValue LH, SDValue RL,
7780 SDValue RH) const {
7781 SmallVector<SDValue, 2> Result;
7782 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7783 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7784 DAG, Kind, LL, LH, RL, RH);
7785 if (Ok) {
7786 assert(Result.size() == 2);
7787 Lo = Result[0];
7788 Hi = Result[1];
7789 }
7790 return Ok;
7791}
7792
7793// Optimize unsigned division or remainder by constants for types twice as large
7794// as a legal VT.
7795//
7796 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7797 // can be computed as:
7798 //
7799// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7800// Remainder = Sum % Constant
7801// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7802//
7803// For division, we can compute the remainder using the algorithm described
7804// above, subtract it from the dividend to get an exact multiple of Constant.
7805 // Then multiply that exact multiple by the multiplicative inverse modulo
7806// (1 << (BitWidth / 2)) to get the quotient.
7807
7808// If Constant is even, we can shift right the dividend and the divisor by the
7809// number of trailing zeros in Constant before applying the remainder algorithm.
7810// If we're after the quotient, we can subtract this value from the shifted
7811// dividend and multiply by the multiplicative inverse of the shifted divisor.
7812// If we want the remainder, we shift the value left by the number of trailing
7813// zeros and add the bits that were shifted out of the dividend.
7814 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7815 SmallVectorImpl<SDValue> &Result,
7816 EVT HiLoVT, SelectionDAG &DAG,
7817 SDValue LL, SDValue LH) const {
7818 unsigned Opcode = N->getOpcode();
7819 EVT VT = N->getValueType(0);
7820
7821 // TODO: Support signed division/remainder.
7822 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7823 return false;
7824 assert(
7825 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7826 "Unexpected opcode");
7827
7828 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7829 if (!CN)
7830 return false;
7831
7832 APInt Divisor = CN->getAPIntValue();
7833 unsigned BitWidth = Divisor.getBitWidth();
7834 unsigned HBitWidth = BitWidth / 2;
7835 assert(VT.getScalarSizeInBits() == BitWidth &&
7836 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7837
7838 // Divisor needs to be less than (1 << HBitWidth).
7839 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7840 if (Divisor.uge(HalfMaxPlus1))
7841 return false;
7842
7843 // We depend on the UREM by constant optimization in DAGCombiner that requires
7844 // a high multiply.
7845 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7846 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7847 return false;
7848
7849 // Don't expand if optimizing for size.
7850 if (DAG.shouldOptForSize())
7851 return false;
7852
7853 // Early out for 0 or 1 divisors.
7854 if (Divisor.ule(1))
7855 return false;
7856
7857 // If the divisor is even, shift it until it becomes odd.
7858 unsigned TrailingZeros = 0;
7859 if (!Divisor[0]) {
7860 TrailingZeros = Divisor.countr_zero();
7861 Divisor.lshrInPlace(TrailingZeros);
7862 }
7863
7864 SDLoc dl(N);
7865 SDValue Sum;
7866 SDValue PartialRem;
7867
7868 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7869 // then add in the carry.
7870 // TODO: If we can't split it in half, we might be able to split into 3 or
7871 // more pieces using a smaller bit width.
7872 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7873 assert(!LL == !LH && "Expected both input halves or no input halves!");
7874 if (!LL)
7875 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7876
7877 // Shift the input by the number of TrailingZeros in the divisor. The
7878 // shifted out bits will be added to the remainder later.
7879 if (TrailingZeros) {
7880 // Save the shifted off bits if we need the remainder.
7881 if (Opcode != ISD::UDIV) {
7882 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7883 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7884 DAG.getConstant(Mask, dl, HiLoVT));
7885 }
7886
7887 LL = DAG.getNode(
7888 ISD::OR, dl, HiLoVT,
7889 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7890 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7891 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7892 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7893 HiLoVT, dl)));
7894 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7895 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7896 }
7897
7898 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7899 EVT SetCCType =
7900 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7901 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7902 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7903 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7904 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7905 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7906 } else {
7907 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7908 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7909 // If the boolean for the target is 0 or 1, we can add the setcc result
7910 // directly.
7911 if (getBooleanContents(HiLoVT) ==
7912 TargetLoweringBase::ZeroOrOneBooleanContent)
7913 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7914 else
7915 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7916 DAG.getConstant(0, dl, HiLoVT));
7917 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7918 }
7919 }
7920
7921 // If we didn't find a sum, we can't do the expansion.
7922 if (!Sum)
7923 return false;
7924
7925 // Perform a HiLoVT urem on the Sum using truncated divisor.
7926 SDValue RemL =
7927 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7928 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7929 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7930
7931 if (Opcode != ISD::UREM) {
7932 // Subtract the remainder from the shifted dividend.
7933 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7934 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7935
7936 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7937
7938 // Multiply by the multiplicative inverse of the divisor modulo
7939 // (1 << BitWidth).
7940 APInt MulFactor = Divisor.multiplicativeInverse();
7941
7942 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7943 DAG.getConstant(MulFactor, dl, VT));
7944
7945 // Split the quotient into low and high parts.
7946 SDValue QuotL, QuotH;
7947 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7948 Result.push_back(QuotL);
7949 Result.push_back(QuotH);
7950 }
7951
7952 if (Opcode != ISD::UDIV) {
7953 // If we shifted the input, shift the remainder left and add the bits we
7954 // shifted off the input.
7955 if (TrailingZeros) {
7956 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7957 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7958 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7959 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7960 }
7961 Result.push_back(RemL);
7962 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7963 }
7964
7965 return true;
7966}
7967
7968// Check that (every element of) Z is undef or not an exact multiple of BW.
7969static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7970 return ISD::matchUnaryPredicate(
7971 Z,
7972 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7973 true);
7974}
7975
7976SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) const {
7977 EVT VT = Node->getValueType(0);
7978 SDValue ShX, ShY;
7979 SDValue ShAmt, InvShAmt;
7980 SDValue X = Node->getOperand(0);
7981 SDValue Y = Node->getOperand(1);
7982 SDValue Z = Node->getOperand(2);
7983 SDValue Mask = Node->getOperand(3);
7984 SDValue VL = Node->getOperand(4);
7985
7986 unsigned BW = VT.getScalarSizeInBits();
7987 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7988 SDLoc DL(SDValue(Node, 0));
7989
7990 EVT ShVT = Z.getValueType();
7991 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7992 // fshl: X << C | Y >> (BW - C)
7993 // fshr: X << (BW - C) | Y >> C
7994 // where C = Z % BW is not zero
7995 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7996 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7997 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7998 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7999 VL);
8000 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8001 VL);
8002 } else {
8003 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8004 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8005 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8006 if (isPowerOf2_32(BW)) {
8007 // Z % BW -> Z & (BW - 1)
8008 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8009 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8010 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8011 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8012 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8013 } else {
8014 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8015 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8016 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8017 }
8018
8019 SDValue One = DAG.getConstant(1, DL, ShVT);
8020 if (IsFSHL) {
8021 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8022 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8023 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8024 } else {
8025 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8026 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8027 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8028 }
8029 }
8030 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8031}
8032
8033SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8034 SelectionDAG &DAG) const {
8035 if (Node->isVPOpcode())
8036 return expandVPFunnelShift(Node, DAG);
8037
8038 EVT VT = Node->getValueType(0);
8039
8040 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8041 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8042 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8043 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8044 return SDValue();
8045
8046 SDValue X = Node->getOperand(0);
8047 SDValue Y = Node->getOperand(1);
8048 SDValue Z = Node->getOperand(2);
8049
8050 unsigned BW = VT.getScalarSizeInBits();
8051 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8052 SDLoc DL(SDValue(Node, 0));
8053
8054 EVT ShVT = Z.getValueType();
8055
8056 // If a funnel shift in the other direction is more supported, use it.
8057 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8058 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8059 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8060 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8061 // fshl X, Y, Z -> fshr X, Y, -Z
8062 // fshr X, Y, Z -> fshl X, Y, -Z
8063 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8064 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8065 } else {
8066 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8067 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8068 SDValue One = DAG.getConstant(1, DL, ShVT);
8069 if (IsFSHL) {
8070 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8071 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8072 } else {
8073 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8074 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8075 }
8076 Z = DAG.getNOT(DL, Z, ShVT);
8077 }
8078 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8079 }
8080
8081 SDValue ShX, ShY;
8082 SDValue ShAmt, InvShAmt;
8083 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8084 // fshl: X << C | Y >> (BW - C)
8085 // fshr: X << (BW - C) | Y >> C
8086 // where C = Z % BW is not zero
8087 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8088 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8089 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8090 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8091 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8092 } else {
8093 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8094 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8095 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8096 if (isPowerOf2_32(BW)) {
8097 // Z % BW -> Z & (BW - 1)
8098 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8099 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8100 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8101 } else {
8102 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8103 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8104 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8105 }
8106
8107 SDValue One = DAG.getConstant(1, DL, ShVT);
8108 if (IsFSHL) {
8109 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8110 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8111 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8112 } else {
8113 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8114 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8115 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8116 }
8117 }
8118 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8119}
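// A scalar reference for the general path above (not from the upstream file;
// assumes uint32_t operands, so BW == 32 is a power of two):
//
//   fshl(X, Y, Z) == (X << (Z & 31)) | ((Y >> 1) >> (31 - (Z & 31)))
//   fshr(X, Y, Z) == ((X << 1) << (31 - (Z & 31))) | (Y >> (Z & 31))
//
// The extra one-bit shift keeps every shift amount inside [0, 31], so the
// identities also hold when Z % 32 == 0, where the naive "Y >> (32 - 0)"
// form would be an out-of-range shift.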
8120
8121// TODO: Merge with expandFunnelShift.
8122SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8123 SelectionDAG &DAG) const {
8124 EVT VT = Node->getValueType(0);
8125 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8126 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8127 SDValue Op0 = Node->getOperand(0);
8128 SDValue Op1 = Node->getOperand(1);
8129 SDLoc DL(SDValue(Node, 0));
8130
8131 EVT ShVT = Op1.getValueType();
8132 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8133
8134 // If a rotate in the other direction is more supported, use it.
8135 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8136 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8137 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8138 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8139 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8140 }
8141
8142 if (!AllowVectorOps && VT.isVector() &&
8143 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8144 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8145 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8146 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8147 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8148 return SDValue();
8149
8150 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8151 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8152 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8153 SDValue ShVal;
8154 SDValue HsVal;
8155 if (isPowerOf2_32(EltSizeInBits)) {
8156 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8157 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8158 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8159 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8160 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8161 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8162 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8163 } else {
8164 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8165 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8166 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8167 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8168 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8169 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8170 SDValue One = DAG.getConstant(1, DL, ShVT);
8171 HsVal =
8172 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8173 }
8174 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8175}
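// The power-of-two branch above is the standard branch-free rotate; as a
// uint32_t sketch (illustrative only):
//
//   rotl(x, c) == (x << (c & 31)) | (x >> (-c & 31))
//
// Both masked shift amounts stay in [0, 31], and for c % 32 == 0 both
// amounts become 0, so the OR simply reproduces x.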
8176
8177void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8178 SelectionDAG &DAG) const {
8179 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8180 EVT VT = Node->getValueType(0);
8181 unsigned VTBits = VT.getScalarSizeInBits();
8182 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8183
8184 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8185 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8186 SDValue ShOpLo = Node->getOperand(0);
8187 SDValue ShOpHi = Node->getOperand(1);
8188 SDValue ShAmt = Node->getOperand(2);
8189 EVT ShAmtVT = ShAmt.getValueType();
8190 EVT ShAmtCCVT =
8191 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8192 SDLoc dl(Node);
8193
8194 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8195 // ISD::SRA/L nodes don't. Insert an AND to be safe; it's usually optimized
8196 // away during isel.
8197 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8198 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8199 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8200 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8201 : DAG.getConstant(0, dl, VT);
8202
8203 SDValue Tmp2, Tmp3;
8204 if (IsSHL) {
8205 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8206 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8207 } else {
8208 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8209 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8210 }
8211
8212 // If the shift amount is larger than or equal to the width of a part we don't
8213 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8214 // values for large shift amounts.
8215 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8216 DAG.getConstant(VTBits, dl, ShAmtVT));
8217 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8218 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8219
8220 if (IsSHL) {
8221 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8222 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8223 } else {
8224 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8225 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8226 }
8227}
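// What the selects above compute, written as a scalar sketch for SHL_PARTS
// with 32-bit parts and Amt in [0, 63] (hypothetical code, not upstream):
//
//   if (Amt & 32) { Hi = Lo << (Amt & 31); Lo = 0; }
//   else          { Hi = fshl(Hi, Lo, Amt); Lo = Lo << (Amt & 31); }
//
// FSHL supplies the bits that cross the part boundary, and the test on
// (Amt & 32) patches up shifts of a full part or more.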
8228
8229bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8230 SelectionDAG &DAG) const {
8231 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8232 SDValue Src = Node->getOperand(OpNo);
8233 EVT SrcVT = Src.getValueType();
8234 EVT DstVT = Node->getValueType(0);
8235 SDLoc dl(SDValue(Node, 0));
8236
8237 // FIXME: Only f32 to i64 conversions are supported.
8238 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8239 return false;
8240
8241 if (Node->isStrictFPOpcode())
8242 // When a NaN is converted to an integer a trap is allowed. We can't
8243 // use this expansion here because it would eliminate that trap. Other
8244 // traps are also allowed and cannot be eliminated. See
8245 // IEEE 754-2008 sec 5.8.
8246 return false;
8247
8248 // Expand f32 -> i64 conversion
8249 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8250 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8251 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8252 EVT IntVT = SrcVT.changeTypeToInteger();
8253 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8254
8255 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8256 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8257 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8258 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8259 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8260 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8261
8262 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8263
8264 SDValue ExponentBits = DAG.getNode(
8265 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8266 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8267 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8268
8269 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8270 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8271 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8272 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8273
8274 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8275 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8276 DAG.getConstant(0x00800000, dl, IntVT));
8277
8278 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8279
8280 R = DAG.getSelectCC(
8281 dl, Exponent, ExponentLoBit,
8282 DAG.getNode(ISD::SHL, dl, DstVT, R,
8283 DAG.getZExtOrTrunc(
8284 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8285 dl, IntShVT)),
8286 DAG.getNode(ISD::SRL, dl, DstVT, R,
8287 DAG.getZExtOrTrunc(
8288 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8289 dl, IntShVT)),
8290 ISD::SETGT);
8291
8292 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8293 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8294
8295 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8296 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8297 return true;
8298}
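// A C-style sketch of the same fixsfdi-style algorithm (assumes C++20
// std::bit_cast; it mirrors the DAG nodes built above and is illustrative
// only, with the usual undefined behavior on out-of-range inputs):
//
//   uint32_t Bits = std::bit_cast<uint32_t>(F);
//   int64_t Sign = (int32_t)Bits >> 31;                  // 0 or -1 (the SRA)
//   int32_t Exp = (int32_t)((Bits & 0x7F800000) >> 23) - 127;
//   int64_t R = (Bits & 0x007FFFFF) | 0x00800000;        // implicit 1 bit
//   R = Exp > 23 ? R << (Exp - 23) : R >> (23 - Exp);
//   return Exp < 0 ? 0 : (R ^ Sign) - Sign;              // conditional negate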
8299
8300bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8301 SDValue &Chain,
8302 SelectionDAG &DAG) const {
8303 SDLoc dl(SDValue(Node, 0));
8304 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8305 SDValue Src = Node->getOperand(OpNo);
8306
8307 EVT SrcVT = Src.getValueType();
8308 EVT DstVT = Node->getValueType(0);
8309 EVT SetCCVT =
8310 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8311 EVT DstSetCCVT =
8312 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8313
8314 // Only expand vector types if we have the appropriate vector bit operations.
8315 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8316 ISD::FP_TO_SINT;
8317 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8318 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8319 return false;
8320
8321 // If the maximum float value is smaller than the signed integer range,
8322 // the destination signmask can't be represented by the float, so we can
8323 // just use FP_TO_SINT directly.
8324 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8325 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8326 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8327 if (APFloat::opOverflow &
8328 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8329 if (Node->isStrictFPOpcode()) {
8330 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8331 { Node->getOperand(0), Src });
8332 Chain = Result.getValue(1);
8333 } else
8334 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8335 return true;
8336 }
8337
8338 // Don't expand it if there isn't a cheap FSUB instruction.
8339 if (!isOperationLegalOrCustom(
8340 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8341 return false;
8342
8343 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8344 SDValue Sel;
8345
8346 if (Node->isStrictFPOpcode()) {
8347 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8348 Node->getOperand(0), /*IsSignaling*/ true);
8349 Chain = Sel.getValue(1);
8350 } else {
8351 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8352 }
8353
8354 bool Strict = Node->isStrictFPOpcode() ||
8355 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8356
8357 if (Strict) {
8358 // Expand based on maximum range of FP_TO_SINT: if the value exceeds the
8359 // signmask, offset it (the result should then be fully representable).
8360 // Sel = Src < 0x8000000000000000
8361 // FltOfs = select Sel, 0, 0x8000000000000000
8362 // IntOfs = select Sel, 0, 0x8000000000000000
8363 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8364
8365 // TODO: Should any fast-math-flags be set for the FSUB?
8366 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8367 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8368 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8369 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8370 DAG.getConstant(0, dl, DstVT),
8371 DAG.getConstant(SignMask, dl, DstVT));
8372 SDValue SInt;
8373 if (Node->isStrictFPOpcode()) {
8374 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8375 { Chain, Src, FltOfs });
8376 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8377 { Val.getValue(1), Val });
8378 Chain = SInt.getValue(1);
8379 } else {
8380 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8381 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8382 }
8383 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8384 } else {
8385 // Expand based on maximum range of FP_TO_SINT:
8386 // True = fp_to_sint(Src)
8387 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8388 // Result = select (Src < 0x8000000000000000), True, False
8389
8390 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8391 // TODO: Should any fast-math-flags be set for the FSUB?
8392 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8393 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8394 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8395 DAG.getConstant(SignMask, dl, DstVT));
8396 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8397 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8398 }
8399 return true;
8400}
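// A worked example of the select-based path above for f64 -> u64
// (Cst == 2^63): a source value of 2^63 + 4096 fails the Src < 2^63 test,
// so the expansion computes
//   fp_to_sint(Src - 2^63) ^ 0x8000000000000000
//     == 4096 ^ 0x8000000000000000 == 0x8000000000001000,
// which is exactly 2^63 + 4096 when reinterpreted as unsigned.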
8401
8402bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8403 SDValue &Chain, SelectionDAG &DAG) const {
8404 // This transform is not correct for converting 0 when rounding mode is set
8405 // to round toward negative infinity, which will produce -0.0. So disable it
8406 // under strictfp.
8407 if (Node->isStrictFPOpcode())
8408 return false;
8409
8410 SDValue Src = Node->getOperand(0);
8411 EVT SrcVT = Src.getValueType();
8412 EVT DstVT = Node->getValueType(0);
8413
8414 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8415 // it.
8416 if (Node->getFlags().hasNonNeg() &&
8417 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8418 Result =
8419 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8420 return true;
8421 }
8422
8423 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8424 return false;
8425
8426 // Only expand vector types if we have the appropriate vector bit
8427 // operations.
8428 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8429 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8430 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8431 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8432 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8433 return false;
8434
8435 SDLoc dl(SDValue(Node, 0));
8436
8437 // Implementation of unsigned i64 to f64 following the algorithm in
8438 // __floatundidf in compiler_rt. This implementation performs rounding
8439 // correctly in all rounding modes with the exception of converting 0
8440 // when rounding toward negative infinity. In that case the fsub will
8441 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8442 // incorrect.
8443 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8444 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8445 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8446 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8447 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8448 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8449
8450 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8451 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8452 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8453 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8454 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8455 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8456 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8457 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8458 return true;
8459}
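// A standalone C++20 sketch of the same __floatundidf trick (the magic
// constants are the ones built into the DAG nodes above; the helper name is
// invented for illustration):
//
//   #include <bit>
//   #include <cstdint>
//   double U64ToF64(uint64_t X) {
//     // 2^52 + LO32(X) and 2^84 + HI32(X) * 2^32, both exactly representable.
//     double LoFlt = std::bit_cast<double>(0x4330000000000000ULL |
//                                          (X & 0xFFFFFFFFULL));
//     double HiFlt = std::bit_cast<double>(0x4530000000000000ULL | (X >> 32));
//     // Subtracting (2^84 + 2^52) cancels both biases exactly; the final
//     // add performs the single rounding step.
//     return LoFlt + (HiFlt - std::bit_cast<double>(0x4530000000100000ULL));
//   }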
8460
8461SDValue
8462TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8463 SelectionDAG &DAG) const {
8464 unsigned Opcode = Node->getOpcode();
8465 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8466 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8467 "Wrong opcode");
8468
8469 if (Node->getFlags().hasNoNaNs()) {
8470 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8471 EVT VT = Node->getValueType(0);
8472 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8473 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8474 VT.isVector())
8475 return SDValue();
8476 SDValue Op1 = Node->getOperand(0);
8477 SDValue Op2 = Node->getOperand(1);
8478 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8479 // Copy FMF flags, but always set the no-signed-zeros flag
8480 // as this is implied by the FMINNUM/FMAXNUM semantics.
8481 SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8482 return SelCC;
8483 }
8484
8485 return SDValue();
8486}
8487
8488SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8489 SelectionDAG &DAG) const {
8490 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8491 return Expanded;
8492
8493 EVT VT = Node->getValueType(0);
8494 if (VT.isScalableVector())
8496 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8497
8498 SDLoc dl(Node);
8499 unsigned NewOp =
8500 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8501
8502 if (isOperationLegalOrCustom(NewOp, VT)) {
8503 SDValue Quiet0 = Node->getOperand(0);
8504 SDValue Quiet1 = Node->getOperand(1);
8505
8506 if (!Node->getFlags().hasNoNaNs()) {
8507 // Insert canonicalizes if it's possible we need to quiet to get correct
8508 // sNaN behavior.
8509 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8510 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8511 Node->getFlags());
8512 }
8513 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8514 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8515 Node->getFlags());
8516 }
8517 }
8518
8519 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8520 }
8521
8522 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8523 // instead if there are no NaNs and there can't be an incompatible zero
8524 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8525 if ((Node->getFlags().hasNoNaNs() ||
8526 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8527 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8528 (Node->getFlags().hasNoSignedZeros() ||
8529 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8530 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8531 unsigned IEEE2018Op =
8532 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8533 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8534 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8535 Node->getOperand(1), Node->getFlags());
8536 }
8537
8538 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8539 return SelCC;
8540
8541 return SDValue();
8542}
8543
8544SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8545 SelectionDAG &DAG) const {
8546 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8547 return Expanded;
8548
8549 SDLoc DL(N);
8550 SDValue LHS = N->getOperand(0);
8551 SDValue RHS = N->getOperand(1);
8552 unsigned Opc = N->getOpcode();
8553 EVT VT = N->getValueType(0);
8554 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8555 bool IsMax = Opc == ISD::FMAXIMUM;
8556 SDNodeFlags Flags = N->getFlags();
8557
8558 // First, implement comparison not propagating NaN. If no native fmin or fmax
8559 // is available, use a plain select with setcc instead.
8560 SDValue MinMax;
8561 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8562 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8563
8564 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8565 // signed zero behavior.
8566 bool MinMaxMustRespectOrderedZero = false;
8567
8568 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8569 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8570 MinMaxMustRespectOrderedZero = true;
8571 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8572 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8573 } else {
8574 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8575 return DAG.UnrollVectorOp(N);
8576
8577 // Any NaN will be propagated later, so orderedness doesn't matter.
8578 SDValue Compare =
8579 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8580 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8581 }
8582
8583 // Propagate NaN from either operand.
8584 if (!N->getFlags().hasNoNaNs() &&
8585 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8586 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8587 APFloat::getNaN(VT.getFltSemantics()));
8588 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8589 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8590 }
8591
8592 // fminimum/fmaximum requires -0.0 to compare less than +0.0
8593 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8594 !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) {
8595 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8596 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8597 SDValue TestZero =
8598 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8599 SDValue LCmp = DAG.getSelect(
8600 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8601 MinMax, Flags);
8602 SDValue RCmp = DAG.getSelect(
8603 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8604 LCmp, Flags);
8605 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8606 }
8607
8608 return MinMax;
8609}
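// The two fix-up selects above implement the IEEE 754-2019 minimum/maximum
// semantics; for example, for fmaximum:
//   fmaximum(+0.0, -0.0) == +0.0   (-0.0 is ordered below +0.0)
//   fmaximum(NaN, x)     == NaN    (a NaN operand always propagates)
// whereas fmaxnum (2008 maxNum) returns the non-NaN operand in the NaN case.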
8610
8611SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8612 SelectionDAG &DAG) const {
8613 SDLoc DL(Node);
8614 SDValue LHS = Node->getOperand(0);
8615 SDValue RHS = Node->getOperand(1);
8616 unsigned Opc = Node->getOpcode();
8617 EVT VT = Node->getValueType(0);
8618 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8619 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8620 const TargetOptions &Options = DAG.getTarget().Options;
8621 SDNodeFlags Flags = Node->getFlags();
8622
8623 unsigned NewOp =
8624 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8625
8626 if (isOperationLegalOrCustom(NewOp, VT)) {
8627 if (!Flags.hasNoNaNs()) {
8628 // Insert canonicalizes if it's possible we need to quiet to get correct
8629 // sNaN behavior.
8630 if (!DAG.isKnownNeverSNaN(LHS)) {
8631 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8632 }
8633 if (!DAG.isKnownNeverSNaN(RHS)) {
8634 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8635 }
8636 }
8637
8638 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8639 }
8640
8641 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since they have the
8642 // same behavior in all other cases, +0.0 vs -0.0 included.
8643 if (Flags.hasNoNaNs() ||
8644 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8645 unsigned IEEE2019Op =
8646 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8647 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8648 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8649 }
8650
8651 // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may return
8652 // either one for +0.0 vs -0.0.
8653 if ((Flags.hasNoNaNs() ||
8654 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8655 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8656 DAG.isKnownNeverZeroFloat(RHS))) {
8657 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8658 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8659 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8660 }
8661
8662 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8663 return DAG.UnrollVectorOp(Node);
8664
8665 // If only one operand is NaN, override it with the other operand.
8666 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8667 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8668 }
8669 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8670 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8671 }
8672
8673 SDValue MinMax =
8674 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8675 // If MinMax is NaN, let's quiet it.
8676 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
8677 !DAG.isKnownNeverNaN(RHS)) {
8678 MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
8679 }
8680
8681 // Fixup signed zero behavior.
8682 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8683 (DAG.isKnownNeverZeroFloat(LHS) && DAG.isKnownNeverZeroFloat(RHS))) {
8684 return MinMax;
8685 }
8686 SDValue TestZero =
8687 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8688 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8689 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8690 SDValue LCmp = DAG.getSelect(
8691 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8692 MinMax, Flags);
8693 SDValue RCmp = DAG.getSelect(
8694 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8695 Flags);
8696 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8697}
8698
8699/// Returns a true value if this FPClassTest can be performed with an ordered
8700/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8701/// std::nullopt if it cannot be performed as a compare with 0.
8702static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8703 const fltSemantics &Semantics,
8704 const MachineFunction &MF) {
8705 FPClassTest OrderedMask = Test & ~fcNan;
8706 FPClassTest NanTest = Test & fcNan;
8707 bool IsOrdered = NanTest == fcNone;
8708 bool IsUnordered = NanTest == fcNan;
8709
8710 // Skip cases that are testing for only a qnan or snan.
8711 if (!IsOrdered && !IsUnordered)
8712 return std::nullopt;
8713
8714 if (OrderedMask == fcZero &&
8715 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8716 return IsOrdered;
8717 if (OrderedMask == (fcZero | fcSubnormal) &&
8718 MF.getDenormalMode(Semantics).inputsAreZero())
8719 return IsOrdered;
8720 return std::nullopt;
8721}
8722
8723SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8724 const FPClassTest OrigTestMask,
8725 SDNodeFlags Flags, const SDLoc &DL,
8726 SelectionDAG &DAG) const {
8727 EVT OperandVT = Op.getValueType();
8728 assert(OperandVT.isFloatingPoint());
8729 FPClassTest Test = OrigTestMask;
8730
8731 // Degenerate cases.
8732 if (Test == fcNone)
8733 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8734 if (Test == fcAllFlags)
8735 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8736
8737 // PPC double double is a pair of doubles, of which the higher part determines
8738 // the value class.
8739 if (OperandVT == MVT::ppcf128) {
8740 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8741 DAG.getConstant(1, DL, MVT::i32));
8742 OperandVT = MVT::f64;
8743 }
8744
8745 // Floating-point type properties.
8746 EVT ScalarFloatVT = OperandVT.getScalarType();
8747 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8748 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8749 bool IsF80 = (ScalarFloatVT == MVT::f80);
8750
8751 // Some checks can be implemented using float comparisons, if floating point
8752 // exceptions are ignored.
8753 if (Flags.hasNoFPExcept() &&
8754 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8755 FPClassTest FPTestMask = Test;
8756 bool IsInvertedFP = false;
8757
8758 if (FPClassTest InvertedFPCheck =
8759 invertFPClassTestIfSimpler(FPTestMask, true)) {
8760 FPTestMask = InvertedFPCheck;
8761 IsInvertedFP = true;
8762 }
8763
8764 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8765 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8766
8767 // See if we can fold an | fcNan into an unordered compare.
8768 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8769
8770 // Can't fold the ordered check if we're only testing for snan or qnan
8771 // individually.
8772 if ((FPTestMask & fcNan) != fcNan)
8773 OrderedFPTestMask = FPTestMask;
8774
8775 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8776
8777 if (std::optional<bool> IsCmp0 =
8778 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8779 IsCmp0 && (isCondCodeLegalOrCustom(
8780 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8781 OperandVT.getScalarType().getSimpleVT()))) {
8782
8783 // If denormals could be implicitly treated as 0, this is not equivalent
8784 // to a compare with 0 since it will also be true for denormals.
8785 return DAG.getSetCC(DL, ResultVT, Op,
8786 DAG.getConstantFP(0.0, DL, OperandVT),
8787 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8788 }
8789
8790 if (FPTestMask == fcNan &&
8791 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8792 OperandVT.getScalarType().getSimpleVT()))
8793 return DAG.getSetCC(DL, ResultVT, Op, Op,
8794 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8795
8796 bool IsOrderedInf = FPTestMask == fcInf;
8797 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8798 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8799 : UnorderedCmpOpcode,
8800 OperandVT.getScalarType().getSimpleVT()) &&
8801 (isOperationLegal(ISD::FABS, OperandVT.getScalarType()) ||
8803 (OperandVT.isVector() &&
8804 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
8805 // isinf(x) --> fabs(x) == inf
8806 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8807 SDValue Inf =
8808 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8809 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8810 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8811 }
8812
8813 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8814 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8815 : UnorderedCmpOpcode,
8816 OperandVT.getSimpleVT())) {
8817 // isposinf(x) --> x == inf
8818 // isneginf(x) --> x == -inf
8819 // isposinf(x) || nan --> x u== inf
8820 // isneginf(x) || nan --> x u== -inf
8821
8822 SDValue Inf = DAG.getConstantFP(
8823 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
8824 OperandVT);
8825 return DAG.getSetCC(DL, ResultVT, Op, Inf,
8826 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8827 }
8828
8829 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8830 // TODO: Could handle ordered case, but it produces worse code for
8831 // x86. Maybe handle ordered if fabs is free?
8832
8833 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8834 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8835
8836 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8837 OperandVT.getScalarType().getSimpleVT())) {
8838 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8839
8840 // TODO: Maybe only makes sense if fabs is free. Integer test of
8841 // exponent bits seems better for x86.
8842 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8843 SDValue SmallestNormal = DAG.getConstantFP(
8844 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8845 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8846 IsOrdered ? OrderedOp : UnorderedOp);
8847 }
8848 }
8849
8850 if (FPTestMask == fcNormal) {
8851 // TODO: Handle unordered
8852 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8853 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
8854
8855 if (isCondCodeLegalOrCustom(IsFiniteOp,
8856 OperandVT.getScalarType().getSimpleVT()) &&
8857 isCondCodeLegalOrCustom(IsNormalOp,
8858 OperandVT.getScalarType().getSimpleVT()) &&
8859 isFAbsFree(OperandVT)) {
8860 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
8861 SDValue Inf =
8862 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8863 SDValue SmallestNormal = DAG.getConstantFP(
8864 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8865
8866 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8867 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
8868 SDValue IsNormal =
8869 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
8870 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
8871 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
8872 }
8873 }
8874 }
8875
8876 // Some checks may be represented as the inversion of a simpler check, e.g.
8877 // "inf|normal|subnormal|zero" => !"nan".
8878 bool IsInverted = false;
8879
8880 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8881 Test = InvertedCheck;
8882 IsInverted = true;
8883 }
8884
8885 // In the general case use integer operations.
8886 unsigned BitSize = OperandVT.getScalarSizeInBits();
8887 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8888 if (OperandVT.isVector())
8889 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8890 OperandVT.getVectorElementCount());
8891 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8892
8893 // Various masks.
8894 APInt SignBit = APInt::getSignMask(BitSize);
8895 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8896 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8897 const unsigned ExplicitIntBitInF80 = 63;
8898 APInt ExpMask = Inf;
8899 if (IsF80)
8900 ExpMask.clearBit(ExplicitIntBitInF80);
8901 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8902 APInt QNaNBitMask =
8903 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8904 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8905
8906 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8907 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8908 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8909 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8910 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8911 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8912
8913 SDValue Res;
8914 const auto appendResult = [&](SDValue PartialRes) {
8915 if (PartialRes) {
8916 if (Res)
8917 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8918 else
8919 Res = PartialRes;
8920 }
8921 };
8922
8923 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8924 const auto getIntBitIsSet = [&]() -> SDValue {
8925 if (!IntBitIsSetV) {
8926 APInt IntBitMask(BitSize, 0);
8927 IntBitMask.setBit(ExplicitIntBitInF80);
8928 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8929 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8930 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8931 }
8932 return IntBitIsSetV;
8933 };
8934
8935 // Split the value into sign bit and absolute value.
8936 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8937 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8938 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8939
8940 // Tests that involve more than one class should be processed first.
8941 SDValue PartialRes;
8942
8943 if (IsF80)
8944 ; // Detect finite numbers of f80 by checking individual classes because
8945 // they have different settings of the explicit integer bit.
8946 else if ((Test & fcFinite) == fcFinite) {
8947 // finite(V) ==> abs(V) < exp_mask
8948 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8949 Test &= ~fcFinite;
8950 } else if ((Test & fcFinite) == fcPosFinite) {
8951 // finite(V) && V > 0 ==> V < exp_mask
8952 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8953 Test &= ~fcPosFinite;
8954 } else if ((Test & fcFinite) == fcNegFinite) {
8955 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8956 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8957 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8958 Test &= ~fcNegFinite;
8959 }
8960 appendResult(PartialRes);
8961
8962 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8963 // fcZero | fcSubnormal => test all exponent bits are 0
8964 // TODO: Handle sign bit specific cases
8965 if (PartialCheck == (fcZero | fcSubnormal)) {
8966 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8967 SDValue ExpIsZero =
8968 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8969 appendResult(ExpIsZero);
8970 Test &= ~PartialCheck & fcAllFlags;
8971 }
8972 }
8973
8974 // Check for individual classes.
8975
8976 if (unsigned PartialCheck = Test & fcZero) {
8977 if (PartialCheck == fcPosZero)
8978 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8979 else if (PartialCheck == fcZero)
8980 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8981 else // ISD::fcNegZero
8982 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8983 appendResult(PartialRes);
8984 }
8985
8986 if (unsigned PartialCheck = Test & fcSubnormal) {
8987 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8988 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8989 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8990 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8991 SDValue VMinusOneV =
8992 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8993 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8994 if (PartialCheck == fcNegSubnormal)
8995 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8996 appendResult(PartialRes);
8997 }
8998
8999 if (unsigned PartialCheck = Test & fcInf) {
9000 if (PartialCheck == fcPosInf)
9001 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9002 else if (PartialCheck == fcInf)
9003 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9004 else { // ISD::fcNegInf
9005 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9006 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9007 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9008 }
9009 appendResult(PartialRes);
9010 }
9011
9012 if (unsigned PartialCheck = Test & fcNan) {
9013 APInt InfWithQnanBit = Inf | QNaNBitMask;
9014 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9015 if (PartialCheck == fcNan) {
9016 // isnan(V) ==> abs(V) > int(inf)
9017 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9018 if (IsF80) {
9019 // Recognize unsupported values as NaNs for compatibility with glibc.
9020 // In them (exp(V)==0) == int_bit.
9021 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9022 SDValue ExpIsZero =
9023 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9024 SDValue IsPseudo =
9025 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9026 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9027 }
9028 } else if (PartialCheck == fcQNan) {
9029 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9030 PartialRes =
9031 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9032 } else { // ISD::fcSNan
9033 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9034 // abs(V) < (unsigned(Inf) | quiet_bit)
9035 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9036 SDValue IsNotQnan =
9037 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9038 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9039 }
9040 appendResult(PartialRes);
9041 }
9042
9043 if (unsigned PartialCheck = Test & fcNormal) {
9044 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9045 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9046 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9047 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9048 APInt ExpLimit = ExpMask - ExpLSB;
9049 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9050 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9051 if (PartialCheck == fcNegNormal)
9052 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9053 else if (PartialCheck == fcPosNormal) {
9054 SDValue PosSignV =
9055 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
9056 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9057 }
9058 if (IsF80)
9059 PartialRes =
9060 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9061 appendResult(PartialRes);
9062 }
9063
9064 if (!Res)
9065 return DAG.getConstant(IsInverted, DL, ResultVT);
9066 if (IsInverted)
9067 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
9068 return Res;
9069}
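// The integer fallback above relies on the usual IEEE encoding tricks; for
// f32 they specialize to (illustrative values):
//   Abs = Bits & 0x7FFFFFFF;
//   iszero(x)      <=> Abs == 0
//   isinf(x)       <=> Abs == 0x7F800000
//   isnan(x)       <=> Abs >  0x7F800000
//   issubnormal(x) <=> Abs - 1 < 0x007FFFFF (unsigned compare)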
9070
9071// Only expand vector types if we have the appropriate vector bit operations.
9072static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9073 assert(VT.isVector() && "Expected vector type");
9074 unsigned Len = VT.getScalarSizeInBits();
9075 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9076 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9077 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9078 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9079 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9080}
9081
9082SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9083 SDLoc dl(Node);
9084 EVT VT = Node->getValueType(0);
9085 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9086 SDValue Op = Node->getOperand(0);
9087 unsigned Len = VT.getScalarSizeInBits();
9088 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9089
9090 // TODO: Add support for irregular type lengths.
9091 if (!(Len <= 128 && Len % 8 == 0))
9092 return SDValue();
9093
9094 // Only expand vector types if we have the appropriate vector bit operations.
9095 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9096 return SDValue();
9097
9098 // This is the "best" algorithm from
9099 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9100 SDValue Mask55 =
9101 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9102 SDValue Mask33 =
9103 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9104 SDValue Mask0F =
9105 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9106
9107 // v = v - ((v >> 1) & 0x55555555...)
9108 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9109 DAG.getNode(ISD::AND, dl, VT,
9110 DAG.getNode(ISD::SRL, dl, VT, Op,
9111 DAG.getConstant(1, dl, ShVT)),
9112 Mask55));
9113 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9114 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9115 DAG.getNode(ISD::AND, dl, VT,
9116 DAG.getNode(ISD::SRL, dl, VT, Op,
9117 DAG.getConstant(2, dl, ShVT)),
9118 Mask33));
9119 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9120 Op = DAG.getNode(ISD::AND, dl, VT,
9121 DAG.getNode(ISD::ADD, dl, VT, Op,
9122 DAG.getNode(ISD::SRL, dl, VT, Op,
9123 DAG.getConstant(4, dl, ShVT))),
9124 Mask0F);
9125
9126 if (Len <= 8)
9127 return Op;
9128
9129 // Avoid the multiply if we only have 2 bytes to add.
9130 // TODO: Only doing this for scalars because vectors weren't as obviously
9131 // improved.
9132 if (Len == 16 && !VT.isVector()) {
9133 // v = (v + (v >> 8)) & 0x00FF;
9134 return DAG.getNode(ISD::AND, dl, VT,
9135 DAG.getNode(ISD::ADD, dl, VT, Op,
9136 DAG.getNode(ISD::SRL, dl, VT, Op,
9137 DAG.getConstant(8, dl, ShVT))),
9138 DAG.getConstant(0xFF, dl, VT));
9139 }
9140
9141 // v = (v * 0x01010101...) >> (Len - 8)
9142 SDValue V;
9143 if (isOperationLegalOrCustom(
9144 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9145 SDValue Mask01 =
9146 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9147 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9148 } else {
9149 V = Op;
9150 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9151 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9152 V = DAG.getNode(ISD::ADD, dl, VT, V,
9153 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9154 }
9155 }
9156 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9157}
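// The scalar analogue of the expansion above for uint32_t, straight from
// the bithacks page cited in the code (illustrative only):
//
//   uint32_t Popcount32(uint32_t V) {
//     V = V - ((V >> 1) & 0x55555555);                 // 2-bit partial sums
//     V = (V & 0x33333333) + ((V >> 2) & 0x33333333);  // 4-bit partial sums
//     V = (V + (V >> 4)) & 0x0F0F0F0F;                 // 8-bit partial sums
//     return (V * 0x01010101) >> 24;                   // add the four bytes
//   }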
9158
9159SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9160 SDLoc dl(Node);
9161 EVT VT = Node->getValueType(0);
9162 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9163 SDValue Op = Node->getOperand(0);
9164 SDValue Mask = Node->getOperand(1);
9165 SDValue VL = Node->getOperand(2);
9166 unsigned Len = VT.getScalarSizeInBits();
9167 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9168
9169 // TODO: Add support for irregular type lengths.
9170 if (!(Len <= 128 && Len % 8 == 0))
9171 return SDValue();
9172
9173 // This is the same algorithm as in expandCTPOP, from
9174 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9175 SDValue Mask55 =
9176 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9177 SDValue Mask33 =
9178 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9179 SDValue Mask0F =
9180 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9181
9182 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9183
9184 // v = v - ((v >> 1) & 0x55555555...)
9185 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9186 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9187 DAG.getConstant(1, dl, ShVT), Mask, VL),
9188 Mask55, Mask, VL);
9189 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9190
9191 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9192 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9193 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9194 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9195 DAG.getConstant(2, dl, ShVT), Mask, VL),
9196 Mask33, Mask, VL);
9197 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9198
9199 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9200 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9201 Mask, VL),
9202 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9203 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9204
9205 if (Len <= 8)
9206 return Op;
9207
9208 // v = (v * 0x01010101...) >> (Len - 8)
9209 SDValue V;
9210 if (isOperationLegalOrCustom(
9211 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9212 SDValue Mask01 =
9213 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9214 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9215 } else {
9216 V = Op;
9217 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9218 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9219 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9220 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9221 Mask, VL);
9222 }
9223 }
9224 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9225 Mask, VL);
9226}
9227
9228SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9229 SDLoc dl(Node);
9230 EVT VT = Node->getValueType(0);
9231 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9232 SDValue Op = Node->getOperand(0);
9233 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9234
9235 // If the non-ZERO_UNDEF version is supported we can use that instead.
9236 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9237 isOperationLegalOrCustom(ISD::CTLZ, VT))
9238 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9239
9240 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9241 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9242 EVT SetCCVT =
9243 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9244 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9245 SDValue Zero = DAG.getConstant(0, dl, VT);
9246 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9247 return DAG.getSelect(dl, VT, SrcIsZero,
9248 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9249 }
9250
9251 // Only expand vector types if we have the appropriate vector bit operations.
9252 // This includes the operations needed to expand CTPOP if it isn't supported.
9253 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9254 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9255 !canExpandVectorCTPOP(*this, VT)) ||
9256 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9257 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9258 return SDValue();
9259
9260 // for now, we do this:
9261 // x = x | (x >> 1);
9262 // x = x | (x >> 2);
9263 // ...
9264 // x = x | (x >>16);
9265 // x = x | (x >>32); // for 64-bit input
9266 // return popcount(~x);
9267 //
9268 // Ref: "Hacker's Delight" by Henry Warren
9269 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9270 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9271 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9272 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9273 }
9274 Op = DAG.getNOT(dl, Op, VT);
9275 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9276}
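// Scalar form of the smear-and-popcount fallback above, assuming uint32_t:
//
//   x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
//   return popcount(~x);  // every bit below the leading 1 is now set
//
// For x == 0 the smear leaves 0, ~x is all ones, and the result is 32.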
9277
9278SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9279 SDLoc dl(Node);
9280 EVT VT = Node->getValueType(0);
9281 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9282 SDValue Op = Node->getOperand(0);
9283 SDValue Mask = Node->getOperand(1);
9284 SDValue VL = Node->getOperand(2);
9285 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9286
9287 // do this:
9288 // x = x | (x >> 1);
9289 // x = x | (x >> 2);
9290 // ...
9291 // x = x | (x >>16);
9292 // x = x | (x >>32); // for 64-bit input
9293 // return popcount(~x);
9294 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9295 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9296 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9297 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9298 VL);
9299 }
9300 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9301 Mask, VL);
9302 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9303}
9304
9305SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9306 const SDLoc &DL, EVT VT, SDValue Op,
9307 unsigned BitWidth) const {
9308 if (BitWidth != 32 && BitWidth != 64)
9309 return SDValue();
9310 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9311 : APInt(64, 0x0218A392CD3D5DBFULL);
9312 const DataLayout &TD = DAG.getDataLayout();
9313 MachinePointerInfo PtrInfo =
9314 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9315 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9316 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9317 SDValue Lookup = DAG.getNode(
9318 ISD::SRL, DL, VT,
9319 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9320 DAG.getConstant(DeBruijn, DL, VT)),
9321 DAG.getConstant(ShiftAmt, DL, VT));
9322 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
9323
9324 SmallVector<uint8_t> Table(BitWidth, 0);
9325 for (unsigned i = 0; i < BitWidth; i++) {
9326 APInt Shl = DeBruijn.shl(i);
9327 APInt Lshr = Shl.lshr(ShiftAmt);
9328 Table[Lshr.getZExtValue()] = i;
9329 }
9330
9331 // Create a ConstantDataArray in the constant pool.
9332 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9333 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9334 TD.getPrefTypeAlign(CA->getType()));
9335 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9336 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9337 PtrInfo, MVT::i8);
9338 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9339 return ExtLoad;
9340
9341 EVT SetCCVT =
9342 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9343 SDValue Zero = DAG.getConstant(0, DL, VT);
9344 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9345 return DAG.getSelect(DL, VT, SrcIsZero,
9346 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9347}
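// The scalar shape of the De Bruijn lookup above for 32 bits (a sketch;
// the table fill matches the loop in the function):
//
//   uint8_t Table[32];
//   for (unsigned i = 0; i != 32; ++i)
//     Table[(0x077CB531U << i) >> 27] = i;
//   // x & -x isolates the lowest set bit (a power of two), so the multiply
//   // shifts the De Bruijn constant left by cttz(x); the top five bits of
//   // the product then index the table.
//   unsigned Tz = Table[((x & -x) * 0x077CB531U) >> 27];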
9348
9349SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9350 SDLoc dl(Node);
9351 EVT VT = Node->getValueType(0);
9352 SDValue Op = Node->getOperand(0);
9353 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9354
9355 // If the non-ZERO_UNDEF version is supported we can use that instead.
9356 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9357 isOperationLegalOrCustom(ISD::CTTZ, VT))
9358 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9359
9360 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9361 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9362 EVT SetCCVT =
9363 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9364 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9365 SDValue Zero = DAG.getConstant(0, dl, VT);
9366 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9367 return DAG.getSelect(dl, VT, SrcIsZero,
9368 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9369 }
9370
9371 // Only expand vector types if we have the appropriate vector bit operations.
9372 // This includes the operations needed to expand CTPOP if it isn't supported.
9373 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9374 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9375 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9376 !canExpandVectorCTPOP(*this, VT)) ||
9377 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9378 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9379 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9380 return SDValue();
9381
9382 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9383 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9384 isOperationExpand(ISD::CTLZ, VT))
9385 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9386 return V;
9387
9388 // for now, we use: { return popcount(~x & (x - 1)); }
9389 // unless the target has ctlz but not ctpop, in which case we use:
9390 // { return 32 - nlz(~x & (x-1)); }
9391 // Ref: "Hacker's Delight" by Henry Warren
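// For a concrete check: x = 0x68 (0110'1000) gives x - 1 = 0x67 and
// ~x = 0x97, so ~x & (x - 1) = 0x07 and popcount(0x07) = 3 = cttz(x).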
9392 SDValue Tmp = DAG.getNode(
9393 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9394 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9395
9396 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9397 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9398 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9399 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9400 }
9401
9402 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9403}
9404
9405 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9406 SDValue Op = Node->getOperand(0);
9407 SDValue Mask = Node->getOperand(1);
9408 SDValue VL = Node->getOperand(2);
9409 SDLoc dl(Node);
9410 EVT VT = Node->getValueType(0);
9411
9412 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9413 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9414 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9415 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9416 DAG.getConstant(1, dl, VT), Mask, VL);
9417 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9418 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9419}
9420
9421 SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9422 SelectionDAG &DAG) const {
9423 // %cond = to_bool_vec %source
9424 // %splat = splat /*val=*/VL
9425 // %tz = step_vector
9426 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9427 // %r = vp.reduce.umin %v
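// For example, with %source = <0, 0, 1, 0> and VL = 4: %splat is
// <4, 4, 4, 4>, %tz is <0, 1, 2, 3>, the select yields <4, 4, 2, 4>, and
// the umin reduction returns 2, the index of the first set element
// (or VL itself when no element is set).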
9428 SDLoc DL(N);
9429 SDValue Source = N->getOperand(0);
9430 SDValue Mask = N->getOperand(1);
9431 SDValue EVL = N->getOperand(2);
9432 EVT SrcVT = Source.getValueType();
9433 EVT ResVT = N->getValueType(0);
9434 EVT ResVecVT =
9435 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9436
9437 // Convert to boolean vector.
9438 if (SrcVT.getScalarType() != MVT::i1) {
9439 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9440 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9441 SrcVT.getVectorElementCount());
9442 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9443 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9444 }
9445
9446 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9447 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9448 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9449 SDValue Select =
9450 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9451 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9452}
9453
9454 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9455 bool IsNegative) const {
9456 SDLoc dl(N);
9457 EVT VT = N->getValueType(0);
9458 SDValue Op = N->getOperand(0);
9459
9460 // abs(x) -> smax(x,sub(0,x))
9461 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9462 isOperationLegal(ISD::SMAX, VT)) {
9463 SDValue Zero = DAG.getConstant(0, dl, VT);
9464 Op = DAG.getFreeze(Op);
9465 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9466 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9467 }
9468
9469 // abs(x) -> umin(x,sub(0,x))
9470 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9471 isOperationLegal(ISD::UMIN, VT)) {
9472 SDValue Zero = DAG.getConstant(0, dl, VT);
9473 Op = DAG.getFreeze(Op);
9474 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9475 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9476 }
9477
9478 // 0 - abs(x) -> smin(x, sub(0,x))
9479 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9480 isOperationLegal(ISD::SMIN, VT)) {
9481 SDValue Zero = DAG.getConstant(0, dl, VT);
9482 Op = DAG.getFreeze(Op);
9483 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9484 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9485 }
9486
9487 // Only expand vector types if we have the appropriate vector operations.
9488 if (VT.isVector() &&
9489 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9490 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9491 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9492 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9493 return SDValue();
9494
9495 Op = DAG.getFreeze(Op);
9496 SDValue Shift = DAG.getNode(
9497 ISD::SRA, dl, VT, Op,
9498 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9499 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9500
9501 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
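// For a concrete check with i8 x = -5 (0xFB): Y = 0xFF (-1),
// x ^ Y = 0x04, and 4 - (-1) = 5. For non-negative x, Y = 0 and both
// the xor and the sub are no-ops.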
9502 if (!IsNegative)
9503 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9504
9505 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9506 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9507}
9508
9509 SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9510 SDLoc dl(N);
9511 EVT VT = N->getValueType(0);
9512 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9513 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9514 bool IsSigned = N->getOpcode() == ISD::ABDS;
9515
9516 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9517 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9518 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9519 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9520 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9521 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9522 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9523 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9524 }
9525
9526 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9527 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9528 return DAG.getNode(ISD::OR, dl, VT,
9529 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9530 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9531
9532 // If the subtract doesn't overflow then just use abs(sub())
9533 // NOTE: don't use frozen operands for value tracking.
9534 bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9535 DAG.SignBitIsZero(N->getOperand(0));
9536
9537 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
9538 N->getOperand(1)))
9539 return DAG.getNode(ISD::ABS, dl, VT,
9540 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9541
9542 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
9543 N->getOperand(0)))
9544 return DAG.getNode(ISD::ABS, dl, VT,
9545 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9546
9547 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9548 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9549 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9550
9551 // Branchless expansion iff cmp result is allbits:
9552 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9553 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
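// For a concrete check of abds(3, 7): the compare is false (0),
// sub(3, 7) = -4, xor(-4, 0) = -4, and sub(0, -4) = 4. When lhs > rhs the
// compare is allbits, so sub(-1, xor(diff, -1)) = -1 + diff + 1 = diff.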
9554 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9555 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9556 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9557 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9558 }
9559
9560 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9561 // flag if the (scalar) type is illegal as this is more likely to legalize
9562 // cleanly:
9563 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9564 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9565 SDValue USubO =
9566 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9567 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9568 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9569 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9570 }
9571
9572 // FIXME: Should really try to split the vector in case it's legal on a
9573 // subvector.
9574 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9575 return DAG.UnrollVectorOp(N);
9576
9577 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9578 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9579 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9580 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9581}
9582
9583 SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9584 SDLoc dl(N);
9585 EVT VT = N->getValueType(0);
9586 SDValue LHS = N->getOperand(0);
9587 SDValue RHS = N->getOperand(1);
9588
9589 unsigned Opc = N->getOpcode();
9590 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9591 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9592 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9593 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9594 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9595 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9596 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9597 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9598 "Unknown AVG node");
9599
9600 // If the operands are already extended, we can add+shift.
9601 bool IsExt =
9602 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9603 DAG.ComputeNumSignBits(RHS) >= 2) ||
9604 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9605 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9606 if (IsExt) {
9607 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9608 if (!IsFloor)
9609 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9610 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9611 DAG.getShiftAmountConstant(1, VT, dl));
9612 }
9613
9614 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9615 if (VT.isScalarInteger()) {
9616 unsigned BW = VT.getScalarSizeInBits();
9617 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9618 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9619 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9620 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9621 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9622 if (!IsFloor)
9623 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9624 DAG.getConstant(1, dl, ExtVT));
9625 // Just use SRL as we will be truncating away the extended sign bits.
9626 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9627 DAG.getShiftAmountConstant(1, ExtVT, dl));
9628 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9629 }
9630 }
9631
9632 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
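// For a concrete check with i8 avgflooru(200, 100): the add wraps to 44
// with the carry set, the shift gives 22, and or-ing the carry back in at
// bit 7 yields 22 + 128 = 150 = (200 + 100) / 2.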
9633 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9634 SDValue UAddWithOverflow =
9635 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9636
9637 SDValue Sum = UAddWithOverflow.getValue(0);
9638 SDValue Overflow = UAddWithOverflow.getValue(1);
9639
9640 // Right shift the sum by 1
9641 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9642 DAG.getShiftAmountConstant(1, VT, dl));
9643
9644 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9645 SDValue OverflowShl = DAG.getNode(
9646 ISD::SHL, dl, VT, ZeroExtOverflow,
9647 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9648
9649 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9650 }
9651
9652 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9653 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9654 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9655 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
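// These follow from a + b = (a & b) + (a | b) = 2*(a & b) + (a ^ b).
// For a concrete check, avgflooru(5, 6) = (5 & 6) + ((5 ^ 6) >> 1)
// = 4 + 1 = 5, and avgceilu(5, 6) = (5 | 6) - ((5 ^ 6) >> 1) = 7 - 1 = 6.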
9656 LHS = DAG.getFreeze(LHS);
9657 RHS = DAG.getFreeze(RHS);
9658 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9659 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9660 SDValue Shift =
9661 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9662 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9663}
9664
9665 SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9666 SDLoc dl(N);
9667 EVT VT = N->getValueType(0);
9668 SDValue Op = N->getOperand(0);
9669
9670 if (!VT.isSimple())
9671 return SDValue();
9672
9673 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9674 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9675 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9676 default:
9677 return SDValue();
9678 case MVT::i16:
9679 // Use a rotate by 8. This can be further expanded if necessary.
9680 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9681 case MVT::i32:
9682 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9683 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9684 DAG.getConstant(0xFF00, dl, VT));
9685 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9686 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9687 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9688 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9689 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9690 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9691 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9692 case MVT::i64:
9693 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9694 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9695 DAG.getConstant(255ULL<<8, dl, VT));
9696 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9697 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9698 DAG.getConstant(255ULL<<16, dl, VT));
9699 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9700 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9701 DAG.getConstant(255ULL<<24, dl, VT));
9702 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9703 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9704 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9705 DAG.getConstant(255ULL<<24, dl, VT));
9706 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9707 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9708 DAG.getConstant(255ULL<<16, dl, VT));
9709 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9710 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9711 DAG.getConstant(255ULL<<8, dl, VT));
9712 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9713 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9714 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9715 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9716 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9717 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9718 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9719 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9720 }
9721}
9722
9723 SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9724 SDLoc dl(N);
9725 EVT VT = N->getValueType(0);
9726 SDValue Op = N->getOperand(0);
9727 SDValue Mask = N->getOperand(1);
9728 SDValue EVL = N->getOperand(2);
9729
9730 if (!VT.isSimple())
9731 return SDValue();
9732
9733 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9734 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9735 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9736 default:
9737 return SDValue();
9738 case MVT::i16:
9739 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9740 Mask, EVL);
9741 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9742 Mask, EVL);
9743 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9744 case MVT::i32:
9745 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9746 Mask, EVL);
9747 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9748 Mask, EVL);
9749 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9750 Mask, EVL);
9751 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9752 Mask, EVL);
9753 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9754 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9755 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9756 Mask, EVL);
9757 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9758 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9759 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9760 case MVT::i64:
9761 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9762 Mask, EVL);
9763 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9764 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9765 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9766 Mask, EVL);
9767 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9768 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9769 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9770 Mask, EVL);
9771 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9772 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9773 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9774 Mask, EVL);
9775 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9776 Mask, EVL);
9777 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9778 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9779 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9780 Mask, EVL);
9781 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9782 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9783 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9784 Mask, EVL);
9785 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9786 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9787 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9788 Mask, EVL);
9789 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9790 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9791 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9792 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9793 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9794 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9795 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9796 }
9797}
9798
9799 SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9800 SDLoc dl(N);
9801 EVT VT = N->getValueType(0);
9802 SDValue Op = N->getOperand(0);
9803 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9804 unsigned Sz = VT.getScalarSizeInBits();
9805
9806 SDValue Tmp, Tmp2, Tmp3;
9807
9808 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9809 // and finally the i1 pairs.
9810 // TODO: We can easily support i4/i2 legal types if any target ever does.
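// For a concrete check with the i8 value 0b1011'0001: the nibble swap
// gives 0b0001'1011, the pair swap gives 0b0100'1110, and the final bit
// swap gives 0b1000'1101, the fully reversed value.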
9811 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9812 // Create the masks - repeating the pattern every byte.
9813 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9814 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9815 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9816
9817 // BSWAP if the type is wider than a single byte.
9818 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9819
9820 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9821 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9822 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9823 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9824 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9825 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9826
9827 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9828 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9829 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9830 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9831 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9832 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9833
9834 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9835 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9836 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9837 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9838 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9839 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9840 return Tmp;
9841 }
9842
9843 Tmp = DAG.getConstant(0, dl, VT);
9844 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9845 if (I < J)
9846 Tmp2 =
9847 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9848 else
9849 Tmp2 =
9850 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9851
9852 APInt Shift = APInt::getOneBitSet(Sz, J);
9853 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9854 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9855 }
9856
9857 return Tmp;
9858}
9859
9860 SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9861 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9862
9863 SDLoc dl(N);
9864 EVT VT = N->getValueType(0);
9865 SDValue Op = N->getOperand(0);
9866 SDValue Mask = N->getOperand(1);
9867 SDValue EVL = N->getOperand(2);
9868 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9869 unsigned Sz = VT.getScalarSizeInBits();
9870
9871 SDValue Tmp, Tmp2, Tmp3;
9872
9873 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9874 // and finally the i1 pairs.
9875 // TODO: We can easily support i4/i2 legal types if any target ever does.
9876 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9877 // Create the masks - repeating the pattern every byte.
9878 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9879 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9880 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9881
9882 // BSWAP if the type is wider than a single byte.
9883 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9884
9885 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9886 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9887 Mask, EVL);
9888 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9889 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9890 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9891 Mask, EVL);
9892 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9893 Mask, EVL);
9894 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9895
9896 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9897 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9898 Mask, EVL);
9899 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9900 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9901 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9902 Mask, EVL);
9903 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9904 Mask, EVL);
9905 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9906
9907 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9908 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9909 Mask, EVL);
9910 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9911 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9912 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9913 Mask, EVL);
9914 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9915 Mask, EVL);
9916 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9917 return Tmp;
9918 }
9919 return SDValue();
9920}
9921
9922std::pair<SDValue, SDValue>
9923 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9924 SelectionDAG &DAG) const {
9925 SDLoc SL(LD);
9926 SDValue Chain = LD->getChain();
9927 SDValue BasePTR = LD->getBasePtr();
9928 EVT SrcVT = LD->getMemoryVT();
9929 EVT DstVT = LD->getValueType(0);
9930 ISD::LoadExtType ExtType = LD->getExtensionType();
9931
9932 if (SrcVT.isScalableVector())
9933 report_fatal_error("Cannot scalarize scalable vector loads");
9934
9935 unsigned NumElem = SrcVT.getVectorNumElements();
9936
9937 EVT SrcEltVT = SrcVT.getScalarType();
9938 EVT DstEltVT = DstVT.getScalarType();
9939
9940 // A vector must always be stored in memory as-is, i.e. without any padding
9941 // between the elements, since various code depends on it, e.g. in the
9942 // handling of a bitcast of a vector type to int, which may be done with a
9943 // vector store followed by an integer load. A vector that does not have
9944 // elements that are byte-sized must therefore be stored as an integer
9945 // built out of the extracted vector elements.
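// For example, a v4i1 load is done as a single integer load of the
// vector's store size, and each element is then recovered with a shift
// and mask rather than with four separate sub-byte loads.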
9946 if (!SrcEltVT.isByteSized()) {
9947 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9948 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9949
9950 unsigned NumSrcBits = SrcVT.getSizeInBits();
9951 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9952
9953 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9954 SDValue SrcEltBitMask = DAG.getConstant(
9955 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9956
9957 // Load the whole vector and avoid masking off the top bits as it makes
9958 // the codegen worse.
9959 SDValue Load =
9960 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9961 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9962 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9963
9964 SmallVector<SDValue, 8> Vals;
9965 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9966 unsigned ShiftIntoIdx =
9967 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9968 SDValue ShiftAmount = DAG.getShiftAmountConstant(
9969 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
9970 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9971 SDValue Elt =
9972 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9973 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9974
9975 if (ExtType != ISD::NON_EXTLOAD) {
9976 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9977 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9978 }
9979
9980 Vals.push_back(Scalar);
9981 }
9982
9983 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9984 return std::make_pair(Value, Load.getValue(1));
9985 }
9986
9987 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9988 assert(SrcEltVT.isByteSized());
9989
9990 SmallVector<SDValue, 8> Vals;
9991 SmallVector<SDValue, 8> LoadChains;
9992
9993 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9994 SDValue ScalarLoad =
9995 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9996 LD->getPointerInfo().getWithOffset(Idx * Stride),
9997 SrcEltVT, LD->getOriginalAlign(),
9998 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9999
10000 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10001
10002 Vals.push_back(ScalarLoad.getValue(0));
10003 LoadChains.push_back(ScalarLoad.getValue(1));
10004 }
10005
10006 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10007 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10008
10009 return std::make_pair(Value, NewChain);
10010}
10011
10012 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10013 SelectionDAG &DAG) const {
10014 SDLoc SL(ST);
10015
10016 SDValue Chain = ST->getChain();
10017 SDValue BasePtr = ST->getBasePtr();
10018 SDValue Value = ST->getValue();
10019 EVT StVT = ST->getMemoryVT();
10020
10021 if (StVT.isScalableVector())
10022 report_fatal_error("Cannot scalarize scalable vector stores");
10023
10024 // The type of the data we want to save
10025 EVT RegVT = Value.getValueType();
10026 EVT RegSclVT = RegVT.getScalarType();
10027
10028 // The type of data as saved in memory.
10029 EVT MemSclVT = StVT.getScalarType();
10030
10031 unsigned NumElem = StVT.getVectorNumElements();
10032
10033 // A vector must always be stored in memory as-is, i.e. without any padding
10034 // between the elements, since various code depends on it, e.g. in the
10035 // handling of a bitcast of a vector type to int, which may be done with a
10036 // vector store followed by an integer load. A vector that does not have
10037 // elements that are byte-sized must therefore be stored as an integer
10038 // built out of the extracted vector elements.
10039 if (!MemSclVT.isByteSized()) {
10040 unsigned NumBits = StVT.getSizeInBits();
10041 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10042
10043 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10044
10045 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10046 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
10047 DAG.getVectorIdxConstant(Idx, SL));
10048 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10049 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10050 unsigned ShiftIntoIdx =
10051 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10052 SDValue ShiftAmount =
10053 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10054 SDValue ShiftedElt =
10055 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10056 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10057 }
10058
10059 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10060 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10061 ST->getAAInfo());
10062 }
10063
10064 // Store Stride in bytes
10065 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10066 assert(Stride && "Zero stride!");
10067 // Extract each of the elements from the original vector and save them into
10068 // memory individually.
10069 SmallVector<SDValue, 8> Stores;
10070 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10071 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
10072 DAG.getVectorIdxConstant(Idx, SL));
10073
10074 SDValue Ptr =
10075 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10076
10077 // This scalar TruncStore may be illegal, but we legalize it later.
10078 SDValue Store = DAG.getTruncStore(
10079 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10080 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10081 ST->getAAInfo());
10082
10083 Stores.push_back(Store);
10084 }
10085
10086 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10087}
10088
10089std::pair<SDValue, SDValue>
10090 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10091 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10092 "unaligned indexed loads not implemented!");
10093 SDValue Chain = LD->getChain();
10094 SDValue Ptr = LD->getBasePtr();
10095 EVT VT = LD->getValueType(0);
10096 EVT LoadedVT = LD->getMemoryVT();
10097 SDLoc dl(LD);
10098 auto &MF = DAG.getMachineFunction();
10099
10100 if (VT.isFloatingPoint() || VT.isVector()) {
10101 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10102 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10103 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10104 LoadedVT.isVector()) {
10105 // Scalarize the load and let the individual components be handled.
10106 return scalarizeVectorLoad(LD, DAG);
10107 }
10108
10109 // Expand to a (misaligned) integer load of the same size,
10110 // then bitconvert to floating point or vector.
10111 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10112 LD->getMemOperand());
10113 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10114 if (LoadedVT != VT)
10115 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10116 ISD::ANY_EXTEND, dl, VT, Result);
10117
10118 return std::make_pair(Result, newLoad.getValue(1));
10119 }
10120
10121 // Copy the value to an (aligned) stack slot using (unaligned) integer
10122 // loads and stores, then do an (aligned) load from the stack slot.
10123 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10124 unsigned LoadedBytes = LoadedVT.getStoreSize();
10125 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10126 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10127
10128 // Make sure the stack slot is also aligned for the register type.
10129 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10130 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10131 SmallVector<SDValue, 8> Stores;
10132 SDValue StackPtr = StackBase;
10133 unsigned Offset = 0;
10134
10135 EVT PtrVT = Ptr.getValueType();
10136 EVT StackPtrVT = StackPtr.getValueType();
10137
10138 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10139 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10140
10141 // Do all but one copies using the full register width.
10142 for (unsigned i = 1; i < NumRegs; i++) {
10143 // Load one integer register's worth from the original location.
10144 SDValue Load = DAG.getLoad(
10145 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10146 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10147 LD->getAAInfo());
10148 // Follow the load with a store to the stack slot. Remember the store.
10149 Stores.push_back(DAG.getStore(
10150 Load.getValue(1), dl, Load, StackPtr,
10151 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10152 // Increment the pointers.
10153 Offset += RegBytes;
10154
10155 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10156 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10157 }
10158
10159 // The last copy may be partial. Do an extending load.
10160 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10161 8 * (LoadedBytes - Offset));
10162 SDValue Load =
10163 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10164 LD->getPointerInfo().getWithOffset(Offset), MemVT,
10165 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10166 LD->getAAInfo());
10167 // Follow the load with a store to the stack slot. Remember the store.
10168 // On big-endian machines this requires a truncating store to ensure
10169 // that the bits end up in the right place.
10170 Stores.push_back(DAG.getTruncStore(
10171 Load.getValue(1), dl, Load, StackPtr,
10172 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10173
10174 // The order of the stores doesn't matter - say it with a TokenFactor.
10175 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10176
10177 // Finally, perform the original load only redirected to the stack slot.
10178 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10179 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10180 LoadedVT);
10181
10182 // Callers expect a MERGE_VALUES node.
10183 return std::make_pair(Load, TF);
10184 }
10185
10186 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10187 "Unaligned load of unsupported type.");
10188
10189 // Compute the new VT that is half the size of the old one. This is an
10190 // integer MVT.
10191 unsigned NumBits = LoadedVT.getSizeInBits();
10192 EVT NewLoadedVT;
10193 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10194 NumBits >>= 1;
10195
10196 Align Alignment = LD->getOriginalAlign();
10197 unsigned IncrementSize = NumBits / 8;
10198 ISD::LoadExtType HiExtType = LD->getExtensionType();
10199
10200 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10201 if (HiExtType == ISD::NON_EXTLOAD)
10202 HiExtType = ISD::ZEXTLOAD;
10203
10204 // Load the value in two parts
10205 SDValue Lo, Hi;
10206 if (DAG.getDataLayout().isLittleEndian()) {
10207 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10208 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10209 LD->getAAInfo());
10210
10211 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10212 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10213 LD->getPointerInfo().getWithOffset(IncrementSize),
10214 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10215 LD->getAAInfo());
10216 } else {
10217 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10218 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10219 LD->getAAInfo());
10220
10221 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10222 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10223 LD->getPointerInfo().getWithOffset(IncrementSize),
10224 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10225 LD->getAAInfo());
10226 }
10227
10228 // aggregate the two parts
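// E.g. an i32 load on a little-endian target becomes two i16 loads and
// Result = (Hi << 16) | zext(Lo); on big-endian targets the high half is
// the one loaded from the lower address.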
10229 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10230 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10231 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10232
10233 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10234 Hi.getValue(1));
10235
10236 return std::make_pair(Result, TF);
10237}
10238
10239 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10240 SelectionDAG &DAG) const {
10241 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10242 "unaligned indexed stores not implemented!");
10243 SDValue Chain = ST->getChain();
10244 SDValue Ptr = ST->getBasePtr();
10245 SDValue Val = ST->getValue();
10246 EVT VT = Val.getValueType();
10247 Align Alignment = ST->getOriginalAlign();
10248 auto &MF = DAG.getMachineFunction();
10249 EVT StoreMemVT = ST->getMemoryVT();
10250
10251 SDLoc dl(ST);
10252 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10253 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10254 if (isTypeLegal(intVT)) {
10255 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10256 StoreMemVT.isVector()) {
10257 // Scalarize the store and let the individual components be handled.
10258 SDValue Result = scalarizeVectorStore(ST, DAG);
10259 return Result;
10260 }
10261 // Expand to a bitconvert of the value to the integer type of the
10262 // same size, then a (misaligned) int store.
10263 // FIXME: Does not handle truncating floating point stores!
10264 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10265 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10266 Alignment, ST->getMemOperand()->getFlags());
10267 return Result;
10268 }
10269 // Do an (aligned) store to a stack slot, then copy from the stack slot
10270 // to the final destination using (unaligned) integer loads and stores.
10271 MVT RegVT = getRegisterType(
10272 *DAG.getContext(),
10273 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10274 EVT PtrVT = Ptr.getValueType();
10275 unsigned StoredBytes = StoreMemVT.getStoreSize();
10276 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10277 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10278
10279 // Make sure the stack slot is also aligned for the register type.
10280 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10281 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10282
10283 // Perform the original store, only redirected to the stack slot.
10284 SDValue Store = DAG.getTruncStore(
10285 Chain, dl, Val, StackPtr,
10286 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10287
10288 EVT StackPtrVT = StackPtr.getValueType();
10289
10290 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10291 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10292 SmallVector<SDValue, 8> Stores;
10293 unsigned Offset = 0;
10294
10295 // Do all but one copies using the full register width.
10296 for (unsigned i = 1; i < NumRegs; i++) {
10297 // Load one integer register's worth from the stack slot.
10298 SDValue Load = DAG.getLoad(
10299 RegVT, dl, Store, StackPtr,
10300 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10301 // Store it to the final location. Remember the store.
10302 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10303 ST->getPointerInfo().getWithOffset(Offset),
10304 ST->getOriginalAlign(),
10305 ST->getMemOperand()->getFlags()));
10306 // Increment the pointers.
10307 Offset += RegBytes;
10308 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10309 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10310 }
10311
10312 // The last store may be partial. Do a truncating store. On big-endian
10313 // machines this requires an extending load from the stack slot to ensure
10314 // that the bits are in the right place.
10315 EVT LoadMemVT =
10316 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10317
10318 // Load from the stack slot.
10319 SDValue Load = DAG.getExtLoad(
10320 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10321 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10322
10323 Stores.push_back(
10324 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
10325 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10326 ST->getOriginalAlign(),
10327 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10328 // The order of the stores doesn't matter - say it with a TokenFactor.
10329 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10330 return Result;
10331 }
10332
10333 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10334 "Unaligned store of unknown type.");
10335 // Get the half-size VT
10336 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10337 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10338 unsigned IncrementSize = NumBits / 8;
10339
10340 // Divide the stored value in two parts.
10341 SDValue ShiftAmount =
10342 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10343 SDValue Lo = Val;
10344 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10345 // fold and not use the upper bits. A smaller constant may be easier to
10346 // materialize.
10347 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10348 Lo = DAG.getNode(
10349 ISD::AND, dl, VT, Lo,
10350 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10351 VT));
10352 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10353
10354 // Store the two parts
10355 SDValue Store1, Store2;
10356 Store1 = DAG.getTruncStore(Chain, dl,
10357 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10358 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10359 ST->getMemOperand()->getFlags());
10360
10361 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10362 Store2 = DAG.getTruncStore(
10363 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10364 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10365 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10366
10367 SDValue Result =
10368 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10369 return Result;
10370}
10371
10372SDValue
10373 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10374 const SDLoc &DL, EVT DataVT,
10375 SelectionDAG &DAG,
10376 bool IsCompressedMemory) const {
10377 SDValue Increment;
10378 EVT AddrVT = Addr.getValueType();
10379 EVT MaskVT = Mask.getValueType();
10380 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10381 "Incompatible types of Data and Mask");
10382 if (IsCompressedMemory) {
10383 if (DataVT.isScalableVector())
10385 "Cannot currently handle compressed memory with scalable vectors");
10386 // Increment the pointer according to the number of '1's in the mask.
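// For example, a compressing store of v8i32 with five mask bits set
// advances the pointer by popcount(mask) * 4 = 20 bytes rather than by
// the full 32-byte store size.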
10387 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10388 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10389 if (MaskIntVT.getSizeInBits() < 32) {
10390 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10391 MaskIntVT = MVT::i32;
10392 }
10393
10394 // Count '1's with POPCNT.
10395 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10396 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10397 // Scale is an element size in bytes.
10398 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10399 AddrVT);
10400 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10401 } else if (DataVT.isScalableVector()) {
10402 Increment = DAG.getVScale(DL, AddrVT,
10403 APInt(AddrVT.getFixedSizeInBits(),
10404 DataVT.getStoreSize().getKnownMinValue()));
10405 } else
10406 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10407
10408 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10409}
10410
10411 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10412 EVT VecVT, const SDLoc &dl,
10413 ElementCount SubEC) {
10414 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10415 "Cannot index a scalable vector within a fixed-width vector");
10416
10417 unsigned NElts = VecVT.getVectorMinNumElements();
10418 unsigned NumSubElts = SubEC.getKnownMinValue();
10419 EVT IdxVT = Idx.getValueType();
10420
10421 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10422 // If this is a constant index and we know that the index plus the number
10423 // of elements in the subvector minus one is less than the minimum number
10424 // of elements, then it's safe to return Idx.
10425 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10426 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10427 return Idx;
10428 SDValue VS =
10429 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10430 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10431 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10432 DAG.getConstant(NumSubElts, dl, IdxVT));
10433 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10434 }
10435 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10436 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10437 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10438 DAG.getConstant(Imm, dl, IdxVT));
10439 }
10440 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10441 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10442 DAG.getConstant(MaxIndex, dl, IdxVT));
10443}
10444
10445 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10446 SDValue VecPtr, EVT VecVT,
10447 SDValue Index) const {
10448 return getVectorSubVecPointer(
10449 DAG, VecPtr, VecVT,
10450 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10451 Index);
10452}
10453
10454 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10455 SDValue VecPtr, EVT VecVT,
10456 EVT SubVecVT,
10457 SDValue Index) const {
10458 SDLoc dl(Index);
10459 // Make sure the index type is big enough to compute in.
10460 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10461
10462 EVT EltVT = VecVT.getVectorElementType();
10463
10464 // Calculate the element offset and add it to the pointer.
10465 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10466 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10467 "Converting bits to bytes lost precision");
10468 assert(SubVecVT.getVectorElementType() == EltVT &&
10469 "Sub-vector must be a vector with matching element type");
10470 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10471 SubVecVT.getVectorElementCount());
10472
10473 EVT IdxVT = Index.getValueType();
10474 if (SubVecVT.isScalableVector())
10475 Index =
10476 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10477 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10478
10479 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10480 DAG.getConstant(EltSize, dl, IdxVT));
10481 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10482}
10483
10484//===----------------------------------------------------------------------===//
10485// Implementation of Emulated TLS Model
10486//===----------------------------------------------------------------------===//
10487
10488 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10489 SelectionDAG &DAG) const {
10490 // Access to the address of TLS variable xyz is lowered to a function call:
10491 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10492 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10493 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10494 SDLoc dl(GA);
10495
10496 ArgListTy Args;
10497 ArgListEntry Entry;
10498 const GlobalValue *GV =
10499 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10500 SmallString<32> NameString("__emutls_v.");
10501 NameString += GV->getName();
10502 StringRef EmuTlsVarName(NameString);
10503 const GlobalVariable *EmuTlsVar =
10504 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10505 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10506 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10507 Entry.Ty = VoidPtrType;
10508 Args.push_back(Entry);
10509
10510 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10511
10512 TargetLowering::CallLoweringInfo CLI(DAG);
10513 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10514 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10515 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10516
10517 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
10518 // At least for X86 targets; maybe good for other targets too?
10519 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10520 MFI.setAdjustsStack(true); // Is this only for X86 target?
10521 MFI.setHasCalls(true);
10522
10523 assert((GA->getOffset() == 0) &&
10524 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10525 return CallResult.first;
10526}
10527
10528 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10529 SelectionDAG &DAG) const {
10530 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10531 if (!isCtlzFast())
10532 return SDValue();
10533 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10534 SDLoc dl(Op);
10535 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10536 EVT VT = Op.getOperand(0).getValueType();
10537 SDValue Zext = Op.getOperand(0);
10538 if (VT.bitsLT(MVT::i32)) {
10539 VT = MVT::i32;
10540 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10541 }
10542 unsigned Log2b = Log2_32(VT.getSizeInBits());
10543 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10544 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10545 DAG.getConstant(Log2b, dl, MVT::i32));
10546 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10547 }
10548 return SDValue();
10549}
10550
10551 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10552 SDValue Op0 = Node->getOperand(0);
10553 SDValue Op1 = Node->getOperand(1);
10554 EVT VT = Op0.getValueType();
10555 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10556 unsigned Opcode = Node->getOpcode();
10557 SDLoc DL(Node);
10558
10559 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
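// For a concrete check: for x = 0 the compare is allbits (-1), so
// x - (-1) = 1; for any other x the compare is 0 and x is unchanged.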
10560 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10561 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10562 Op0 = DAG.getFreeze(Op0);
10563 SDValue Zero = DAG.getConstant(0, DL, VT);
10564 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10565 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10566 }
10567
10568 // umin(x,y) -> sub(x,usubsat(x,y))
10569 // TODO: Missing freeze(Op0)?
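// For a concrete check: umin(5, 3) = 5 - usubsat(5, 3) = 5 - 2 = 3,
// and umin(3, 5) = 3 - usubsat(3, 5) = 3 - 0 = 3.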
10570 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10571 isOperationLegal(ISD::USUBSAT, VT)) {
10572 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10573 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10574 }
10575
10576 // umax(x,y) -> add(x,usubsat(y,x))
10577 // TODO: Missing freeze(Op0)?
10578 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10579 isOperationLegal(ISD::USUBSAT, VT)) {
10580 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10581 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10582 }
10583
10584 // FIXME: Should really try to split the vector in case it's legal on a
10585 // subvector.
10586 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10587 return DAG.UnrollVectorOp(Node);
10588
10589 // Attempt to find an existing SETCC node that we can reuse.
10590 // TODO: Do we need a generic doesSETCCNodeExist?
10591 // TODO: Missing freeze(Op0)/freeze(Op1)?
10592 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10593 ISD::CondCode PrefCommuteCC,
10594 ISD::CondCode AltCommuteCC) {
10595 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10596 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10597 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10598 {Op0, Op1, DAG.getCondCode(CC)})) {
10599 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10600 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10601 }
10602 }
10603 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10604 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10605 {Op0, Op1, DAG.getCondCode(CC)})) {
10606 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10607 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10608 }
10609 }
10610 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10611 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10612 };
10613
10614 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10615 // -> Y = (A < B) ? B : A
10616 // -> Y = (A >= B) ? A : B
10617 // -> Y = (A <= B) ? B : A
10618 switch (Opcode) {
10619 case ISD::SMAX:
10620 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10621 case ISD::SMIN:
10622 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10623 case ISD::UMAX:
10624 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10625 case ISD::UMIN:
10626 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10627 }
10628
10629 llvm_unreachable("How did we get here?");
10630}
10631
10632 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10633 unsigned Opcode = Node->getOpcode();
10634 SDValue LHS = Node->getOperand(0);
10635 SDValue RHS = Node->getOperand(1);
10636 EVT VT = LHS.getValueType();
10637 SDLoc dl(Node);
10638
10639 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10640 assert(VT.isInteger() && "Expected operands to be integers");
10641
10642 // usub.sat(a, b) -> umax(a, b) - b
10643 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10644 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10645 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10646 }
10647
10648 // uadd.sat(a, b) -> umin(a, ~b) + b
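// For a concrete check with i8 uadd.sat(200, 100): ~100 = 155,
// umin(200, 155) = 155, and 155 + 100 = 255, the saturated result.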
10649 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10650 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10651 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10652 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10653 }
10654
10655 unsigned OverflowOp;
10656 switch (Opcode) {
10657 case ISD::SADDSAT:
10658 OverflowOp = ISD::SADDO;
10659 break;
10660 case ISD::UADDSAT:
10661 OverflowOp = ISD::UADDO;
10662 break;
10663 case ISD::SSUBSAT:
10664 OverflowOp = ISD::SSUBO;
10665 break;
10666 case ISD::USUBSAT:
10667 OverflowOp = ISD::USUBO;
10668 break;
10669 default:
10670 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10671 "addition or subtraction node.");
10672 }
10673
10674 // FIXME: Should really try to split the vector in case it's legal on a
10675 // subvector.
10676 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10677 return DAG.UnrollVectorOp(Node);
10678
10679 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10680 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10681 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10682 SDValue SumDiff = Result.getValue(0);
10683 SDValue Overflow = Result.getValue(1);
10684 SDValue Zero = DAG.getConstant(0, dl, VT);
10685 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10686
10687 if (Opcode == ISD::UADDSAT) {
10688 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10689 // (LHS + RHS) | OverflowMask
10690 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10691 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10692 }
10693 // Overflow ? 0xffff.... : (LHS + RHS)
10694 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10695 }
10696
10697 if (Opcode == ISD::USUBSAT) {
10698 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10699 // (LHS - RHS) & ~OverflowMask
10700 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10701 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10702 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10703 }
10704 // Overflow ? 0 : (LHS - RHS)
10705 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10706 }
10707
10708 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10709 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10710 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10711
10712 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10713 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10714
10715 // If either of the operand signs are known, then they are guaranteed to
10716 // only saturate in one direction. If non-negative they will saturate
10717 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10718 //
10719 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10720 // sign of 'y' has to be flipped.
10721
10722 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10723 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10724 : KnownRHS.isNegative();
10725 if (LHSIsNonNegative || RHSIsNonNegative) {
10726 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10727 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10728 }
10729
10730 bool LHSIsNegative = KnownLHS.isNegative();
10731 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10732 : KnownRHS.isNonNegative();
10733 if (LHSIsNegative || RHSIsNegative) {
10734 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10735 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10736 }
10737 }
10738
10739 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
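// For a concrete check with i8 saddsat(100, 100): SumDiff wraps to -56,
// the arithmetic shift fills to -1 (allbits), and -1 ^ 0x80 = 0x7F = 127,
// i.e. SIGNED_MAX, as required on positive overflow.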
10740 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10741 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10742 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10743 DAG.getConstant(BitWidth - 1, dl, VT));
10744 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10745 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10746}
10747
10748 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10749 unsigned Opcode = Node->getOpcode();
10750 SDValue LHS = Node->getOperand(0);
10751 SDValue RHS = Node->getOperand(1);
10752 EVT VT = LHS.getValueType();
10753 EVT ResVT = Node->getValueType(0);
10754 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10755 SDLoc dl(Node);
10756
10757 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10758 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10759 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10760 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10761
10762 // We can't perform arithmetic on i1 values. Extending them would
10763 // probably result in worse codegen, so let's just use two selects instead.
10764 // Some targets are also just better off using selects rather than subtraction
10765 // because one of the conditions can be merged with one of the selects.
10766 // And finally, if we don't know the contents of high bits of a boolean value
10767 // we can't perform any arithmetic either.
10768 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10769 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10770 SDValue SelectZeroOrOne =
10771 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10772 DAG.getConstant(0, dl, ResVT));
10773 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10774 SelectZeroOrOne);
10775 }
10776
10777 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10778 std::swap(IsGT, IsLT);
10779 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10780 ResVT);
10781}
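// Example: ucmp 5, 7 must produce -1. With zero-or-one booleans this is
// IsGT - IsLT = 0 - 1; with zero-or-negative-one booleans the operands are
// swapped first, giving IsLT - IsGT = -1 - 0 = -1 as well.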
10782
10783 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10784 unsigned Opcode = Node->getOpcode();
10785 bool IsSigned = Opcode == ISD::SSHLSAT;
10786 SDValue LHS = Node->getOperand(0);
10787 SDValue RHS = Node->getOperand(1);
10788 EVT VT = LHS.getValueType();
10789 SDLoc dl(Node);
10790
10791 assert((Node->getOpcode() == ISD::SSHLSAT ||
10792 Node->getOpcode() == ISD::USHLSAT) &&
10793 "Expected a SHLSAT opcode");
10794 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10795 assert(VT.isInteger() && "Expected operands to be integers");
10796
10797 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10798 return DAG.UnrollVectorOp(Node);
10799
10800 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10801
10802 unsigned BW = VT.getScalarSizeInBits();
10803 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10804 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10805 SDValue Orig =
10806 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10807
10808 SDValue SatVal;
10809 if (IsSigned) {
10810 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10811 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10812 SDValue Cond =
10813 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10814 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10815 } else {
10816 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10817 }
10818 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10819 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10820}
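// Example: i8 sshlsat 0x40, 2 computes Result = 0x00; shifting back yields
// 0x00 != 0x40, so the shift overflowed, and since LHS is non-negative the
// result saturates to SatMax = 0x7f.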
10821
10822 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10823 bool Signed, EVT WideVT,
10824 const SDValue LL, const SDValue LH,
10825 const SDValue RL, const SDValue RH,
10826 SDValue &Lo, SDValue &Hi) const {
10827 // We can fall back to a libcall with an illegal type for the MUL if we
10828 // have a libcall big enough.
10829 // Also, we can fall back to a division in some cases, but that's a big
10830 // performance hit in the general case.
10831 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10832 if (WideVT == MVT::i16)
10833 LC = RTLIB::MUL_I16;
10834 else if (WideVT == MVT::i32)
10835 LC = RTLIB::MUL_I32;
10836 else if (WideVT == MVT::i64)
10837 LC = RTLIB::MUL_I64;
10838 else if (WideVT == MVT::i128)
10839 LC = RTLIB::MUL_I128;
10840
10841 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10842 // We'll expand the multiplication by brute force because we have no other
10843 // options. This is a trivially-generalized version of the code from
10844 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10845 // 4.3.1).
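// With h = HalfBits, the low product decomposes as
// LL*RL = (LLH*2^h + LLL)*(RLH*2^h + RLL)
//       = LLH*RLH*2^(2h) + (LLH*RLL + LLL*RLH)*2^h + LLL*RLL,
// which T, U, V and W accumulate below at width Bits; RH*LL and RL*LH
// then contribute only to the high word.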
10846 EVT VT = LL.getValueType();
10847 unsigned Bits = VT.getSizeInBits();
10848 unsigned HalfBits = Bits >> 1;
10849 SDValue Mask =
10850 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10851 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10852 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10853
10854 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10855 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10856
10857 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10858 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10859 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10860 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10861
10862 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10863 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10864 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10865 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10866
10867 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10868 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10869 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10870
10871 SDValue W =
10872 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10873 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10874 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10875 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10876
10877 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10878 DAG.getNode(ISD::ADD, dl, VT,
10879 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10880 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10881 } else {
10882 // Attempt a libcall.
10883 SDValue Ret;
10884 TargetLowering::MakeLibCallOptions CallOptions;
10885 CallOptions.setIsSigned(Signed);
10886 CallOptions.setIsPostTypeLegalization(true);
10887 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10888 // Halves of WideVT are packed into registers in different order
10889 // depending on platform endianness. This is usually handled by
10890 // the C calling convention, but we can't defer to it in
10891 // the legalizer.
10892 SDValue Args[] = {LL, LH, RL, RH};
10893 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10894 } else {
10895 SDValue Args[] = {LH, LL, RH, RL};
10896 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10897 }
10898 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10899 "Ret value is a collection of constituent nodes holding result.");
10900 if (DAG.getDataLayout().isLittleEndian()) {
10901 // Same as above.
10902 Lo = Ret.getOperand(0);
10903 Hi = Ret.getOperand(1);
10904 } else {
10905 Lo = Ret.getOperand(1);
10906 Hi = Ret.getOperand(0);
10907 }
10908 }
10909}
10910
10911 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10912 bool Signed, const SDValue LHS,
10913 const SDValue RHS, SDValue &Lo,
10914 SDValue &Hi) const {
10915 EVT VT = LHS.getValueType();
10916 assert(RHS.getValueType() == VT && "Mismatching operand types");
10917
10918 SDValue HiLHS;
10919 SDValue HiRHS;
10920 if (Signed) {
10921 // The high part is obtained by SRA'ing all but one of the bits of the
10922 // low part.
10923 unsigned LoSize = VT.getFixedSizeInBits();
10924 HiLHS = DAG.getNode(
10925 ISD::SRA, dl, VT, LHS,
10926 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10927 HiRHS = DAG.getNode(
10928 ISD::SRA, dl, VT, RHS,
10929 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10930 } else {
10931 HiLHS = DAG.getConstant(0, dl, VT);
10932 HiRHS = DAG.getConstant(0, dl, VT);
10933 }
10934 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10935 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10936}
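// For the signed case the SRA produces the sign extension of each operand:
// e.g. i32 LHS = -5 gives HiLHS = -1, so (HiLHS, LHS) is exactly the i64
// value -5 split into two 32-bit halves.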
10937
10938 SDValue
10939 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
10940 assert((Node->getOpcode() == ISD::SMULFIX ||
10941 Node->getOpcode() == ISD::UMULFIX ||
10942 Node->getOpcode() == ISD::SMULFIXSAT ||
10943 Node->getOpcode() == ISD::UMULFIXSAT) &&
10944 "Expected a fixed point multiplication opcode");
10945
10946 SDLoc dl(Node);
10947 SDValue LHS = Node->getOperand(0);
10948 SDValue RHS = Node->getOperand(1);
10949 EVT VT = LHS.getValueType();
10950 unsigned Scale = Node->getConstantOperandVal(2);
10951 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10952 Node->getOpcode() == ISD::UMULFIXSAT);
10953 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10954 Node->getOpcode() == ISD::SMULFIXSAT);
10955 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10956 unsigned VTSize = VT.getScalarSizeInBits();
10957
10958 if (!Scale) {
10959 // [us]mul.fix(a, b, 0) -> mul(a, b)
10960 if (!Saturating) {
10961 if (isOperationLegalOrCustom(ISD::MUL, VT))
10962 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10963 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10964 SDValue Result =
10965 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10966 SDValue Product = Result.getValue(0);
10967 SDValue Overflow = Result.getValue(1);
10968 SDValue Zero = DAG.getConstant(0, dl, VT);
10969
10970 APInt MinVal = APInt::getSignedMinValue(VTSize);
10971 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10972 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10973 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10974 // Xor the inputs, if resulting sign bit is 0 the product will be
10975 // positive, else negative.
10976 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10977 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10978 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10979 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10980 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
10981 SDValue Result =
10982 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10983 SDValue Product = Result.getValue(0);
10984 SDValue Overflow = Result.getValue(1);
10985
10986 APInt MaxVal = APInt::getMaxValue(VTSize);
10987 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10988 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10989 }
10990 }
10991
10992 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10993 "Expected scale to be less than the number of bits if signed or at "
10994 "most the number of bits if unsigned.");
10995 assert(LHS.getValueType() == RHS.getValueType() &&
10996 "Expected both operands to be the same type");
10997
10998 // Get the upper and lower bits of the result.
10999 SDValue Lo, Hi;
11000 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11001 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11002 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11003 if (VT.isVector())
11004 WideVT =
11005 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11006 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11007 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11008 Lo = Result.getValue(0);
11009 Hi = Result.getValue(1);
11010 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11011 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11012 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11013 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11014 // Try for a multiplication using a wider type.
11015 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11016 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11017 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11018 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11019 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11020 SDValue Shifted =
11021 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11022 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11023 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11024 } else if (VT.isVector()) {
11025 return SDValue();
11026 } else {
11027 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11028 }
11029
11030 if (Scale == VTSize)
11031 // Result is just the top half since we'd be shifting by the width of the
11032 // operand. Overflow is impossible, so this works for both UMULFIX and
11033 // UMULFIXSAT.
11034 return Hi;
11035
11036 // The result will need to be shifted right by the scale since both operands
11037 // are scaled. The result is given to us in 2 halves, so we only want part of
11038 // both in the result.
11039 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11040 DAG.getShiftAmountConstant(Scale, VT, dl));
11041 if (!Saturating)
11042 return Result;
11043
11044 if (!Signed) {
11045 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11046 // widened multiplication) aren't all zeroes.
11047
11048 // Saturate to max if ((Hi >> Scale) != 0),
11049 // which is the same as if (Hi > ((1 << Scale) - 1))
11050 APInt MaxVal = APInt::getMaxValue(VTSize);
11051 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11052 dl, VT);
11053 Result = DAG.getSelectCC(dl, Hi, LowMask,
11054 DAG.getConstant(MaxVal, dl, VT), Result,
11055 ISD::SETUGT);
11056
11057 return Result;
11058 }
11059
11060 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11061 // widened multiplication) aren't all ones or all zeroes.
11062
11063 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11064 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11065
11066 if (Scale == 0) {
11067 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11068 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11069 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11070 // Saturate to SatMin if the wide product is negative, and to SatMax if
11071 // the wide product is positive ...
11072 SDValue Zero = DAG.getConstant(0, dl, VT);
11073 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11074 ISD::SETLT);
11075 // ... but only if we overflowed.
11076 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11077 }
11078
11079 // We handled Scale==0 above, so all the bits to examine are in Hi.
11080
11081 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11082 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11083 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11084 dl, VT);
11085 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11086 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11087 // which is the same as if (Hi < (-1 << (Scale - 1))).
11088 SDValue HighMask =
11089 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11090 dl, VT);
11091 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11092 return Result;
11093}
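// Worked example: i16 smul.fix at scale 8 (Q8.8) on 1.5 (0x0180) and 2.5
// (0x0280) produces the wide product 0x0003c000; FSHR by 8 keeps bits
// [23:8] = 0x03c0, i.e. 3.75 in Q8.8.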
11094
11095 SDValue
11096 TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11097 SDValue LHS, SDValue RHS,
11098 unsigned Scale, SelectionDAG &DAG) const {
11099 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11100 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11101 "Expected a fixed point division opcode");
11102
11103 EVT VT = LHS.getValueType();
11104 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11105 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11106 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11107
11108 // If there is enough room in the type to upscale the LHS or downscale the
11109 // RHS before the division, we can perform it in this type without having to
11110 // resize. For signed operations, the LHS headroom is the number of
11111 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11112 // The headroom for the RHS is the number of trailing zeroes.
11113 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11114 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11115 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11116
11117 // For signed saturating operations, we need to be able to detect true integer
11118 // division overflow; that is, when you have MIN / -EPS. However, this
11119 // is undefined behavior and if we emit divisions that could take such
11120 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11121 // example).
11122 // Avoid this by requiring an extra bit so that we never get this case.
11123 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11124 // signed saturating division, we need to emit a whopping 32-bit division.
11125 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11126 return SDValue();
11127
11128 unsigned LHSShift = std::min(LHSLead, Scale);
11129 unsigned RHSShift = Scale - LHSShift;
11130
11131 // At this point, we know that if we shift the LHS up by LHSShift and the
11132 // RHS down by RHSShift, we can emit a regular division with a final scaling
11133 // factor of Scale.
11134
11135 if (LHSShift)
11136 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11137 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11138 if (RHSShift)
11139 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11140 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11141
11142 SDValue Quot;
11143 if (Signed) {
11144 // For signed operations, if the resulting quotient is negative and the
11145 // remainder is nonzero, subtract 1 from the quotient to round towards
11146 // negative infinity.
11147 SDValue Rem;
11148 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11149 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11150 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11151 if (isTypeLegal(VT) &&
11152 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11153 Quot = DAG.getNode(ISD::SDIVREM, dl,
11154 DAG.getVTList(VT, VT),
11155 LHS, RHS);
11156 Rem = Quot.getValue(1);
11157 Quot = Quot.getValue(0);
11158 } else {
11159 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11160 LHS, RHS);
11161 Rem = DAG.getNode(ISD::SREM, dl, VT,
11162 LHS, RHS);
11163 }
11164 SDValue Zero = DAG.getConstant(0, dl, VT);
11165 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11166 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11167 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11168 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11169 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11170 DAG.getConstant(1, dl, VT));
11171 Quot = DAG.getSelect(dl, VT,
11172 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11173 Sub1, Quot);
11174 } else
11175 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11176 LHS, RHS);
11177
11178 return Quot;
11179}
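// Worked example: i16 udiv.fix at scale 4 with LHS = 0x00f0 has at least
// 8 leading zeros of headroom, so LHS is pre-shifted to 0x0f00 and a plain
// UDIV returns the correctly scaled quotient.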
11180
11181 void TargetLowering::expandUADDSUBO(
11182 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11183 SDLoc dl(Node);
11184 SDValue LHS = Node->getOperand(0);
11185 SDValue RHS = Node->getOperand(1);
11186 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11187
11188 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11189 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11190 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11191 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11192 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11193 { LHS, RHS, CarryIn });
11194 Result = SDValue(NodeCarry.getNode(), 0);
11195 Overflow = SDValue(NodeCarry.getNode(), 1);
11196 return;
11197 }
11198
11199 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11200 LHS.getValueType(), LHS, RHS);
11201
11202 EVT ResultType = Node->getValueType(1);
11203 EVT SetCCType = getSetCCResultType(
11204 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11205 SDValue SetCC;
11206 if (IsAdd && isOneConstant(RHS)) {
11207 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
11208 // the live range of X. We assume comparing with 0 is cheap.
11209 // The general case (X + C) < C is not necessarily beneficial. Although we
11210 // reduce the live range of X, we may introduce the materialization of
11211 // constant C.
11212 SetCC =
11213 DAG.getSetCC(dl, SetCCType, Result,
11214 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11215 } else if (IsAdd && isAllOnesConstant(RHS)) {
11216 // Special case: uaddo X, -1 overflows if X != 0.
11217 SetCC =
11218 DAG.getSetCC(dl, SetCCType, LHS,
11219 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11220 } else {
11221 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11222 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11223 }
11224 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11225}
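// In the generic path overflow is detected as Result <u LHS for UADDO
// (e.g. i8 200 + 100 wraps to 44 < 200) and Result >u LHS for USUBO.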
11226
11227 void TargetLowering::expandSADDSUBO(
11228 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11229 SDLoc dl(Node);
11230 SDValue LHS = Node->getOperand(0);
11231 SDValue RHS = Node->getOperand(1);
11232 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11233
11234 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11235 LHS.getValueType(), LHS, RHS);
11236
11237 EVT ResultType = Node->getValueType(1);
11238 EVT OType = getSetCCResultType(
11239 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11240
11241 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11242 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11243 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11244 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11245 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11246 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11247 return;
11248 }
11249
11250 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11251
11252 // For an addition, the result should be less than one of the operands (LHS)
11253 // if and only if the other operand (RHS) is negative, otherwise there will
11254 // be overflow.
11255 // For a subtraction, the result should be less than one of the operands
11256 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11257 // otherwise there will be overflow.
11258 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11259 SDValue ConditionRHS =
11260 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11261
11262 Overflow = DAG.getBoolExtOrTrunc(
11263 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11264 ResultType, ResultType);
11265}
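// Example of the XOR test: i8 saddo 100, 100 wraps to -56; RHS is
// non-negative but Result < LHS, so the XOR flags overflow. For
// 100 + (-100) = 0 both conditions hold and the XOR cancels: no overflow.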
11266
11267 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11268 SDValue &Overflow, SelectionDAG &DAG) const {
11269 SDLoc dl(Node);
11270 EVT VT = Node->getValueType(0);
11271 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11272 SDValue LHS = Node->getOperand(0);
11273 SDValue RHS = Node->getOperand(1);
11274 bool isSigned = Node->getOpcode() == ISD::SMULO;
11275
11276 // For power-of-two multiplications we can use a simpler shift expansion.
11277 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11278 const APInt &C = RHSC->getAPIntValue();
11279 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11280 if (C.isPowerOf2()) {
11281 // smulo(x, signed_min) is same as umulo(x, signed_min).
11282 bool UseArithShift = isSigned && !C.isMinSignedValue();
11283 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11284 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11285 Overflow = DAG.getSetCC(dl, SetCCVT,
11286 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11287 dl, VT, Result, ShiftAmt),
11288 LHS, ISD::SETNE);
11289 return true;
11290 }
11291 }
11292
11293 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11294 if (VT.isVector())
11295 WideVT =
11296 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11297
11298 SDValue BottomHalf;
11299 SDValue TopHalf;
11300 static const unsigned Ops[2][3] =
11301 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11302 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11303 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11304 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11305 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11306 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11307 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11308 RHS);
11309 TopHalf = BottomHalf.getValue(1);
11310 } else if (isTypeLegal(WideVT)) {
11311 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11312 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11313 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11314 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11315 SDValue ShiftAmt =
11316 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11317 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11318 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11319 } else {
11320 if (VT.isVector())
11321 return false;
11322
11323 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11324 }
11325
11326 Result = BottomHalf;
11327 if (isSigned) {
11328 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11329 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11330 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11331 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11332 } else {
11333 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11334 DAG.getConstant(0, dl, VT), ISD::SETNE);
11335 }
11336
11337 // Truncate the result if SetCC returns a larger type than needed.
11338 EVT RType = Node->getValueType(1);
11339 if (RType.bitsLT(Overflow.getValueType()))
11340 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11341
11342 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11343 "Unexpected result type for S/UMULO legalization");
11344 return true;
11345}
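// Example: i8 smulo 16, 16 yields a 16-bit product 0x0100, so BottomHalf
// is 0x00 and TopHalf is 0x01; TopHalf differs from the broadcast sign bit
// of BottomHalf (0x00), hence overflow is reported.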
11346
11347 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11348 SDLoc dl(Node);
11349 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11350 SDValue Op = Node->getOperand(0);
11351 EVT VT = Op.getValueType();
11352
11353 if (VT.isScalableVector())
11354 report_fatal_error(
11355 "Expanding reductions for scalable vectors is undefined.");
11356
11357 // Try to use a shuffle reduction for power of two vectors.
11358 if (VT.isPow2VectorType()) {
11359 while (VT.getVectorNumElements() > 1) {
11360 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11361 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11362 break;
11363
11364 SDValue Lo, Hi;
11365 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11366 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11367 VT = HalfVT;
11368 }
11369 }
11370
11371 EVT EltVT = VT.getVectorElementType();
11372 unsigned NumElts = VT.getVectorNumElements();
11373
11374 SmallVector<SDValue, 8> Ops;
11375 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11376
11377 SDValue Res = Ops[0];
11378 for (unsigned i = 1; i < NumElts; i++)
11379 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11380
11381 // Result type may be wider than element type.
11382 if (EltVT != Node->getValueType(0))
11383 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11384 return Res;
11385}
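// The shuffle reduction halves the vector on each iteration, so a v8i32
// add reduction costs three vector ADDs (v4, v2, v1) instead of seven
// sequential scalar ADDs from full extraction.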
11386
11387 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11388 SDLoc dl(Node);
11389 SDValue AccOp = Node->getOperand(0);
11390 SDValue VecOp = Node->getOperand(1);
11391 SDNodeFlags Flags = Node->getFlags();
11392
11393 EVT VT = VecOp.getValueType();
11394 EVT EltVT = VT.getVectorElementType();
11395
11396 if (VT.isScalableVector())
11397 report_fatal_error(
11398 "Expanding reductions for scalable vectors is undefined.");
11399
11400 unsigned NumElts = VT.getVectorNumElements();
11401
11402 SmallVector<SDValue, 8> Ops;
11403 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11404
11405 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11406
11407 SDValue Res = AccOp;
11408 for (unsigned i = 0; i < NumElts; i++)
11409 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11410
11411 return Res;
11412}
11413
11414 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11415 SelectionDAG &DAG) const {
11416 EVT VT = Node->getValueType(0);
11417 SDLoc dl(Node);
11418 bool isSigned = Node->getOpcode() == ISD::SREM;
11419 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11420 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11421 SDValue Dividend = Node->getOperand(0);
11422 SDValue Divisor = Node->getOperand(1);
11423 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11424 SDVTList VTs = DAG.getVTList(VT, VT);
11425 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11426 return true;
11427 }
11428 if (isOperationLegalOrCustom(DivOpc, VT)) {
11429 // X % Y -> X-X/Y*Y
11430 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11431 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11432 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11433 return true;
11434 }
11435 return false;
11436}
11437
11438 SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11439 SelectionDAG &DAG) const {
11440 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11441 SDLoc dl(SDValue(Node, 0));
11442 SDValue Src = Node->getOperand(0);
11443
11444 // DstVT is the result type, while SatVT is the size to which we saturate
11445 EVT SrcVT = Src.getValueType();
11446 EVT DstVT = Node->getValueType(0);
11447
11448 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11449 unsigned SatWidth = SatVT.getScalarSizeInBits();
11450 unsigned DstWidth = DstVT.getScalarSizeInBits();
11451 assert(SatWidth <= DstWidth &&
11452 "Expected saturation width smaller than result width");
11453
11454 // Determine minimum and maximum integer values and their corresponding
11455 // floating-point values.
11456 APInt MinInt, MaxInt;
11457 if (IsSigned) {
11458 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11459 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11460 } else {
11461 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11462 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11463 }
11464
11465 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11466 // libcall emission cannot handle this. Large result types will fail.
11467 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11468 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11469 SrcVT = Src.getValueType();
11470 }
11471
11472 const fltSemantics &Sem = SrcVT.getFltSemantics();
11473 APFloat MinFloat(Sem);
11474 APFloat MaxFloat(Sem);
11475
11476 APFloat::opStatus MinStatus =
11477 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11478 APFloat::opStatus MaxStatus =
11479 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11480 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11481 !(MaxStatus & APFloat::opStatus::opInexact);
11482
11483 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11484 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11485
11486 // If the integer bounds are exactly representable as floats and min/max are
11487 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11488 // of comparisons and selects.
11489 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11490 isOperationLegal(ISD::FMAXNUM, SrcVT);
11491 if (AreExactFloatBounds && MinMaxLegal) {
11492 SDValue Clamped = Src;
11493
11494 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11495 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11496 // Clamp by MaxFloat from above. NaN cannot occur.
11497 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11498 // Convert clamped value to integer.
11499 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11500 dl, DstVT, Clamped);
11501
11502 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11503 // which will cast to zero.
11504 if (!IsSigned)
11505 return FpToInt;
11506
11507 // Otherwise, select 0 if Src is NaN.
11508 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11509 EVT SetCCVT =
11510 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11511 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11512 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11513 }
11514
11515 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11516 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11517
11518 // Result of direct conversion. The assumption here is that the operation is
11519 // non-trapping and it's fine to apply it to an out-of-range value if we
11520 // select it away later.
11521 SDValue FpToInt =
11522 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11523
11524 SDValue Select = FpToInt;
11525
11526 EVT SetCCVT =
11527 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11528
11529 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11530 // MinInt if Src is NaN.
11531 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11532 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11533 // If Src OGT MaxFloat, select MaxInt.
11534 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11535 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11536
11537 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11538 // is already zero.
11539 if (!IsSigned)
11540 return Select;
11541
11542 // Otherwise, select 0 if Src is NaN.
11543 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11544 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11545 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11546}
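// Example: fptosi.sat.i8 of 300.0f takes the OGT branch and clamps to
// MaxInt = 127; -300.0f clamps to MinInt = -128; NaN is caught by the
// final SETUO self-compare and mapped to 0.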
11547
11548 SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11549 const SDLoc &dl,
11550 SelectionDAG &DAG) const {
11551 EVT OperandVT = Op.getValueType();
11552 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11553 return Op;
11554 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11555 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11556 // can induce double-rounding which may alter the results. We can
11557 // correct for this using a trick explained in: Boldo, Sylvie, and
11558 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11559 // World Congress. 2005.
11560 unsigned BitSize = OperandVT.getScalarSizeInBits();
11561 EVT WideIntVT = OperandVT.changeTypeToInteger();
11562 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
11563 SDValue SignBit =
11564 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11565 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11566 SDValue AbsWide;
11567 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11568 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11569 } else {
11570 SDValue ClearedSign = DAG.getNode(
11571 ISD::AND, dl, WideIntVT, OpAsInt,
11572 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11573 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11574 }
11575 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11576 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11577
11578 // We can keep the narrow value as-is if narrowing was exact (no
11579 // rounding error), the wide value was NaN (the narrow value is also
11580 // NaN and should be preserved) or if we rounded to the odd value.
11581 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11582 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11583 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11584 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11585 EVT ResultIntVTCCVT = getSetCCResultType(
11586 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11587 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11588 // The result is already odd so we don't need to do anything.
11589 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11590
11591 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11592 AbsWide.getValueType());
11593 // We keep results which are exact, odd or NaN.
11594 SDValue KeepNarrow =
11595 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11596 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11597 // We morally performed a round-down if AbsNarrow is smaller than
11598 // AbsWide.
11599 SDValue NarrowIsRd =
11600 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11601 // If the narrow value is odd or exact, pick it.
11602 // Otherwise, narrow is even and corresponds to either the rounded-up
11603 // or rounded-down value. If narrow is the rounded-down value, we want
11604 // the rounded-up value as it will be odd.
11605 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11606 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11607 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11608 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11609 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11610 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11611 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11612 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11613 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11614}
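// Forcing the low result bit to 1 whenever the narrowing was inexact means
// no subsequent rounding can land on a false tie, so rounding the result
// again to an even narrower type behaves like a single direct rounding.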
11615
11616 SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11617 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11618 SDValue Op = Node->getOperand(0);
11619 EVT VT = Node->getValueType(0);
11620 SDLoc dl(Node);
11621 if (VT.getScalarType() == MVT::bf16) {
11622 if (Node->getConstantOperandVal(1) == 1) {
11623 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11624 }
11625 EVT OperandVT = Op.getValueType();
11626 SDValue IsNaN = DAG.getSetCC(
11627 dl,
11628 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11629 Op, Op, ISD::SETUO);
11630
11631 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11632 // can induce double-rounding which may alter the results. We can
11633 // correct for this using a trick explained in: Boldo, Sylvie, and
11634 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11635 // World Congress. 2005.
11636 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11637 EVT I32 = F32.changeTypeToInteger();
11638 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11639 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11640
11641 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11642 // turning into infinities.
11643 SDValue NaN =
11644 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11645
11646 // Factor in the contribution of the low 16 bits.
11647 SDValue One = DAG.getConstant(1, dl, I32);
11648 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11649 DAG.getShiftAmountConstant(16, I32, dl));
11650 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11651 SDValue RoundingBias =
11652 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11653 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11654
11655 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11656 // 0x80000000.
11657 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11658
11659 // Now that we have rounded, shift the bits into position.
11660 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11661 DAG.getShiftAmountConstant(16, I32, dl));
11662 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11663 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11664 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11665 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11666 }
11667 return SDValue();
11668}
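// The bias implements round-to-nearest-even on the high 16 bits: for f32
// bits 0x3f808000 (a tie, even lsb) adding 0x7fff gives 0x3f80ffff and
// truncates to 0x3f80, while 0x3f818000 (odd lsb) adds 0x8000 and carries
// to 0x3f82.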
11669
11670 SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11671 SelectionDAG &DAG) const {
11672 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11673 assert(Node->getValueType(0).isScalableVector() &&
11674 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11675
11676 EVT VT = Node->getValueType(0);
11677 SDValue V1 = Node->getOperand(0);
11678 SDValue V2 = Node->getOperand(1);
11679 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11680 SDLoc DL(Node);
11681
11682 // Expand through memory thusly:
11683 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11684 // Store V1, Ptr
11685 // Store V2, Ptr + sizeof(V1)
11686 // If (Imm < 0)
11687 // TrailingElts = -Imm
11688 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11689 // else
11690 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11691 // Res = Load Ptr
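// For example, with Imm = 1 on nxv4i32 the load starts at element 1 of
// V1's slot, so the result is { V1[1..], V2[0] }.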
11692
11693 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11694
11695 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11696 VT.getVectorElementCount() * 2);
11697 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11698 EVT PtrVT = StackPtr.getValueType();
11699 auto &MF = DAG.getMachineFunction();
11700 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11701 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11702
11703 // Store the lo part of CONCAT_VECTORS(V1, V2)
11704 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11705 // Store the hi part of CONCAT_VECTORS(V1, V2)
11706 SDValue OffsetToV2 = DAG.getVScale(
11707 DL, PtrVT,
11708 APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11709 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11710 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11711
11712 if (Imm >= 0) {
11713 // Load back the required element. getVectorElementPointer takes care of
11714 // clamping the index if it's out-of-bounds.
11715 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11716 // Load the spliced result
11717 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11718 MachinePointerInfo::getUnknownStack(MF));
11719 }
11720
11721 uint64_t TrailingElts = -Imm;
11722
11723 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11724 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11725 SDValue TrailingBytes =
11726 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11727
11728 if (TrailingElts > VT.getVectorMinNumElements()) {
11729 SDValue VLBytes =
11730 DAG.getVScale(DL, PtrVT,
11731 APInt(PtrVT.getFixedSizeInBits(),
11732 VT.getStoreSize().getKnownMinValue()));
11733 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11734 }
11735
11736 // Calculate the start address of the spliced result.
11737 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11738
11739 // Load the spliced result
11740 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11741 MachinePointerInfo::getUnknownStack(MF));
11742 }
11743
11744 SDValue TargetLowering::expandVectorCompress(SDNode *Node,
11745 SelectionDAG &DAG) const {
11746 SDLoc DL(Node);
11747 SDValue Vec = Node->getOperand(0);
11748 SDValue Mask = Node->getOperand(1);
11749 SDValue Passthru = Node->getOperand(2);
11750
11751 EVT VecVT = Vec.getValueType();
11752 EVT ScalarVT = VecVT.getScalarType();
11753 EVT MaskVT = Mask.getValueType();
11754 EVT MaskScalarVT = MaskVT.getScalarType();
11755
11756 // Needs to be handled by targets that have scalable vector types.
11757 if (VecVT.isScalableVector())
11758 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11759
11760 SDValue StackPtr = DAG.CreateStackTemporary(
11761 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11762 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11763 MachinePointerInfo PtrInfo =
11764 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11765
11766 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11767 SDValue Chain = DAG.getEntryNode();
11768 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11769
11770 bool HasPassthru = !Passthru.isUndef();
11771
11772 // If we have a passthru vector, store it on the stack, overwrite the matching
11773 // positions and then re-write the last element that was potentially
11774 // overwritten even though mask[i] = false.
11775 if (HasPassthru)
11776 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11777
11778 SDValue LastWriteVal;
11779 APInt PassthruSplatVal;
11780 bool IsSplatPassthru =
11781 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11782
11783 if (IsSplatPassthru) {
11784 // As we do not know which position we wrote to last, we cannot simply
11785 // access that index from the passthru vector. So we first check if passthru
11786 // is a splat vector, to use any element ...
11787 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11788 } else if (HasPassthru) {
11789 // ... if it is not a splat vector, we need to get the passthru value at
11790 // position = popcount(mask) and re-load it from the stack before it is
11791 // overwritten in the loop below.
11792 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11793 SDValue Popcount = DAG.getNode(
11794 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11795 Popcount =
11796 DAG.getNode(ISD::ZERO_EXTEND, DL,
11797 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11798 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11799 SDValue LastElmtPtr =
11800 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11801 LastWriteVal = DAG.getLoad(
11802 ScalarVT, DL, Chain, LastElmtPtr,
11803 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11804 Chain = LastWriteVal.getValue(1);
11805 }
11806
11807 unsigned NumElms = VecVT.getVectorNumElements();
11808 for (unsigned I = 0; I < NumElms; I++) {
11809 SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11810
11811 SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11812 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11813 Chain = DAG.getStore(
11814 Chain, DL, ValI, OutPtr,
11815 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11816
11817 // Get the mask value and add it to the current output position. This
11818 // either increments by 1 if MaskI is true or adds 0 otherwise.
11819 // Freeze in case we have poison/undef mask entries.
11820 SDValue MaskI = DAG.getFreeze(
11821 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11822
11823 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11824 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11825 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11826
11827 if (HasPassthru && I == NumElms - 1) {
11828 SDValue EndOfVector =
11829 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11830 SDValue AllLanesSelected =
11831 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11832 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11833 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11834
11835 // Re-write the last ValI if all lanes were selected. Otherwise,
11836 // overwrite the last write with the passthru value.
11837 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11838 LastWriteVal, SDNodeFlags::Unpredictable);
11839 Chain = DAG.getStore(
11840 Chain, DL, LastWriteVal, OutPtr,
11841 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11842 }
11843 }
11844
11845 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11846}
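// Example: compressing <4 x i32> [a,b,c,d] with mask [1,0,1,1] stores a,
// c, d at positions 0..2; with a passthru, the tail position is then
// repaired to hold the passthru element (or the last stored value when
// every lane was selected).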
11847
11848 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
11849 SDValue &LHS, SDValue &RHS,
11850 SDValue &CC, SDValue Mask,
11851 SDValue EVL, bool &NeedInvert,
11852 const SDLoc &dl, SDValue &Chain,
11853 bool IsSignaling) const {
11854 MVT OpVT = LHS.getSimpleValueType();
11855 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11856 NeedInvert = false;
11857 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11858 bool IsNonVP = !EVL;
11859 switch (getCondCodeAction(CCCode, OpVT)) {
11860 default:
11861 llvm_unreachable("Unknown condition code action!");
11862 case TargetLowering::Legal:
11863 // Nothing to do.
11864 break;
11865 case TargetLowering::Expand: {
11866 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
11867 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11868 std::swap(LHS, RHS);
11869 CC = DAG.getCondCode(InvCC);
11870 return true;
11871 }
11872 // Swapping operands didn't work. Try inverting the condition.
11873 bool NeedSwap = false;
11874 InvCC = getSetCCInverse(CCCode, OpVT);
11875 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11876 // If inverting the condition is not enough, try swapping operands
11877 // on top of it.
11878 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11879 NeedSwap = true;
11880 }
11881 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11882 CC = DAG.getCondCode(InvCC);
11883 NeedInvert = true;
11884 if (NeedSwap)
11885 std::swap(LHS, RHS);
11886 return true;
11887 }
11888
11889 // Special case: expand i1 comparisons using logical operations.
11890 if (OpVT == MVT::i1) {
11891 SDValue Ret;
11892 switch (CCCode) {
11893 default:
11894 llvm_unreachable("Unknown integer setcc!");
11895 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
11896 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
11897 MVT::i1);
11898 break;
11899 case ISD::SETNE: // X != Y --> (X ^ Y)
11900 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
11901 break;
11902 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11903 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11904 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
11905 DAG.getNOT(dl, LHS, MVT::i1));
11906 break;
11907 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11908 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11909 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
11910 DAG.getNOT(dl, RHS, MVT::i1));
11911 break;
11912 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11913 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11914 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
11915 DAG.getNOT(dl, LHS, MVT::i1));
11916 break;
11917 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11918 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11919 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
11920 DAG.getNOT(dl, RHS, MVT::i1));
11921 break;
11922 }
11923
11924 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
11925 RHS = SDValue();
11926 CC = SDValue();
11927 return true;
11928 }
11929
11930 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
11931 unsigned Opc = 0;
11932 switch (CCCode) {
11933 default:
11934 llvm_unreachable("Don't know how to expand this condition!");
11935 case ISD::SETUO:
11936 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
11937 CC1 = ISD::SETUNE;
11938 CC2 = ISD::SETUNE;
11939 Opc = ISD::OR;
11940 break;
11941 }
11942 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11943 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11944 NeedInvert = true;
11945 [[fallthrough]];
11946 case ISD::SETO:
11947 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11948 "If SETO is expanded, SETOEQ must be legal!");
11949 CC1 = ISD::SETOEQ;
11950 CC2 = ISD::SETOEQ;
11951 Opc = ISD::AND;
11952 break;
11953 case ISD::SETONE:
11954 case ISD::SETUEQ:
11955 // If the SETUO or SETO CC isn't legal, we might be able to use
11956 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11957 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11958 // the operands.
11959 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11960 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11961 isCondCodeLegal(ISD::SETOLT, OpVT))) {
11962 CC1 = ISD::SETOGT;
11963 CC2 = ISD::SETOLT;
11964 Opc = ISD::OR;
11965 NeedInvert = ((unsigned)CCCode & 0x8U);
11966 break;
11967 }
11968 [[fallthrough]];
11969 case ISD::SETOEQ:
11970 case ISD::SETOGT:
11971 case ISD::SETOGE:
11972 case ISD::SETOLT:
11973 case ISD::SETOLE:
11974 case ISD::SETUNE:
11975 case ISD::SETUGT:
11976 case ISD::SETUGE:
11977 case ISD::SETULT:
11978 case ISD::SETULE:
11979 // If we are floating point, assign and break, otherwise fall through.
11980 if (!OpVT.isInteger()) {
11981 // We can use the 4th bit to tell if we are the unordered
11982 // or ordered version of the opcode.
11983 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11984 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11985 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11986 break;
11987 }
11988 // Fall through if this is an unsigned integer.
11989 [[fallthrough]];
11990 case ISD::SETLE:
11991 case ISD::SETGT:
11992 case ISD::SETGE:
11993 case ISD::SETLT:
11994 case ISD::SETNE:
11995 case ISD::SETEQ:
11996 // If all combinations of inverting the condition and swapping operands
11997 // didn't work then we have no means to expand the condition.
11998 llvm_unreachable("Don't know how to expand this condition!");
11999 }
12000
12001 SDValue SetCC1, SetCC2;
12002 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12003 // If we aren't the ordered or unordered operation,
12004 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12005 if (IsNonVP) {
12006 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12007 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12008 } else {
12009 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12010 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12011 }
12012 } else {
12013 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12014 if (IsNonVP) {
12015 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12016 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12017 } else {
12018 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12019 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12020 }
12021 }
12022 if (Chain)
12023 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12024 SetCC2.getValue(1));
12025 if (IsNonVP)
12026 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12027 else {
12028 // Transform the binary opcode to the VP equivalent.
12029 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12030 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12031 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12032 }
12033 RHS = SDValue();
12034 CC = SDValue();
12035 return true;
12036 }
12037 }
12038 return false;
12039}
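// Example: if SETUEQ is illegal for f32, the expansion above can emit
// (X ogt Y) || (X olt Y) with NeedInvert set, since ueq is the negation
// of one = ogt || olt.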
12040
12041 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12042 SelectionDAG &DAG) const {
12043 EVT VT = Node->getValueType(0);
12044 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12045 // split into two equal parts.
12046 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12047 return SDValue();
12048
12049 // Restrict expansion to cases where both parts can be concatenated.
12050 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12051 if (LoVT != HiVT || !isTypeLegal(LoVT))
12052 return SDValue();
12053
12054 SDLoc DL(Node);
12055 unsigned Opcode = Node->getOpcode();
12056
12057 // Don't expand if the result is likely to be unrolled anyway.
12058 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12059 return SDValue();
12060
12061 SmallVector<SDValue, 4> LoOps, HiOps;
12062 for (const SDValue &V : Node->op_values()) {
12063 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12064 LoOps.push_back(Lo);
12065 HiOps.push_back(Hi);
12066 }
12067
12068 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12069 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12070 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12071}
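// E.g. an operation on v8i16 that is only legal at v4i16 becomes two
// v4i16 nodes reassembled with CONCAT_VECTORS, avoiding full
// scalarization.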
unsigned const MachineRegisterInfo * MRI
static const LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:557
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1329
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1155
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1135
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1095
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1106
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1547
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1392
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the bit at the given BitPosition to 1.
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
APInt reverseBits() const
Definition: APInt.cpp:741
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:834
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1319
APInt multiplicativeInverse() const
Definition: APInt.cpp:1248
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
APInt byteSwap() const
Definition: APInt.cpp:719
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1424
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1343
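A small self-contained sketch exercising a few of the APInt operations listed above (assumes an LLVM development setup; the values are chosen only for illustration):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt Mask = APInt::getLowBitsSet(32, 8);   // 0x000000FF
  assert(Mask.countr_one() == 8);
  Mask.setHighBits(4);                        // now 0xF00000FF
  assert(Mask.intersects(APInt::getSignMask(32)));
  APInt Shifted = Mask.lshr(4);               // 0x0F00000F
  assert(Shifted.extractBits(4, 24) == 0xF);  // bits [24,28)
  return 0;
}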
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
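ArrayRef is a non-owning view over contiguous storage, so it is normally passed by value. A brief usage sketch (the function name is illustrative):

#include "llvm/ADT/ArrayRef.h"
#include <vector>
using namespace llvm;

static int sumAll(ArrayRef<int> Vals) {
  int S = 0;
  for (int V : Vals) // iterates the viewed storage without copying it
    S += V;
  return S;
}
// Callers may pass a std::vector, a C array, or a braced list, e.g.:
//   std::vector<int> V{1, 2, 3}; sumAll(V); sumAll({4, 5});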
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1119
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1120
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of a block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of a block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:462
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
Class to represent pointers.
Definition: DerivedTypes.h:670
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:980
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:456
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
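Hedged sketch of how these helpers compose in a target's custom lowering: an unsigned max built from getSetCC plus getSelect. The function name and the assumption that Op is a two-operand integer node are illustrative, not taken from this file:

static SDValue lowerUMaxAsSelect(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue A = Op.getOperand(0), B = Op.getOperand(1);
  // Ask the target what type a setcc of VT produces.
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, A, B, ISD::SETUGT);
  return DAG.getSelect(DL, VT, Cmp, A, B); // A > B ? A : B
}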
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:888
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
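Illustrative use of the known-bits queries: proving that every bit above SrcBits is already zero, so zero-extending N from SrcBits would be redundant (the helper name and shape are assumptions):

static bool zextIsRedundant(SelectionDAG &DAG, SDValue N, unsigned SrcBits) {
  unsigned BW = N.getScalarValueSizeInBits();
  APInt HighBits = APInt::getBitsSetFrom(BW, SrcBits);
  return DAG.MaskedValueIsZero(N, HighBits); // all high bits known zero?
}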
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:904
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
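For reference, a stand-alone re-implementation of the mask commutation (illustrative, not the code in ShuffleVectorSDNode): once the two shuffle inputs swap, every in-range index moves to the other half of the index space.

#include <vector>

static void commuteShuffleMask(std::vector<int> &Mask, unsigned NumElts) {
  for (int &M : Mask) {
    if (M < 0)
      continue; // leave undef (-1) lanes untouched
    M = M < (int)NumElts ? M + (int)NumElts : M - (int)NumElts;
  }
}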
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:571
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
iterator end() const
Definition: StringRef.h:118
Class to represent struct types.
Definition: DerivedTypes.h:218
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
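Hedged sketch of honoring BooleanContent when materializing a 'true' value, similar in spirit to getBoolConstant above (all names here are illustrative):

static SDValue getTrueConstant(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                               EVT OpVT) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  switch (TLI.getBooleanContents(OpVT.isVector(), OpVT.isFloatingPoint())) {
  case TargetLowering::UndefinedBooleanContent:
  case TargetLowering::ZeroOrOneBooleanContent:
    return DAG.getConstant(1, DL, VT);
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return DAG.getAllOnesConstant(DL, VT);
  }
  llvm_unreachable("unknown BooleanContent");
}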
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
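A hedged sketch of lowering a node to a libcall with makeLibCall; RTLIB::SIN_F64 and the helper name are illustrative choices, and checks such as a missing libcall name are omitted:

static SDValue lowerToSinLibcall(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {N->getOperand(0)};
  // Returns {return value, chain}; an unchained call only needs .first.
  return TLI.makeLibCall(DAG, RTLIB::SIN_F64, VT, Ops, CallOptions, DL).first;
}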
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
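For reference, the bit-parallel population count this expansion emits for an i32, written as plain C++ (the classic Hacker's Delight sequence); expandCTPOP builds the same arithmetic out of ISD nodes:

#include <cstdint>

static uint32_t popcount32(uint32_t X) {
  X = X - ((X >> 1) & 0x55555555u);                 // 2-bit partial sums
  X = (X & 0x33333333u) + ((X >> 2) & 0x33333333u); // 4-bit partial sums
  X = (X + (X >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (X * 0x01010101u) >> 24;                   // add the four bytes
}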
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
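Hedged sketch of a target combine driving this machinery through the DAGCombinerInfo overload: if a node only demands the low byte of an operand, SimplifyDemandedBits may shrink the operand's computation in place (all names are illustrative):

static SDValue combineDemandingLowByte(SDNode *N, const TargetLowering &TLI,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Op = N->getOperand(0);
  unsigned BW = Op.getScalarValueSizeInBits();
  APInt DemandedBits = APInt::getLowBitsSet(BW, 8);
  if (TLI.SimplifyDemandedBits(Op, DemandedBits, DCI))
    return SDValue(N, 0); // the node was updated in place
  return SDValue();
}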
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
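One standard identity behind this expansion converts the two 32-bit halves separately, each of which is exact in a double, and adds them (scalar sketch, illustrative helper; the actual DAG sequence typically achieves the same effect with bitcast magic-constant tricks):

#include <cstdint>

// uint64 -> double: each 32-bit half converts exactly, scaling Hi by
// 2^32 is exact, and the final add rounds the true value just once.
double u64ToF64(uint64_t X) {
  double Hi = (double)(uint32_t)(X >> 32) * 4294967296.0; // * 2^32
  double Lo = (double)(uint32_t)X;
  return Hi + Lo;
}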
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
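The split into narrower nodes follows the schoolbook decomposition into half-width partial products; a scalar sketch of the identity (illustrative helper):

#include <cstdint>

// 32x32 -> 64 product from 16-bit halves: four partial products,
// with the two cross terms shifted into the middle.
uint64_t mulLoHi32(uint32_t A, uint32_t B) {
  uint64_t AL = A & 0xFFFF, AH = A >> 16;
  uint64_t BL = B & 0xFFFF, BH = B >> 16;
  return AL * BL + ((AL * BH + AH * BL) << 16) + ((AH * BH) << 32);
}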
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
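The unsigned variants rest on overflow-free bitwise identities (scalar sketch, illustrative helpers; the signed forms use the same idea with sign handling):

#include <cstdint>

// avgfloor_u(A, B) == (A + B) >> 1 without widening: the shared bits
// (A & B) contribute in full, the differing bits (A ^ B) by half.
uint32_t avgFloorU(uint32_t A, uint32_t B) {
  return (A & B) + ((A ^ B) >> 1);
}

// avgceil_u(A, B) == (A + B + 1) >> 1 without widening.
uint32_t avgCeilU(uint32_t A, uint32_t B) {
  return (A | B) - ((A ^ B) >> 1);
}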
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:740
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:295
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:698
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition: APInt.cpp:2982
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ SSUBO
Same as [SU]ADDO, but for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1123
@ FCANONICALIZE
Returns the platform-specific canonical encoding of a floating-point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same as [SU]ADDO, but for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE except when either operand is sNaN.
Definition: ISDOpcodes.h:1055
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1664
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1669
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1639
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:555
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1771
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1625
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1535
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:313
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:247
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:211
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:79
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:281
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:187
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:313
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:60
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:97
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:159
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:278
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:205
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setIsSigned(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
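To make the magic-number idea concrete, here is the classic signed divide-by-3 instance (the constant is the standard one from the literature, not read out of this interface; helper name illustrative):

#include <cstdint>

// x / 3 via multiply-high: 0x55555556 == ceil(2^32 / 3). The high
// word of the 64-bit product is floor(x/3) for x >= 0, and one less
// than the truncated quotient for x < 0, fixed by adding the sign bit.
int32_t sdiv3(int32_t X) {
  int64_t Prod = (int64_t)X * 0x55555556;
  int32_t Quot = (int32_t)(Prod >> 32);
  return Quot + (int32_t)((uint32_t)X >> 31);
}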