LLVM 20.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
24#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCExpr.h"
35#include <cctype>
36using namespace llvm;
37
 38/// NOTE: The TargetMachine owns TLOF.
// TargetLowering constructor: forwards the TargetMachine to TargetLoweringBase
// and adds no state of its own. (The signature line, doxygen line 39, was
// dropped by the page extraction.)
 40 : TargetLoweringBase(tm) {}
41
42const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
43 return nullptr;
44}
45
48}
49
 50/// Check whether a given call node is in tail position within its function. If
 51/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the extraction dropped the opening signature line (doxygen 52)
// and line 54 (which presumably defined the local `F` used below — verify
// against the real source).
 53 SDValue &Chain) const {
 55
 56 // First, check if tail calls have been disabled in this function.
 57 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
 58 return false;
 59
 60 // Conservatively require the attributes of the call to match those of
 61 // the return. Ignore following attributes because they don't affect the
 62 // call sequence.
 63 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
 64 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
 65 Attribute::DereferenceableOrNull, Attribute::NoAlias,
 66 Attribute::NonNull, Attribute::NoUndef,
 67 Attribute::Range, Attribute::NoFPClass})
 68 CallerAttrs.removeAttribute(Attr);
 69
// Any return attribute that survived the filtering above would have to be
// reproduced by the callee, so conservatively refuse the tail call.
 70 if (CallerAttrs.hasAttributes())
 71 return false;
 72
 73 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): this check looks unreachable — ZExt/SExt are not in the
// removed-attribute list, so if either is present hasAttributes() above
// already returned false.
 74 if (CallerAttrs.contains(Attribute::ZExt) ||
 75 CallerAttrs.contains(Attribute::SExt))
 76 return false;
 77
 78 // Check if the only use is a function return node.
 79 return isUsedByReturnOnly(Node, Chain);
 80}
81
 // parametersInCSRMatch: for every outgoing argument assigned to a register
 // NOT clobbered per CallerPreservedMask, require that the value being passed
 // is exactly the caller's incoming live-in value for that same register, so a
 // tail call cannot corrupt caller-saved state. Returns true when all such
 // arguments match. (The opening signature line, doxygen 82, was dropped by
 // the page extraction.)
 83 const uint32_t *CallerPreservedMask,
 84 const SmallVectorImpl<CCValAssign> &ArgLocs,
 85 const SmallVectorImpl<SDValue> &OutVals) const {
 86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
 87 const CCValAssign &ArgLoc = ArgLocs[I];
 88 if (!ArgLoc.isRegLoc())
 89 continue;
 90 MCRegister Reg = ArgLoc.getLocReg();
 91 // Only look at callee saved registers.
 92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
 93 continue;
 94 // Check that we pass the value used for the caller.
 95 // (We look for a CopyFromReg reading a virtual register that is used
 96 // for the function live-in value of register Reg)
 97 SDValue Value = OutVals[I];
 98 if (Value->getOpcode() == ISD::AssertZext)
 99 Value = Value.getOperand(0);
 100 if (Value->getOpcode() != ISD::CopyFromReg)
 101 return false;
 102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
 103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
 104 return false;
 105 }
 106 return true;
 107}
108
 109/// Set CallLoweringInfo attribute flags based on a call instruction
 110/// and called function attributes.
// Copies the per-argument ABI attributes of argument ArgIdx from the call
// site into this ArgListEntry, and resolves the pointee type (IndirectType)
// for the indirect-passing attributes (byval / preallocated / inalloca /
// sret). (The extraction dropped the signature line, doxygen 111, and the
// first line of the assert whose message survives on line 129.)
 112 unsigned ArgIdx) {
 113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
 114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
 115 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
 116 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
 117 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
 118 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
 119 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
 120 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
 121 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
 122 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
 123 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
 124 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
 125 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
 126 Alignment = Call->getParamStackAlign(ArgIdx);
 127 IndirectType = nullptr;
 129 "multiple ABI attributes?");
 130 if (IsByVal) {
 131 IndirectType = Call->getParamByValType(ArgIdx);
// byval may carry its own alignment; only consult it if no explicit stack
// alignment was given above.
 132 if (!Alignment)
 133 Alignment = Call->getParamAlign(ArgIdx);
 134 }
 135 if (IsPreallocated)
 136 IndirectType = Call->getParamPreallocatedType(ArgIdx);
 137 if (IsInAlloca)
 138 IndirectType = Call->getParamInAllocaType(ArgIdx);
 139 if (IsSRet)
 140 IndirectType = Call->getParamStructRetType(ArgIdx);
 141}
142
 143/// Generate a libcall taking the given operands as arguments and returning a
 144/// result of type RetVT.
// Builds an argument list with sign/zero-extension flags derived from
// shouldSignExtendTypeInLibCall (suppressed for softened FP operands), then
// lowers a call to the runtime routine for LC. Returns {result, out-chain}.
// NOTE(review): the extraction dropped several lines (doxygen 146-147 of the
// signature; 154/157 declaring the Args/Entry locals; 167 and 184, the
// soften-related conditions; 175-176 and 179, the callee symbol and CLI
// declaration; 192-193 of the CLI setter chain) — gaps in the embedded
// numbering mark them.
 145std::pair<SDValue, SDValue>
 148 MakeLibCallOptions CallOptions,
 149 const SDLoc &dl,
 150 SDValue InChain) const {
// Default to the entry token when the caller did not thread a chain.
 151 if (!InChain)
 152 InChain = DAG.getEntryNode();
 153
 155 Args.reserve(Ops.size());
 156
 158 for (unsigned i = 0; i < Ops.size(); ++i) {
 159 SDValue NewOp = Ops[i];
 160 Entry.Node = NewOp;
 161 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
 162 Entry.IsSExt =
 163 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
 164 Entry.IsZExt = !Entry.IsSExt;
 165
 166 if (CallOptions.IsSoften &&
 168 Entry.IsSExt = Entry.IsZExt = false;
 169 }
 170 Args.push_back(Entry);
 171 }
 172
 173 if (LC == RTLIB::UNKNOWN_LIBCALL)
 174 report_fatal_error("Unsupported library call operation!");
 177
 178 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
 180 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
 181 bool zeroExtend = !signExtend;
 182
 183 if (CallOptions.IsSoften &&
 185 signExtend = zeroExtend = false;
 186 }
 187
 188 CLI.setDebugLoc(dl)
 189 .setChain(InChain)
 190 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
 191 .setNoReturn(CallOptions.DoesNotReturn)
 194 .setSExtResult(signExtend)
 195 .setZExtResult(zeroExtend);
 196 return LowerCallTo(CLI);
 197}
198
 // findOptimalMemOpLowering: choose a sequence of value types (appended to
 // MemOps) covering Op.size() bytes for a memcpy/memset-style operation,
 // respecting alignment, type legality, and the Limit on the number of
 // operations; returns false if no sequence within Limit exists.
 // NOTE(review): the extraction dropped some lines (doxygen 199 of the
 // signature; 216, the shrink-VT step of the alignment loop; 243-244 and 247,
 // conditions inside the leftover-piece handling; 268/270, parts of the
 // allowsMisalignedMemoryAccesses call) — gaps in the numbering mark them.
 200 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
 201 unsigned SrcAS, const AttributeList &FuncAttributes) const {
 202 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
 203 Op.getSrcAlign() < Op.getDstAlign())
 204 return false;
 205
 206 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
 207
 208 if (VT == MVT::Other) {
 209 // Use the largest integer type whose alignment constraints are satisfied.
 210 // We only need to check DstAlign here as SrcAlign is always greater or
 211 // equal to DstAlign (or zero).
 212 VT = MVT::LAST_INTEGER_VALUETYPE;
 213 if (Op.isFixedDstAlign())
 214 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
 215 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
 217 assert(VT.isInteger());
 218
 219 // Find the largest legal integer type.
 220 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
 221 while (!isTypeLegal(LVT))
 222 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
 223 assert(LVT.isInteger());
 224
 225 // If the type we've chosen is larger than the largest legal integer type
 226 // then use that instead.
 227 if (VT.bitsGT(LVT))
 228 VT = LVT;
 229 }
 230
 231 unsigned NumMemOps = 0;
 232 uint64_t Size = Op.size();
 233 while (Size) {
 234 unsigned VTSize = VT.getSizeInBits() / 8;
 235 while (VTSize > Size) {
 236 // For now, only use non-vector load / store's for the left-over pieces.
 237 EVT NewVT = VT;
 238 unsigned NewVTSize;
 239
 240 bool Found = false;
 241 if (VT.isVector() || VT.isFloatingPoint()) {
 242 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
 245 Found = true;
 246 else if (NewVT == MVT::i64 &&
 248 isSafeMemOpType(MVT::f64)) {
 249 // i64 is usually not legal on 32-bit targets, but f64 may be.
 250 NewVT = MVT::f64;
 251 Found = true;
 252 }
 253 }
 254
 255 if (!Found) {
// Walk down the simple-value-type enum until a safe type is found, never
// going below i8.
 256 do {
 257 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
 258 if (NewVT == MVT::i8)
 259 break;
 260 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
 261 }
 262 NewVTSize = NewVT.getSizeInBits() / 8;
 263
 264 // If the new VT cannot cover all of the remaining bits, then consider
 265 // issuing a (or a pair of) unaligned and overlapping load / store.
 266 unsigned Fast;
 267 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
 269 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
 271 Fast)
 272 VTSize = Size;
 273 else {
 274 VT = NewVT;
 275 VTSize = NewVTSize;
 276 }
 277 }
 278
 279 if (++NumMemOps > Limit)
 280 return false;
 281
 282 MemOps.push_back(VT);
 283 Size -= VTSize;
 284 }
 285
 286 return true;
 287}
288
 289/// Soften the operands of a comparison. This code is shared among BR_CC,
 290/// SELECT_CC, and SETCC handlers.
// Convenience overload: forwards to the chained variant below with a null
// Chain (default, non-signaling behavior). (The opening signature line,
// doxygen 291, was dropped by the page extraction.)
 292 SDValue &NewLHS, SDValue &NewRHS,
 293 ISD::CondCode &CCCode,
 294 const SDLoc &dl, const SDValue OldLHS,
 295 const SDValue OldRHS) const {
 296 SDValue Chain;
 297 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
 298 OldRHS, Chain);
 299}
300
 // softenSetCCOperands (chained variant): replace an FP comparison of a type
 // that must be softened (f32/f64/f128/ppcf128) with one or two integer
 // compiler-rt/libgcc comparison libcalls, rewriting NewLHS/NewRHS/CCCode so
 // the caller can emit an integer setcc against 0. Two libcalls (LC2 set) are
 // needed for predicates like SETUEQ/SETONE that combine an ordered test with
 // an unordered one.
 // NOTE(review): the extraction dropped the opening signature line (doxygen
 // 301) and lines 404/406 near the end, which presumably declared the libcall
 // return type `RetVT` and `CallOptions` used below — verify against the real
 // source.
 302 SDValue &NewLHS, SDValue &NewRHS,
 303 ISD::CondCode &CCCode,
 304 const SDLoc &dl, const SDValue OldLHS,
 305 const SDValue OldRHS,
 306 SDValue &Chain,
 307 bool IsSignaling) const {
 308 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
 309 // not supporting it. We can update this code when libgcc provides such
 310 // functions.
 311
 312 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
 313 && "Unsupported setcc type!");
 314
 315 // Expand into one or more soft-fp libcall(s).
 316 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
 317 bool ShouldInvertCC = false;
 318 switch (CCCode) {
 319 case ISD::SETEQ:
 320 case ISD::SETOEQ:
 321 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 322 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 323 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 324 break;
 325 case ISD::SETNE:
 326 case ISD::SETUNE:
 327 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
 328 (VT == MVT::f64) ? RTLIB::UNE_F64 :
 329 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
 330 break;
 331 case ISD::SETGE:
 332 case ISD::SETOGE:
 333 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 334 (VT == MVT::f64) ? RTLIB::OGE_F64 :
 335 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 336 break;
 337 case ISD::SETLT:
 338 case ISD::SETOLT:
 339 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 340 (VT == MVT::f64) ? RTLIB::OLT_F64 :
 341 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 342 break;
 343 case ISD::SETLE:
 344 case ISD::SETOLE:
 345 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 346 (VT == MVT::f64) ? RTLIB::OLE_F64 :
 347 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 348 break;
 349 case ISD::SETGT:
 350 case ISD::SETOGT:
 351 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 352 (VT == MVT::f64) ? RTLIB::OGT_F64 :
 353 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 354 break;
 355 case ISD::SETO:
// SETO is implemented as NOT(unordered); reuse the UO libcall inverted.
 356 ShouldInvertCC = true;
 357 [[fallthrough]];
 358 case ISD::SETUO:
 359 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 360 (VT == MVT::f64) ? RTLIB::UO_F64 :
 361 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 362 break;
 363 case ISD::SETONE:
 364 // SETONE = O && UNE
 365 ShouldInvertCC = true;
 366 [[fallthrough]];
 367 case ISD::SETUEQ:
 368 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 369 (VT == MVT::f64) ? RTLIB::UO_F64 :
 370 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 371 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 372 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 373 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 374 break;
 375 default:
 376 // Invert CC for unordered comparisons
 377 ShouldInvertCC = true;
 378 switch (CCCode) {
 379 case ISD::SETULT:
 380 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 381 (VT == MVT::f64) ? RTLIB::OGE_F64 :
 382 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 383 break;
 384 case ISD::SETULE:
 385 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 386 (VT == MVT::f64) ? RTLIB::OGT_F64 :
 387 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 388 break;
 389 case ISD::SETUGT:
 390 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 391 (VT == MVT::f64) ? RTLIB::OLE_F64 :
 392 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 393 break;
 394 case ISD::SETUGE:
 395 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 396 (VT == MVT::f64) ? RTLIB::OLT_F64 :
 397 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 398 break;
 399 default: llvm_unreachable("Do not know how to soften this setcc!");
 400 }
 401 }
 402
 403 // Use the target specific return value for comparison lib calls.
 405 SDValue Ops[2] = {NewLHS, NewRHS};
 407 EVT OpsVT[2] = { OldLHS.getValueType(),
 408 OldRHS.getValueType() };
 409 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
 410 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
 411 NewLHS = Call.first;
 412 NewRHS = DAG.getConstant(0, dl, RetVT);
 413
 414 CCCode = getCmpLibcallCC(LC1);
 415 if (ShouldInvertCC) {
 416 assert(RetVT.isInteger());
 417 CCCode = getSetCCInverse(CCCode, RetVT);
 418 }
 419
 420 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
 421 // Update Chain.
 422 Chain = Call.second;
 423 } else {
// Two-libcall predicates: combine the two integer setcc results with AND
// (inverted case, e.g. SETONE) or OR (e.g. SETUEQ).
 424 EVT SetCCVT =
 425 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
 426 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
 427 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
 428 CCCode = getCmpLibcallCC(LC2);
 429 if (ShouldInvertCC)
 430 CCCode = getSetCCInverse(CCCode, RetVT);
 431 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
 432 if (Chain)
 433 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
 434 Call2.second);
 435 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
 436 Tmp.getValueType(), Tmp, NewLHS);
 437 NewRHS = SDValue();
 438 }
 439}
440
 441/// Return the entry encoding for a jump table in the current function. The
 442/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): each return statement of this function (doxygen lines 446,
// 450, 453) was dropped by the page extraction; only the comments and the
// guarding conditions remain below.
 444 // In non-pic modes, just use the address of a block.
 445 if (!isPositionIndependent())
 447
 448 // In PIC mode, if the target supports a GPRel32 directive, use it.
 449 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
 451
 452 // Otherwise, use a label difference.
 454}
455
 // getPICJumpTableRelocBase: return the SDValue jump-table entries are
 // relative to in PIC mode. (The extraction dropped the signature line 456
 // and lines 461-463, which presumably construct `Table` — including the
 // GP-relative case the comment below refers to; `JTEncoding` is consumed
 // there.)
 457 SelectionDAG &DAG) const {
 458 // If our PIC model is GP relative, use the global offset table as the base.
 459 unsigned JTEncoding = getJumpTableEncoding();
 460
 464
 465 return Table;
 466}
467
 468/// This returns the relocation base for the given PIC jumptable, the same as
 469/// getPICJumpTableRelocBase, but as an MCExpr.
 470const MCExpr *
// NOTE(review): the line carrying the function name and first parameter
// (doxygen 471) was dropped by the extraction; per the doc comment above this
// is TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, ...).
 472 unsigned JTI,MCContext &Ctx) const{
 473 // The normal PIC reloc base is the label at the start of the jump table.
 474 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
 475}
476
 // expandIndirectJTBranch: emit the ISD::BRIND node for a jump-table
 // dispatch, first threading jump-table debug info onto the chain when
 // needed. (The extraction dropped the signature line 477 and the `if`
 // condition on line 482 that guards the debug-info call; per the comment
 // below it tests for CodeView.)
 478 SDValue Addr, int JTI,
 479 SelectionDAG &DAG) const {
 480 SDValue Chain = Value;
 481 // Jump table debug info is only needed if CodeView is enabled.
 483 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
 484 }
 485 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
 486}
487
 488bool
// isOffsetFoldingLegal: a constant offset may be folded into a global address
// only when the symbol is known DSO-local and the code is not position
// independent. (The line naming the function and its GlobalAddressSDNode
// parameter, doxygen 489, was dropped by the extraction.)
 490 const TargetMachine &TM = getTargetMachine();
 491 const GlobalValue *GV = GA->getGlobal();
 492
 493 // If the address is not even local to this DSO we will have to load it from
 494 // a got and then add the offset.
 495 if (!TM.shouldAssumeDSOLocal(GV))
 496 return false;
 497
 498 // If the code is position independent we will have to add a base register.
 499 if (isPositionIndependent())
 500 return false;
 501
 502 // Otherwise we can do it.
 503 return true;
 504}
505
506//===----------------------------------------------------------------------===//
507// Optimization Methods
508//===----------------------------------------------------------------------===//
509
 510/// If the specified instruction has a constant integer operand and there are
 511/// bits set in that constant that are not demanded, then clear those bits and
 512/// return true.
// Handles XOR/AND/OR with a constant RHS; also gives the target a first shot
// via targetShrinkDemandedConstant. (The opening signature line, doxygen 513,
// was dropped by the page extraction.)
 514 const APInt &DemandedBits,
 515 const APInt &DemandedElts,
 516 TargetLoweringOpt &TLO) const {
 517 SDLoc DL(Op);
 518 unsigned Opcode = Op.getOpcode();
 519
 520 // Early-out if we've ended up calling an undemanded node, leave this to
 521 // constant folding.
 522 if (DemandedBits.isZero() || DemandedElts.isZero())
 523 return false;
 524
 525 // Do target-specific constant optimization.
 526 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
 527 return TLO.New.getNode();
 528
 529 // FIXME: ISD::SELECT, ISD::SELECT_CC
 530 switch (Opcode) {
 531 default:
 532 break;
 533 case ISD::XOR:
 534 case ISD::AND:
 535 case ISD::OR: {
 536 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
 537 if (!Op1C || Op1C->isOpaque())
 538 return false;
 539
 540 // If this is a 'not' op, don't touch it because that's a canonical form.
 541 const APInt &C = Op1C->getAPIntValue();
 542 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
 543 return false;
 544
// Constant has bits outside the demanded set: mask them off and rebuild the
// node with the narrowed constant (preserving node flags).
 545 if (!C.isSubsetOf(DemandedBits)) {
 546 EVT VT = Op.getValueType();
 547 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
 548 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
 549 Op->getFlags());
 550 return TLO.CombineTo(Op, NewOp);
 551 }
 552
 553 break;
 554 }
 555 }
 556
 557 return false;
 558}
559
 // ShrinkDemandedConstant (bits-only overload): synthesize an all-lanes
 // demanded-elements mask and forward to the main overload. (The extraction
 // dropped the signature line 560 and line 565 — presumably the
 // APInt::getAllOnes(num-elements) arm of the ternary below.)
 561 const APInt &DemandedBits,
 562 TargetLoweringOpt &TLO) const {
 563 EVT VT = Op.getValueType();
 564 APInt DemandedElts = VT.isVector()
 566 : APInt(1, 1);
 567 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
 568}
569
 570/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
 571/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
 572/// but it could be generalized for targets with other types of implicit
 573/// widening casts.
// NOTE(review): the extraction dropped the signature line (doxygen 574) and
// line 611 — the else-arm of the SDNodeFlags ternary (presumably
// SDNodeFlags::None).
 575 const APInt &DemandedBits,
 576 TargetLoweringOpt &TLO) const {
 577 assert(Op.getNumOperands() == 2 &&
 578 "ShrinkDemandedOp only supports binary operators!");
 579 assert(Op.getNode()->getNumValues() == 1 &&
 580 "ShrinkDemandedOp only supports nodes with one result!");
 581
 582 EVT VT = Op.getValueType();
 583 SelectionDAG &DAG = TLO.DAG;
 584 SDLoc dl(Op);
 585
 586 // Early return, as this function cannot handle vector types.
 587 if (VT.isVector())
 588 return false;
 589
 590 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
 591 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
 592 "ShrinkDemandedOp only supports operands that have the same size!");
 593
 594 // Don't do this if the node has another user, which may require the
 595 // full value.
 596 if (!Op.getNode()->hasOneUse())
 597 return false;
 598
 599 // Search for the smallest integer type with free casts to and from
 600 // Op's type. For expedience, just check power-of-2 integer types.
 601 unsigned DemandedSize = DemandedBits.getActiveBits();
 602 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
 603 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
 604 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
 605 if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
 606 // We found a type with free casts.
 607
 608 // If the operation has the 'disjoint' flag, then the
 609 // operands on the new node are also disjoint.
 610 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
 612 SDValue X = DAG.getNode(
 613 Op.getOpcode(), dl, SmallVT,
 614 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
 615 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
 616 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
 617 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
 618 return TLO.CombineTo(Op, Z);
 619 }
 620 }
 621 return false;
 622}
623
 // SimplifyDemandedBits (DAGCombiner entry point): run the demanded-bits
 // simplification and, on success, queue Op for revisiting. (The extraction
 // dropped the signature line 624 and line 634 — presumably the
 // DCI.CommitTargetLoweringOpt(TLO) call inside the if-body.)
 625 DAGCombinerInfo &DCI) const {
 626 SelectionDAG &DAG = DCI.DAG;
 627 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 628 !DCI.isBeforeLegalizeOps());
 629 KnownBits Known;
 630
 631 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
 632 if (Simplified) {
 633 DCI.AddToWorklist(Op.getNode());
 635 }
 636 return Simplified;
 637}
638
 // SimplifyDemandedBits (bits + elements DAGCombiner entry point): same as the
 // overload above but with an explicit demanded-elements mask. (The extraction
 // dropped the signature line 639 and line 651 — presumably the
 // DCI.CommitTargetLoweringOpt(TLO) call inside the if-body.)
 640 const APInt &DemandedElts,
 641 DAGCombinerInfo &DCI) const {
 642 SelectionDAG &DAG = DCI.DAG;
 643 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 644 !DCI.isBeforeLegalizeOps());
 645 KnownBits Known;
 646
 647 bool Simplified =
 648 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
 649 if (Simplified) {
 650 DCI.AddToWorklist(Op.getNode());
 652 }
 653 return Simplified;
 654}
655
 // SimplifyDemandedBits (no-DemandedElts overload): demand every lane of a
 // fixed-length vector, or one implicit broadcast lane otherwise, then forward
 // to the main worker. (The extraction dropped signature lines 656/658 and
 // line 667 — presumably the APInt::getAllOnes(num-elements) arm of the
 // ternary below.)
 657 KnownBits &Known,
 659 unsigned Depth,
 660 bool AssumeSingleUse) const {
 661 EVT VT = Op.getValueType();
 662
 663 // Since the number of lanes in a scalable vector is unknown at compile time,
 664 // we track one bit which is implicitly broadcast to all lanes. This means
 665 // that all lanes in a scalable vector are considered demanded.
 666 APInt DemandedElts = VT.isFixedLengthVector()
 668 : APInt(1, 1);
 669 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
 670 AssumeSingleUse);
 671}
672
 673// TODO: Under what circumstances can we create nodes? Constant folding?
// SimplifyMultipleUseDemandedBits: given the bits/elements actually demanded
// of Op, try to return an EXISTING, simpler value that computes the same
// demanded bits (safe even when Op has other users, since no user is
// rewritten). Returns a null SDValue when no simplification is found.
// NOTE(review): the extraction dropped a number of lines — the signature
// (doxygen 674), the depth-limit condition (680), parts of the SETCC guard
// (851-853) and its ValueTracking check (859), several case labels
// (SIGN_EXTEND_INREG at 864, the *_VECTOR_ELT/SUBVECTOR labels at 878-880,
// 896, 909) and 924 inside VECTOR_SHUFFLE. Gaps in the embedded numbering
// mark them; verify against the real source before reusing this text.
 675 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 676 SelectionDAG &DAG, unsigned Depth) const {
 677 EVT VT = Op.getValueType();
 678
 679 // Limit search depth.
 681 return SDValue();
 682
 683 // Ignore UNDEFs.
 684 if (Op.isUndef())
 685 return SDValue();
 686
 687 // Not demanding any bits/elts from Op.
 688 if (DemandedBits == 0 || DemandedElts == 0)
 689 return DAG.getUNDEF(VT);
 690
 691 bool IsLE = DAG.getDataLayout().isLittleEndian();
 692 unsigned NumElts = DemandedElts.getBitWidth();
 693 unsigned BitWidth = DemandedBits.getBitWidth();
 694 KnownBits LHSKnown, RHSKnown;
 695 switch (Op.getOpcode()) {
 696 case ISD::BITCAST: {
 697 if (VT.isScalableVector())
 698 return SDValue();
 699
 700 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
 701 EVT SrcVT = Src.getValueType();
 702 EVT DstVT = Op.getValueType();
 703 if (SrcVT == DstVT)
 704 return Src;
 705
 706 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
 707 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
 708 if (NumSrcEltBits == NumDstEltBits)
 709 if (SDValue V = SimplifyMultipleUseDemandedBits(
 710 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
 711 return DAG.getBitcast(DstVT, V);
 712
// Wide destination elements built from several narrower source elements:
// translate the demanded bits/elts masks onto the source lanes.
 713 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
 714 unsigned Scale = NumDstEltBits / NumSrcEltBits;
 715 unsigned NumSrcElts = SrcVT.getVectorNumElements();
 716 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 717 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 718 for (unsigned i = 0; i != Scale; ++i) {
 719 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
 720 unsigned BitOffset = EltOffset * NumSrcEltBits;
 721 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
 722 if (!Sub.isZero()) {
 723 DemandedSrcBits |= Sub;
 724 for (unsigned j = 0; j != NumElts; ++j)
 725 if (DemandedElts[j])
 726 DemandedSrcElts.setBit((j * Scale) + i);
 727 }
 728 }
 729
 730 if (SDValue V = SimplifyMultipleUseDemandedBits(
 731 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 732 return DAG.getBitcast(DstVT, V);
 733 }
 734
 735 // TODO - bigendian once we have test coverage.
 736 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
 737 unsigned Scale = NumSrcEltBits / NumDstEltBits;
 738 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
 739 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 740 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 741 for (unsigned i = 0; i != NumElts; ++i)
 742 if (DemandedElts[i]) {
 743 unsigned Offset = (i % Scale) * NumDstEltBits;
 744 DemandedSrcBits.insertBits(DemandedBits, Offset);
 745 DemandedSrcElts.setBit(i / Scale);
 746 }
 747
 748 if (SDValue V = SimplifyMultipleUseDemandedBits(
 749 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 750 return DAG.getBitcast(DstVT, V);
 751 }
 752
 753 break;
 754 }
 755 case ISD::FREEZE: {
 756 SDValue N0 = Op.getOperand(0);
 757 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
 758 /*PoisonOnly=*/false))
 759 return N0;
 760 break;
 761 }
 762 case ISD::AND: {
 763 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 764 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 765
 766 // If all of the demanded bits are known 1 on one side, return the other.
 767 // These bits cannot contribute to the result of the 'and' in this
 768 // context.
 769 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
 770 return Op.getOperand(0);
 771 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
 772 return Op.getOperand(1);
 773 break;
 774 }
 775 case ISD::OR: {
 776 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 777 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 778
 779 // If all of the demanded bits are known zero on one side, return the
 780 // other. These bits cannot contribute to the result of the 'or' in this
 781 // context.
 782 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
 783 return Op.getOperand(0);
 784 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
 785 return Op.getOperand(1);
 786 break;
 787 }
 788 case ISD::XOR: {
 789 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 790 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 791
 792 // If all of the demanded bits are known zero on one side, return the
 793 // other.
 794 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
 795 return Op.getOperand(0);
 796 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
 797 return Op.getOperand(1);
 798 break;
 799 }
 800 case ISD::ADD: {
 801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 802 if (RHSKnown.isZero())
 803 return Op.getOperand(0);
 804
 805 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 806 if (LHSKnown.isZero())
 807 return Op.getOperand(1);
 808 break;
 809 }
 810 case ISD::SHL: {
 811 // If we are only demanding sign bits then we can use the shift source
 812 // directly.
 813 if (std::optional<uint64_t> MaxSA =
 814 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
 815 SDValue Op0 = Op.getOperand(0);
 816 unsigned ShAmt = *MaxSA;
 817 unsigned NumSignBits =
 818 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 819 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
 820 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
 821 return Op0;
 822 }
 823 break;
 824 }
 825 case ISD::SRL: {
 826 // If we are only demanding sign bits then we can use the shift source
 827 // directly.
 828 if (std::optional<uint64_t> MaxSA =
 829 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
 830 SDValue Op0 = Op.getOperand(0);
 831 unsigned ShAmt = *MaxSA;
 832 // Must already be signbits in DemandedBits bounds, and can't demand any
 833 // shifted in zeroes.
 834 if (DemandedBits.countl_zero() >= ShAmt) {
 835 unsigned NumSignBits =
 836 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 837 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
 838 return Op0;
 839 }
 840 }
 841 break;
 842 }
 843 case ISD::SETCC: {
 844 SDValue Op0 = Op.getOperand(0);
 845 SDValue Op1 = Op.getOperand(1);
 846 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 847 // If (1) we only need the sign-bit, (2) the setcc operands are the same
 848 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
 849 // -1, we may be able to bypass the setcc.
 850 if (DemandedBits.isSignMask() &&
 854 // If we're testing X < 0, then this compare isn't needed - just use X!
 855 // FIXME: We're limiting to integer types here, but this should also work
 856 // if we don't care about FP signed-zero. The use of SETLT with FP means
 857 // that we don't care about NaNs.
 858 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
 860 return Op0;
 861 }
 862 break;
 863 }
 865 // If none of the extended bits are demanded, eliminate the sextinreg.
 866 SDValue Op0 = Op.getOperand(0);
 867 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 868 unsigned ExBits = ExVT.getScalarSizeInBits();
 869 if (DemandedBits.getActiveBits() <= ExBits &&
 871 return Op0;
 872 // If the input is already sign extended, just drop the extension.
 873 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 874 if (NumSignBits >= (BitWidth - ExBits + 1))
 875 return Op0;
 876 break;
 877 }
 881 if (VT.isScalableVector())
 882 return SDValue();
 883
 884 // If we only want the lowest element and none of extended bits, then we can
 885 // return the bitcasted source vector.
 886 SDValue Src = Op.getOperand(0);
 887 EVT SrcVT = Src.getValueType();
 888 EVT DstVT = Op.getValueType();
 889 if (IsLE && DemandedElts == 1 &&
 890 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
 891 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
 892 return DAG.getBitcast(DstVT, Src);
 893 }
 894 break;
 895 }
 897 if (VT.isScalableVector())
 898 return SDValue();
 899
 900 // If we don't demand the inserted element, return the base vector.
 901 SDValue Vec = Op.getOperand(0);
 902 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
 903 EVT VecVT = Vec.getValueType();
 904 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
 905 !DemandedElts[CIdx->getZExtValue()])
 906 return Vec;
 907 break;
 908 }
 910 if (VT.isScalableVector())
 911 return SDValue();
 912
 913 SDValue Vec = Op.getOperand(0);
 914 SDValue Sub = Op.getOperand(1);
 915 uint64_t Idx = Op.getConstantOperandVal(2);
 916 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
 917 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
 918 // If we don't demand the inserted subvector, return the base vector.
 919 if (DemandedSubElts == 0)
 920 return Vec;
 921 break;
 922 }
 923 case ISD::VECTOR_SHUFFLE: {
 925 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
 926
 927 // If all the demanded elts are from one operand and are inline,
 928 // then we can use the operand directly.
 929 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
 930 for (unsigned i = 0; i != NumElts; ++i) {
 931 int M = ShuffleMask[i];
 932 if (M < 0 || !DemandedElts[i])
 933 continue;
 934 AllUndef = false;
 935 IdentityLHS &= (M == (int)i);
 936 IdentityRHS &= ((M - NumElts) == i);
 937 }
 938
 939 if (AllUndef)
 940 return DAG.getUNDEF(Op.getValueType());
 941 if (IdentityLHS)
 942 return Op.getOperand(0);
 943 if (IdentityRHS)
 944 return Op.getOperand(1);
 945 break;
 946 }
 947 default:
 948 // TODO: Probably okay to remove after audit; here to reduce change size
 949 // in initial enablement patch for scalable vectors
 950 if (VT.isScalableVector())
 951 return SDValue();
 952
// Give targets a chance to simplify their own (BUILTIN_OP_END+) nodes.
 953 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
 954 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
 955 Op, DemandedBits, DemandedElts, DAG, Depth))
 956 return V;
 957 break;
 958 }
 959 return SDValue();
 960}
961
 // SimplifyMultipleUseDemandedBits (bits-only overload): demand every lane of
 // a fixed-length vector (one implicit lane otherwise) and forward. (The
 // extraction dropped signature lines 962-963 and line 970 — presumably the
 // APInt::getAllOnes(num-elements) arm of the ternary below.)
 964 unsigned Depth) const {
 965 EVT VT = Op.getValueType();
 966 // Since the number of lanes in a scalable vector is unknown at compile time,
 967 // we track one bit which is implicitly broadcast to all lanes. This means
 968 // that all lanes in a scalable vector are considered demanded.
 969 APInt DemandedElts = VT.isFixedLengthVector()
 971 : APInt(1, 1);
 972 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 973 Depth);
 974}
975
 // SimplifyMultipleUseDemandedVectorElts: demand all bits of each element and
 // forward to the bits+elements worker above. (The line carrying the function
 // name, doxygen 976, was dropped by the page extraction.)
 977 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
 978 unsigned Depth) const {
 979 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
 980 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 981 Depth);
 982}
983
984 // Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
985 // or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// The extensions may be implicit: what is actually required is that both add
// operands have enough known sign/zero bits that the addition cannot have
// wrapped, so the averaged value fits in a narrower type.
// NOTE(review): the opening of the signature (original lines 986-987, i.e.
// "static SDValue combineShiftToAVG(SDValue Op, TargetLoweringOpt &TLO," or
// similar) was dropped by the doc extraction -- verify against upstream.
988 const TargetLowering &TLI,
989 const APInt &DemandedBits,
990 const APInt &DemandedElts, unsigned Depth) {
991 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
992 "SRL or SRA node is required here!");
993 // Is the right shift using an immediate value of 1?
994 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
995 if (!N1C || !N1C->isOne())
996 return SDValue();
997
998 // We are looking for an avgfloor
999 // add(ext, ext)
1000 // or one of these as a avgceil
1001 // add(add(ext, ext), 1)
1002 // add(add(ext, 1), ext)
1003 // add(ext, add(ext, 1))
1004 SDValue Add = Op.getOperand(0);
1005 if (Add.getOpcode() != ISD::ADD)
1006 return SDValue();
1007
1008 SDValue ExtOpA = Add.getOperand(0);
1009 SDValue ExtOpB = Add.getOperand(1);
// Add2 records the inner add when the avgceil "+1" form matches, so the
// overflow (nuw/nsw) fallback below can check both additions.
1010 SDValue Add2;
// Matches an inner add against (Op1 + 1) in either operand order; on success
// rebinds ExtOpA/ExtOpB to the two averaged values and remembers the inner
// add in Add2 (captures by reference, mutates the enclosing locals).
1011 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1012 ConstantSDNode *ConstOp;
1013 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1014 ConstOp->isOne()) {
1015 ExtOpA = Op1;
1016 ExtOpB = Op3;
1017 Add2 = A;
1018 return true;
1019 }
1020 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1021 ConstOp->isOne()) {
1022 ExtOpA = Op1;
1023 ExtOpB = Op2;
1024 Add2 = A;
1025 return true;
1026 }
1027 return false;
1028 };
// Try both commutations of add(add(x, y), 1); if neither matches this is the
// plain avgfloor form add(ext, ext).
1029 bool IsCeil =
1030 (ExtOpA.getOpcode() == ISD::ADD &&
1031 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1032 (ExtOpB.getOpcode() == ISD::ADD &&
1033 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1034
1035 // If the shift is signed (sra):
1036 // - Needs >= 2 sign bit for both operands.
1037 // - Needs >= 2 zero bits.
1038 // If the shift is unsigned (srl):
1039 // - Needs >= 1 zero bit for both operands.
1040 // - Needs 1 demanded bit zero and >= 2 sign bits.
1041 SelectionDAG &DAG = TLO.DAG;
1042 unsigned ShiftOpc = Op.getOpcode();
1043 bool IsSigned = false;
// Number of high bits known redundant across both operands; used below to
// pick the narrowest type the average fits in.
1044 unsigned KnownBits;
1045 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1046 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// -1: one sign bit is consumed as headroom for the add itself.
1047 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1048 unsigned NumZeroA =
1049 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1050 unsigned NumZeroB =
1051 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1052 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1053
// Decide between the signed and unsigned average, preferring whichever gives
// more known headroom (and thus a narrower NVT below).
1054 switch (ShiftOpc) {
1055 default:
1056 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1057 case ISD::SRA: {
1058 if (NumZero >= 2 && NumSigned < NumZero) {
1059 IsSigned = false;
1060 KnownBits = NumZero;
1061 break;
1062 }
1063 if (NumSigned >= 1) {
1064 IsSigned = true;
1065 KnownBits = NumSigned;
1066 break;
1067 }
1068 return SDValue();
1069 }
1070 case ISD::SRL: {
1071 if (NumZero >= 1 && NumSigned < NumZero) {
1072 IsSigned = false;
1073 KnownBits = NumZero;
1074 break;
1075 }
1076 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1077 IsSigned = true;
1078 KnownBits = NumSigned;
1079 break;
1080 }
1081 return SDValue();
1082 }
1083 }
1084
1085 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1086 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1087
1088 // Find the smallest power-2 type that is legal for this vector size and
1089 // operation, given the original type size and the number of known sign/zero
1090 // bits.
1091 EVT VT = Op.getValueType();
// Never narrow below i8; bit_ceil rounds the width up to a power of two.
1092 unsigned MinWidth =
1093 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1094 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): original line 1095 (the condition guarding this early return,
// presumably a check that NVT did not end up wider than VT) is missing from
// this listing -- confirm against upstream before editing.
1096 return SDValue();
1097 if (VT.isVector())
1098 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1099 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1100 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1101 // larger type size to do the transform.
1102 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1103 return SDValue();
// Proven-no-overflow adds mean the average is exact at the original width,
// so the narrow type is not required for correctness.
1104 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1105 Add.getOperand(1)) &&
1106 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1107 Add2.getOperand(1))))
1108 NVT = VT;
1109 else
1110 return SDValue();
1111 }
1112
1113 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1114 // this is likely to stop other folds (reassociation, value tracking etc.)
1115 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1116 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1117 return SDValue();
1118
// Build avg in the narrow type and widen/truncate back to VT, using
// sign- or zero-extension to match the chosen signedness.
1119 SDLoc DL(Op);
1120 SDValue ResultAVG =
1121 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1122 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1123 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1124}
1125
1126/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1127/// result of Op are ever used downstream. If we can use this information to
1128/// simplify Op, create a new simplified DAG node and return true, returning the
1129/// original and new nodes in Old and New. Otherwise, analyze the expression and
1130/// return a mask of Known bits for the expression (used to simplify the
1131/// caller). The Known bits may only be accurate for those bits in the
1132/// OriginalDemandedBits and OriginalDemandedElts.
1134 SDValue Op, const APInt &OriginalDemandedBits,
1135 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1136 unsigned Depth, bool AssumeSingleUse) const {
1137 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1138 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1139 "Mask size mismatches value type size!");
1140
1141 // Don't know anything.
1142 Known = KnownBits(BitWidth);
1143
1144 EVT VT = Op.getValueType();
1145 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1146 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1147 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1148 "Unexpected vector size");
1149
1150 APInt DemandedBits = OriginalDemandedBits;
1151 APInt DemandedElts = OriginalDemandedElts;
1152 SDLoc dl(Op);
1153
1154 // Undef operand.
1155 if (Op.isUndef())
1156 return false;
1157
1158 // We can't simplify target constants.
1159 if (Op.getOpcode() == ISD::TargetConstant)
1160 return false;
1161
1162 if (Op.getOpcode() == ISD::Constant) {
1163 // We know all of the bits for a constant!
1164 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1165 return false;
1166 }
1167
1168 if (Op.getOpcode() == ISD::ConstantFP) {
1169 // We know all of the bits for a floating point constant!
1171 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1172 return false;
1173 }
1174
1175 // Other users may use these bits.
1176 bool HasMultiUse = false;
1177 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1179 // Limit search depth.
1180 return false;
1181 }
1182 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1184 DemandedElts = APInt::getAllOnes(NumElts);
1185 HasMultiUse = true;
1186 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1187 // Not demanding any bits/elts from Op.
1188 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1189 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1190 // Limit search depth.
1191 return false;
1192 }
1193
1194 KnownBits Known2;
1195 switch (Op.getOpcode()) {
1196 case ISD::SCALAR_TO_VECTOR: {
1197 if (VT.isScalableVector())
1198 return false;
1199 if (!DemandedElts[0])
1200 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1201
1202 KnownBits SrcKnown;
1203 SDValue Src = Op.getOperand(0);
1204 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1205 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1206 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1207 return true;
1208
1209 // Upper elements are undef, so only get the knownbits if we just demand
1210 // the bottom element.
1211 if (DemandedElts == 1)
1212 Known = SrcKnown.anyextOrTrunc(BitWidth);
1213 break;
1214 }
1215 case ISD::BUILD_VECTOR:
1216 // Collect the known bits that are shared by every demanded element.
1217 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1218 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1219 return false; // Don't fall through, will infinitely loop.
1220 case ISD::SPLAT_VECTOR: {
1221 SDValue Scl = Op.getOperand(0);
1222 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1223 KnownBits KnownScl;
1224 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1225 return true;
1226
1227 // Implicitly truncate the bits to match the official semantics of
1228 // SPLAT_VECTOR.
1229 Known = KnownScl.trunc(BitWidth);
1230 break;
1231 }
1232 case ISD::LOAD: {
1233 auto *LD = cast<LoadSDNode>(Op);
1234 if (getTargetConstantFromLoad(LD)) {
1235 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1236 return false; // Don't fall through, will infinitely loop.
1237 }
1238 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1239 // If this is a ZEXTLoad and we are looking at the loaded value.
1240 EVT MemVT = LD->getMemoryVT();
1241 unsigned MemBits = MemVT.getScalarSizeInBits();
1242 Known.Zero.setBitsFrom(MemBits);
1243 return false; // Don't fall through, will infinitely loop.
1244 }
1245 break;
1246 }
1248 if (VT.isScalableVector())
1249 return false;
1250 SDValue Vec = Op.getOperand(0);
1251 SDValue Scl = Op.getOperand(1);
1252 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1253 EVT VecVT = Vec.getValueType();
1254
1255 // If index isn't constant, assume we need all vector elements AND the
1256 // inserted element.
1257 APInt DemandedVecElts(DemandedElts);
1258 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1259 unsigned Idx = CIdx->getZExtValue();
1260 DemandedVecElts.clearBit(Idx);
1261
1262 // Inserted element is not required.
1263 if (!DemandedElts[Idx])
1264 return TLO.CombineTo(Op, Vec);
1265 }
1266
1267 KnownBits KnownScl;
1268 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1269 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1270 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1271 return true;
1272
1273 Known = KnownScl.anyextOrTrunc(BitWidth);
1274
1275 KnownBits KnownVec;
1276 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1277 Depth + 1))
1278 return true;
1279
1280 if (!!DemandedVecElts)
1281 Known = Known.intersectWith(KnownVec);
1282
1283 return false;
1284 }
1285 case ISD::INSERT_SUBVECTOR: {
1286 if (VT.isScalableVector())
1287 return false;
1288 // Demand any elements from the subvector and the remainder from the src its
1289 // inserted into.
1290 SDValue Src = Op.getOperand(0);
1291 SDValue Sub = Op.getOperand(1);
1292 uint64_t Idx = Op.getConstantOperandVal(2);
1293 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1294 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1295 APInt DemandedSrcElts = DemandedElts;
1296 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1297
1298 KnownBits KnownSub, KnownSrc;
1299 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1300 Depth + 1))
1301 return true;
1302 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1303 Depth + 1))
1304 return true;
1305
1306 Known.Zero.setAllBits();
1307 Known.One.setAllBits();
1308 if (!!DemandedSubElts)
1309 Known = Known.intersectWith(KnownSub);
1310 if (!!DemandedSrcElts)
1311 Known = Known.intersectWith(KnownSrc);
1312
1313 // Attempt to avoid multi-use src if we don't need anything from it.
1314 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1315 !DemandedSrcElts.isAllOnes()) {
1316 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1317 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1318 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1319 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1320 if (NewSub || NewSrc) {
1321 NewSub = NewSub ? NewSub : Sub;
1322 NewSrc = NewSrc ? NewSrc : Src;
1323 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1324 Op.getOperand(2));
1325 return TLO.CombineTo(Op, NewOp);
1326 }
1327 }
1328 break;
1329 }
1331 if (VT.isScalableVector())
1332 return false;
1333 // Offset the demanded elts by the subvector index.
1334 SDValue Src = Op.getOperand(0);
1335 if (Src.getValueType().isScalableVector())
1336 break;
1337 uint64_t Idx = Op.getConstantOperandVal(1);
1338 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1339 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1340
1341 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1342 Depth + 1))
1343 return true;
1344
1345 // Attempt to avoid multi-use src if we don't need anything from it.
1346 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1347 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1348 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1349 if (DemandedSrc) {
1350 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1351 Op.getOperand(1));
1352 return TLO.CombineTo(Op, NewOp);
1353 }
1354 }
1355 break;
1356 }
1357 case ISD::CONCAT_VECTORS: {
1358 if (VT.isScalableVector())
1359 return false;
1360 Known.Zero.setAllBits();
1361 Known.One.setAllBits();
1362 EVT SubVT = Op.getOperand(0).getValueType();
1363 unsigned NumSubVecs = Op.getNumOperands();
1364 unsigned NumSubElts = SubVT.getVectorNumElements();
1365 for (unsigned i = 0; i != NumSubVecs; ++i) {
1366 APInt DemandedSubElts =
1367 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1368 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1369 Known2, TLO, Depth + 1))
1370 return true;
1371 // Known bits are shared by every demanded subvector element.
1372 if (!!DemandedSubElts)
1373 Known = Known.intersectWith(Known2);
1374 }
1375 break;
1376 }
1377 case ISD::VECTOR_SHUFFLE: {
1378 assert(!VT.isScalableVector());
1379 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1380
1381 // Collect demanded elements from shuffle operands..
1382 APInt DemandedLHS, DemandedRHS;
1383 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1384 DemandedRHS))
1385 break;
1386
1387 if (!!DemandedLHS || !!DemandedRHS) {
1388 SDValue Op0 = Op.getOperand(0);
1389 SDValue Op1 = Op.getOperand(1);
1390
1391 Known.Zero.setAllBits();
1392 Known.One.setAllBits();
1393 if (!!DemandedLHS) {
1394 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1395 Depth + 1))
1396 return true;
1397 Known = Known.intersectWith(Known2);
1398 }
1399 if (!!DemandedRHS) {
1400 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1401 Depth + 1))
1402 return true;
1403 Known = Known.intersectWith(Known2);
1404 }
1405
1406 // Attempt to avoid multi-use ops if we don't need anything from them.
1407 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1408 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1409 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1410 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1411 if (DemandedOp0 || DemandedOp1) {
1412 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1413 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1414 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1415 return TLO.CombineTo(Op, NewOp);
1416 }
1417 }
1418 break;
1419 }
1420 case ISD::AND: {
1421 SDValue Op0 = Op.getOperand(0);
1422 SDValue Op1 = Op.getOperand(1);
1423
1424 // If the RHS is a constant, check to see if the LHS would be zero without
1425 // using the bits from the RHS. Below, we use knowledge about the RHS to
1426 // simplify the LHS, here we're using information from the LHS to simplify
1427 // the RHS.
1428 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1429 // Do not increment Depth here; that can cause an infinite loop.
1430 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1431 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1432 if ((LHSKnown.Zero & DemandedBits) ==
1433 (~RHSC->getAPIntValue() & DemandedBits))
1434 return TLO.CombineTo(Op, Op0);
1435
1436 // If any of the set bits in the RHS are known zero on the LHS, shrink
1437 // the constant.
1438 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1439 DemandedElts, TLO))
1440 return true;
1441
1442 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1443 // constant, but if this 'and' is only clearing bits that were just set by
1444 // the xor, then this 'and' can be eliminated by shrinking the mask of
1445 // the xor. For example, for a 32-bit X:
1446 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1447 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1448 LHSKnown.One == ~RHSC->getAPIntValue()) {
1449 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1450 return TLO.CombineTo(Op, Xor);
1451 }
1452 }
1453
1454 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1455 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1456 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1457 (Op0.getOperand(0).isUndef() ||
1459 Op0->hasOneUse()) {
1460 unsigned NumSubElts =
1462 unsigned SubIdx = Op0.getConstantOperandVal(2);
1463 APInt DemandedSub =
1464 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1465 KnownBits KnownSubMask =
1466 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1467 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1468 SDValue NewAnd =
1469 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1470 SDValue NewInsert =
1471 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1472 Op0.getOperand(1), Op0.getOperand(2));
1473 return TLO.CombineTo(Op, NewInsert);
1474 }
1475 }
1476
1477 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1478 Depth + 1))
1479 return true;
1480 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1481 Known2, TLO, Depth + 1))
1482 return true;
1483
1484 // If all of the demanded bits are known one on one side, return the other.
1485 // These bits cannot contribute to the result of the 'and'.
1486 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1487 return TLO.CombineTo(Op, Op0);
1488 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1489 return TLO.CombineTo(Op, Op1);
1490 // If all of the demanded bits in the inputs are known zeros, return zero.
1491 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1492 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1493 // If the RHS is a constant, see if we can simplify it.
1494 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1495 TLO))
1496 return true;
1497 // If the operation can be done in a smaller type, do so.
1498 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1499 return true;
1500
1501 // Attempt to avoid multi-use ops if we don't need anything from them.
1502 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1503 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1504 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1505 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1506 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1507 if (DemandedOp0 || DemandedOp1) {
1508 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1509 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1510 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1511 return TLO.CombineTo(Op, NewOp);
1512 }
1513 }
1514
1515 Known &= Known2;
1516 break;
1517 }
1518 case ISD::OR: {
1519 SDValue Op0 = Op.getOperand(0);
1520 SDValue Op1 = Op.getOperand(1);
1521 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1522 Depth + 1)) {
1523 Op->dropFlags(SDNodeFlags::Disjoint);
1524 return true;
1525 }
1526
1527 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1528 Known2, TLO, Depth + 1)) {
1529 Op->dropFlags(SDNodeFlags::Disjoint);
1530 return true;
1531 }
1532
1533 // If all of the demanded bits are known zero on one side, return the other.
1534 // These bits cannot contribute to the result of the 'or'.
1535 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1536 return TLO.CombineTo(Op, Op0);
1537 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1538 return TLO.CombineTo(Op, Op1);
1539 // If the RHS is a constant, see if we can simplify it.
1540 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1541 return true;
1542 // If the operation can be done in a smaller type, do so.
1543 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1544 return true;
1545
1546 // Attempt to avoid multi-use ops if we don't need anything from them.
1547 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1548 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1549 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1550 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1551 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1552 if (DemandedOp0 || DemandedOp1) {
1553 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1554 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1555 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1556 return TLO.CombineTo(Op, NewOp);
1557 }
1558 }
1559
1560 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1561 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1562 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1563 Op0->hasOneUse() && Op1->hasOneUse()) {
1564 // Attempt to match all commutations - m_c_Or would've been useful!
1565 for (int I = 0; I != 2; ++I) {
1566 SDValue X = Op.getOperand(I).getOperand(0);
1567 SDValue C1 = Op.getOperand(I).getOperand(1);
1568 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1569 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1570 if (Alt.getOpcode() == ISD::OR) {
1571 for (int J = 0; J != 2; ++J) {
1572 if (X == Alt.getOperand(J)) {
1573 SDValue Y = Alt.getOperand(1 - J);
1574 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1575 {C1, C2})) {
1576 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1577 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1578 return TLO.CombineTo(
1579 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1580 }
1581 }
1582 }
1583 }
1584 }
1585 }
1586
1587 Known |= Known2;
1588 break;
1589 }
1590 case ISD::XOR: {
1591 SDValue Op0 = Op.getOperand(0);
1592 SDValue Op1 = Op.getOperand(1);
1593
1594 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1595 Depth + 1))
1596 return true;
1597 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1598 Depth + 1))
1599 return true;
1600
1601 // If all of the demanded bits are known zero on one side, return the other.
1602 // These bits cannot contribute to the result of the 'xor'.
1603 if (DemandedBits.isSubsetOf(Known.Zero))
1604 return TLO.CombineTo(Op, Op0);
1605 if (DemandedBits.isSubsetOf(Known2.Zero))
1606 return TLO.CombineTo(Op, Op1);
1607 // If the operation can be done in a smaller type, do so.
1608 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1609 return true;
1610
1611 // If all of the unknown bits are known to be zero on one side or the other
1612 // turn this into an *inclusive* or.
1613 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1614 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1615 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1616
1617 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1618 if (C) {
1619 // If one side is a constant, and all of the set bits in the constant are
1620 // also known set on the other side, turn this into an AND, as we know
1621 // the bits will be cleared.
1622 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1623 // NB: it is okay if more bits are known than are requested
1624 if (C->getAPIntValue() == Known2.One) {
1625 SDValue ANDC =
1626 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1627 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1628 }
1629
1630 // If the RHS is a constant, see if we can change it. Don't alter a -1
1631 // constant because that's a 'not' op, and that is better for combining
1632 // and codegen.
1633 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1634 // We're flipping all demanded bits. Flip the undemanded bits too.
1635 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1636 return TLO.CombineTo(Op, New);
1637 }
1638
1639 unsigned Op0Opcode = Op0.getOpcode();
1640 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1641 if (ConstantSDNode *ShiftC =
1642 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1643 // Don't crash on an oversized shift. We can not guarantee that a
1644 // bogus shift has been simplified to undef.
1645 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1646 uint64_t ShiftAmt = ShiftC->getZExtValue();
1648 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1649 : Ones.lshr(ShiftAmt);
1650 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1651 isDesirableToCommuteXorWithShift(Op.getNode())) {
1652 // If the xor constant is a demanded mask, do a 'not' before the
1653 // shift:
1654 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1655 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1656 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1657 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1658 Op0.getOperand(1)));
1659 }
1660 }
1661 }
1662 }
1663 }
1664
1665 // If we can't turn this into a 'not', try to shrink the constant.
1666 if (!C || !C->isAllOnes())
1667 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1668 return true;
1669
1670 // Attempt to avoid multi-use ops if we don't need anything from them.
1671 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1672 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1673 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1674 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1675 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1676 if (DemandedOp0 || DemandedOp1) {
1677 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1678 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1679 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1680 return TLO.CombineTo(Op, NewOp);
1681 }
1682 }
1683
1684 Known ^= Known2;
1685 break;
1686 }
1687 case ISD::SELECT:
1688 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1689 Known, TLO, Depth + 1))
1690 return true;
1691 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1692 Known2, TLO, Depth + 1))
1693 return true;
1694
1695 // If the operands are constants, see if we can simplify them.
1696 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1697 return true;
1698
1699 // Only known if known in both the LHS and RHS.
1700 Known = Known.intersectWith(Known2);
1701 break;
1702 case ISD::VSELECT:
1703 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1704 Known, TLO, Depth + 1))
1705 return true;
1706 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1707 Known2, TLO, Depth + 1))
1708 return true;
1709
1710 // Only known if known in both the LHS and RHS.
1711 Known = Known.intersectWith(Known2);
1712 break;
1713 case ISD::SELECT_CC:
1714 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1715 Known, TLO, Depth + 1))
1716 return true;
1717 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1718 Known2, TLO, Depth + 1))
1719 return true;
1720
1721 // If the operands are constants, see if we can simplify them.
1722 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1723 return true;
1724
1725 // Only known if known in both the LHS and RHS.
1726 Known = Known.intersectWith(Known2);
1727 break;
1728 case ISD::SETCC: {
1729 SDValue Op0 = Op.getOperand(0);
1730 SDValue Op1 = Op.getOperand(1);
1731 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1732 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1733 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1734 // -1, we may be able to bypass the setcc.
1735 if (DemandedBits.isSignMask() &&
1739 // If we're testing X < 0, then this compare isn't needed - just use X!
1740 // FIXME: We're limiting to integer types here, but this should also work
1741 // if we don't care about FP signed-zero. The use of SETLT with FP means
1742 // that we don't care about NaNs.
1743 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1745 return TLO.CombineTo(Op, Op0);
1746
1747 // TODO: Should we check for other forms of sign-bit comparisons?
1748 // Examples: X <= -1, X >= 0
1749 }
1750 if (getBooleanContents(Op0.getValueType()) ==
1752 BitWidth > 1)
1753 Known.Zero.setBitsFrom(1);
1754 break;
1755 }
1756 case ISD::SHL: {
1757 SDValue Op0 = Op.getOperand(0);
1758 SDValue Op1 = Op.getOperand(1);
1759 EVT ShiftVT = Op1.getValueType();
1760
1761 if (std::optional<uint64_t> KnownSA =
1762 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1763 unsigned ShAmt = *KnownSA;
1764 if (ShAmt == 0)
1765 return TLO.CombineTo(Op, Op0);
1766
1767 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1768 // single shift. We can do this if the bottom bits (which are shifted
1769 // out) are never demanded.
1770 // TODO - support non-uniform vector amounts.
1771 if (Op0.getOpcode() == ISD::SRL) {
1772 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1773 if (std::optional<uint64_t> InnerSA =
1774 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1775 unsigned C1 = *InnerSA;
1776 unsigned Opc = ISD::SHL;
1777 int Diff = ShAmt - C1;
1778 if (Diff < 0) {
1779 Diff = -Diff;
1780 Opc = ISD::SRL;
1781 }
1782 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1783 return TLO.CombineTo(
1784 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1785 }
1786 }
1787 }
1788
1789 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1790 // are not demanded. This will likely allow the anyext to be folded away.
1791 // TODO - support non-uniform vector amounts.
1792 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1793 SDValue InnerOp = Op0.getOperand(0);
1794 EVT InnerVT = InnerOp.getValueType();
1795 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1796 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1797 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1798 SDValue NarrowShl = TLO.DAG.getNode(
1799 ISD::SHL, dl, InnerVT, InnerOp,
1800 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1801 return TLO.CombineTo(
1802 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1803 }
1804
1805 // Repeat the SHL optimization above in cases where an extension
1806 // intervenes: (shl (anyext (shr x, c1)), c2) to
1807 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1808 // aren't demanded (as above) and that the shifted upper c1 bits of
1809 // x aren't demanded.
1810 // TODO - support non-uniform vector amounts.
1811 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1812 InnerOp.hasOneUse()) {
1813 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1814 InnerOp, DemandedElts, Depth + 2)) {
1815 unsigned InnerShAmt = *SA2;
1816 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1817 DemandedBits.getActiveBits() <=
1818 (InnerBits - InnerShAmt + ShAmt) &&
1819 DemandedBits.countr_zero() >= ShAmt) {
1820 SDValue NewSA =
1821 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1822 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1823 InnerOp.getOperand(0));
1824 return TLO.CombineTo(
1825 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1826 }
1827 }
1828 }
1829 }
1830
1831 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1832 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1833 Depth + 1)) {
1834 // Disable the nsw and nuw flags. We can no longer guarantee that we
1835 // won't wrap after simplification.
1836 Op->dropFlags(SDNodeFlags::NoWrap);
1837 return true;
1838 }
1839 Known.Zero <<= ShAmt;
1840 Known.One <<= ShAmt;
1841 // low bits known zero.
1842 Known.Zero.setLowBits(ShAmt);
1843
1844 // Attempt to avoid multi-use ops if we don't need anything from them.
1845 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1846 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1847 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1848 if (DemandedOp0) {
1849 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1850 return TLO.CombineTo(Op, NewOp);
1851 }
1852 }
1853
1854 // TODO: Can we merge this fold with the one below?
1855 // Try shrinking the operation as long as the shift amount will still be
1856 // in range.
1857 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1858 Op.getNode()->hasOneUse()) {
1859 // Search for the smallest integer type with free casts to and from
1860 // Op's type. For expedience, just check power-of-2 integer types.
1861 unsigned DemandedSize = DemandedBits.getActiveBits();
1862 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1863 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1864 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1865 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1866 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1867 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1868 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1869 assert(DemandedSize <= SmallVTBits &&
1870 "Narrowed below demanded bits?");
1871 // We found a type with free casts.
1872 SDValue NarrowShl = TLO.DAG.getNode(
1873 ISD::SHL, dl, SmallVT,
1874 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1875 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1876 return TLO.CombineTo(
1877 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1878 }
1879 }
1880 }
1881
1882 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1883 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1884 // Only do this if we demand the upper half so the knownbits are correct.
1885 unsigned HalfWidth = BitWidth / 2;
1886 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1887 DemandedBits.countLeadingOnes() >= HalfWidth) {
1888 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1889 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1890 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1891 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1892 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1893 // If we're demanding the upper bits at all, we must ensure
1894 // that the upper bits of the shift result are known to be zero,
1895 // which is equivalent to the narrow shift being NUW.
1896 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1897 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1898 SDNodeFlags Flags;
1899 Flags.setNoSignedWrap(IsNSW);
1900 Flags.setNoUnsignedWrap(IsNUW);
1901 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1902 SDValue NewShiftAmt =
1903 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1904 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1905 NewShiftAmt, Flags);
1906 SDValue NewExt =
1907 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1908 return TLO.CombineTo(Op, NewExt);
1909 }
1910 }
1911 }
1912 } else {
1913 // This is a variable shift, so we can't shift the demand mask by a known
1914 // amount. But if we are not demanding high bits, then we are not
1915 // demanding those bits from the pre-shifted operand either.
1916 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1917 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1918 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1919 Depth + 1)) {
1920 // Disable the nsw and nuw flags. We can no longer guarantee that we
1921 // won't wrap after simplification.
1922 Op->dropFlags(SDNodeFlags::NoWrap);
1923 return true;
1924 }
1925 Known.resetAll();
1926 }
1927 }
1928
1929 // If we are only demanding sign bits then we can use the shift source
1930 // directly.
1931 if (std::optional<uint64_t> MaxSA =
1932 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1933 unsigned ShAmt = *MaxSA;
1934 unsigned NumSignBits =
1935 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1936 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1937 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1938 return TLO.CombineTo(Op, Op0);
1939 }
1940 break;
1941 }
1942 case ISD::SRL: {
1943 SDValue Op0 = Op.getOperand(0);
1944 SDValue Op1 = Op.getOperand(1);
1945 EVT ShiftVT = Op1.getValueType();
1946
1947 if (std::optional<uint64_t> KnownSA =
1948 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1949 unsigned ShAmt = *KnownSA;
1950 if (ShAmt == 0)
1951 return TLO.CombineTo(Op, Op0);
1952
1953 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1954 // single shift. We can do this if the top bits (which are shifted out)
1955 // are never demanded.
1956 // TODO - support non-uniform vector amounts.
1957 if (Op0.getOpcode() == ISD::SHL) {
1958 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1959 if (std::optional<uint64_t> InnerSA =
1960 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1961 unsigned C1 = *InnerSA;
1962 unsigned Opc = ISD::SRL;
1963 int Diff = ShAmt - C1;
1964 if (Diff < 0) {
1965 Diff = -Diff;
1966 Opc = ISD::SHL;
1967 }
1968 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1969 return TLO.CombineTo(
1970 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1971 }
1972 }
1973 }
1974
1975 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1976 // single sra. We can do this if the top bits are never demanded.
1977 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1978 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1979 if (std::optional<uint64_t> InnerSA =
1980 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1981 unsigned C1 = *InnerSA;
1982 // Clamp the combined shift amount if it exceeds the bit width.
1983 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1984 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1985 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1986 Op0.getOperand(0), NewSA));
1987 }
1988 }
1989 }
1990
1991 APInt InDemandedMask = (DemandedBits << ShAmt);
1992
1993 // If the shift is exact, then it does demand the low bits (and knows that
1994 // they are zero).
1995 if (Op->getFlags().hasExact())
1996 InDemandedMask.setLowBits(ShAmt);
1997
1998 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1999 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2000 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2002 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2003 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2004 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2005 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2006 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2007 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2008 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2009 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2010 SDValue NewShiftAmt =
2011 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2012 SDValue NewShift =
2013 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2014 return TLO.CombineTo(
2015 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2016 }
2017 }
2018
2019 // Compute the new bits that are at the top now.
2020 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2021 Depth + 1))
2022 return true;
2023 Known.Zero.lshrInPlace(ShAmt);
2024 Known.One.lshrInPlace(ShAmt);
2025 // High bits known zero.
2026 Known.Zero.setHighBits(ShAmt);
2027
2028 // Attempt to avoid multi-use ops if we don't need anything from them.
2029 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2030 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2031 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2032 if (DemandedOp0) {
2033 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2034 return TLO.CombineTo(Op, NewOp);
2035 }
2036 }
2037 } else {
2038 // Use generic knownbits computation as it has support for non-uniform
2039 // shift amounts.
2040 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2041 }
2042
2043 // If we are only demanding sign bits then we can use the shift source
2044 // directly.
2045 if (std::optional<uint64_t> MaxSA =
2046 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2047 unsigned ShAmt = *MaxSA;
2048 // Must already be signbits in DemandedBits bounds, and can't demand any
2049 // shifted in zeroes.
2050 if (DemandedBits.countl_zero() >= ShAmt) {
2051 unsigned NumSignBits =
2052 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2053 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2054 return TLO.CombineTo(Op, Op0);
2055 }
2056 }
2057
2058 // Try to match AVG patterns (after shift simplification).
2059 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2060 DemandedElts, Depth + 1))
2061 return TLO.CombineTo(Op, AVG);
2062
2063 break;
2064 }
2065 case ISD::SRA: {
2066 SDValue Op0 = Op.getOperand(0);
2067 SDValue Op1 = Op.getOperand(1);
2068 EVT ShiftVT = Op1.getValueType();
2069
2070 // If we only want bits that already match the signbit then we don't need
2071 // to shift.
2072 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2073 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2074 NumHiDemandedBits)
2075 return TLO.CombineTo(Op, Op0);
2076
2077 // If this is an arithmetic shift right and only the low-bit is set, we can
2078 // always convert this into a logical shr, even if the shift amount is
2079 // variable. The low bit of the shift cannot be an input sign bit unless
2080 // the shift amount is >= the size of the datatype, which is undefined.
2081 if (DemandedBits.isOne())
2082 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2083
2084 if (std::optional<uint64_t> KnownSA =
2085 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2086 unsigned ShAmt = *KnownSA;
2087 if (ShAmt == 0)
2088 return TLO.CombineTo(Op, Op0);
2089
2090 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2091 // supports sext_inreg.
2092 if (Op0.getOpcode() == ISD::SHL) {
2093 if (std::optional<uint64_t> InnerSA =
2094 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2095 unsigned LowBits = BitWidth - ShAmt;
2096 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2097 if (VT.isVector())
2098 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2100
2101 if (*InnerSA == ShAmt) {
2102 if (!TLO.LegalOperations() ||
2104 return TLO.CombineTo(
2105 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2106 Op0.getOperand(0),
2107 TLO.DAG.getValueType(ExtVT)));
2108
2109 // Even if we can't convert to sext_inreg, we might be able to
2110 // remove this shift pair if the input is already sign extended.
2111 unsigned NumSignBits =
2112 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2113 if (NumSignBits > ShAmt)
2114 return TLO.CombineTo(Op, Op0.getOperand(0));
2115 }
2116 }
2117 }
2118
2119 APInt InDemandedMask = (DemandedBits << ShAmt);
2120
2121 // If the shift is exact, then it does demand the low bits (and knows that
2122 // they are zero).
2123 if (Op->getFlags().hasExact())
2124 InDemandedMask.setLowBits(ShAmt);
2125
2126 // If any of the demanded bits are produced by the sign extension, we also
2127 // demand the input sign bit.
2128 if (DemandedBits.countl_zero() < ShAmt)
2129 InDemandedMask.setSignBit();
2130
2131 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2132 Depth + 1))
2133 return true;
2134 Known.Zero.lshrInPlace(ShAmt);
2135 Known.One.lshrInPlace(ShAmt);
2136
2137 // If the input sign bit is known to be zero, or if none of the top bits
2138 // are demanded, turn this into an unsigned shift right.
2139 if (Known.Zero[BitWidth - ShAmt - 1] ||
2140 DemandedBits.countl_zero() >= ShAmt) {
2141 SDNodeFlags Flags;
2142 Flags.setExact(Op->getFlags().hasExact());
2143 return TLO.CombineTo(
2144 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2145 }
2146
2147 int Log2 = DemandedBits.exactLogBase2();
2148 if (Log2 >= 0) {
2149 // The bit must come from the sign.
2150 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2151 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2152 }
2153
2154 if (Known.One[BitWidth - ShAmt - 1])
2155 // New bits are known one.
2156 Known.One.setHighBits(ShAmt);
2157
2158 // Attempt to avoid multi-use ops if we don't need anything from them.
2159 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2160 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2161 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2162 if (DemandedOp0) {
2163 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2164 return TLO.CombineTo(Op, NewOp);
2165 }
2166 }
2167 }
2168
2169 // Try to match AVG patterns (after shift simplification).
2170 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2171 DemandedElts, Depth + 1))
2172 return TLO.CombineTo(Op, AVG);
2173
2174 break;
2175 }
2176 case ISD::FSHL:
2177 case ISD::FSHR: {
2178 SDValue Op0 = Op.getOperand(0);
2179 SDValue Op1 = Op.getOperand(1);
2180 SDValue Op2 = Op.getOperand(2);
2181 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2182
2183 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2184 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2185
2186 // For fshl, 0-shift returns the 1st arg.
2187 // For fshr, 0-shift returns the 2nd arg.
2188 if (Amt == 0) {
2189 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2190 Known, TLO, Depth + 1))
2191 return true;
2192 break;
2193 }
2194
2195 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2196 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2197 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2198 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2199 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2200 Depth + 1))
2201 return true;
2202 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2203 Depth + 1))
2204 return true;
2205
2206 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2207 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2209 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210 Known = Known.unionWith(Known2);
2211
2212 // Attempt to avoid multi-use ops if we don't need anything from them.
2213 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2214 !DemandedElts.isAllOnes()) {
2215 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2216 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2217 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2218 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2219 if (DemandedOp0 || DemandedOp1) {
2220 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2221 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2222 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2223 DemandedOp1, Op2);
2224 return TLO.CombineTo(Op, NewOp);
2225 }
2226 }
2227 }
2228
2229 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2230 if (isPowerOf2_32(BitWidth)) {
2231 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2232 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2233 Known2, TLO, Depth + 1))
2234 return true;
2235 }
2236 break;
2237 }
2238 case ISD::ROTL:
2239 case ISD::ROTR: {
2240 SDValue Op0 = Op.getOperand(0);
2241 SDValue Op1 = Op.getOperand(1);
2242 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2243
2244 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2245 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2246 return TLO.CombineTo(Op, Op0);
2247
2248 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2249 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2250 unsigned RevAmt = BitWidth - Amt;
2251
2252 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2253 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2254 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2255 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2256 Depth + 1))
2257 return true;
2258
2259 // rot*(x, 0) --> x
2260 if (Amt == 0)
2261 return TLO.CombineTo(Op, Op0);
2262
2263 // See if we don't demand either half of the rotated bits.
2264 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2265 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2266 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2267 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2268 }
2269 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2270 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2271 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2272 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2273 }
2274 }
2275
2276 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2277 if (isPowerOf2_32(BitWidth)) {
2278 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2279 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2280 Depth + 1))
2281 return true;
2282 }
2283 break;
2284 }
2285 case ISD::SMIN:
2286 case ISD::SMAX:
2287 case ISD::UMIN:
2288 case ISD::UMAX: {
2289 unsigned Opc = Op.getOpcode();
2290 SDValue Op0 = Op.getOperand(0);
2291 SDValue Op1 = Op.getOperand(1);
2292
2293 // If we're only demanding signbits, then we can simplify to OR/AND node.
2294 unsigned BitOp =
2295 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2296 unsigned NumSignBits =
2297 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2298 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2299 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2300 if (NumSignBits >= NumDemandedUpperBits)
2301 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2302
2303 // Check if one arg is always less/greater than (or equal) to the other arg.
2304 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2305 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2306 switch (Opc) {
2307 case ISD::SMIN:
2308 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2309 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2310 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2311 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2312 Known = KnownBits::smin(Known0, Known1);
2313 break;
2314 case ISD::SMAX:
2315 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2316 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2317 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2318 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2319 Known = KnownBits::smax(Known0, Known1);
2320 break;
2321 case ISD::UMIN:
2322 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2323 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2324 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2325 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2326 Known = KnownBits::umin(Known0, Known1);
2327 break;
2328 case ISD::UMAX:
2329 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2330 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2331 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2332 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2333 Known = KnownBits::umax(Known0, Known1);
2334 break;
2335 }
2336 break;
2337 }
2338 case ISD::BITREVERSE: {
2339 SDValue Src = Op.getOperand(0);
2340 APInt DemandedSrcBits = DemandedBits.reverseBits();
2341 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2342 Depth + 1))
2343 return true;
2344 Known.One = Known2.One.reverseBits();
2345 Known.Zero = Known2.Zero.reverseBits();
2346 break;
2347 }
2348 case ISD::BSWAP: {
2349 SDValue Src = Op.getOperand(0);
2350
2351 // If the only bits demanded come from one byte of the bswap result,
2352 // just shift the input byte into position to eliminate the bswap.
2353 unsigned NLZ = DemandedBits.countl_zero();
2354 unsigned NTZ = DemandedBits.countr_zero();
2355
2356 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2357 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2358 // have 14 leading zeros, round to 8.
2359 NLZ = alignDown(NLZ, 8);
2360 NTZ = alignDown(NTZ, 8);
2361 // If we need exactly one byte, we can do this transformation.
2362 if (BitWidth - NLZ - NTZ == 8) {
2363 // Replace this with either a left or right shift to get the byte into
2364 // the right place.
2365 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2366 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2367 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2368 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2369 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2370 return TLO.CombineTo(Op, NewOp);
2371 }
2372 }
2373
2374 APInt DemandedSrcBits = DemandedBits.byteSwap();
2375 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2376 Depth + 1))
2377 return true;
2378 Known.One = Known2.One.byteSwap();
2379 Known.Zero = Known2.Zero.byteSwap();
2380 break;
2381 }
2382 case ISD::CTPOP: {
2383 // If only 1 bit is demanded, replace with PARITY as long as we're before
2384 // op legalization.
2385 // FIXME: Limit to scalars for now.
2386 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2387 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2388 Op.getOperand(0)));
2389
2390 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2391 break;
2392 }
2394 SDValue Op0 = Op.getOperand(0);
2395 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2396 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2397
2398 // If we only care about the highest bit, don't bother shifting right.
2399 if (DemandedBits.isSignMask()) {
2400 unsigned MinSignedBits =
2401 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2402 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2403 // However if the input is already sign extended we expect the sign
2404 // extension to be dropped altogether later and do not simplify.
2405 if (!AlreadySignExtended) {
2406 // Compute the correct shift amount type, which must be getShiftAmountTy
2407 // for scalar types after legalization.
2408 SDValue ShiftAmt =
2409 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2410 return TLO.CombineTo(Op,
2411 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2412 }
2413 }
2414
2415 // If none of the extended bits are demanded, eliminate the sextinreg.
2416 if (DemandedBits.getActiveBits() <= ExVTBits)
2417 return TLO.CombineTo(Op, Op0);
2418
2419 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2420
2421 // Since the sign extended bits are demanded, we know that the sign
2422 // bit is demanded.
2423 InputDemandedBits.setBit(ExVTBits - 1);
2424
2425 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2426 Depth + 1))
2427 return true;
2428
2429 // If the sign bit of the input is known set or clear, then we know the
2430 // top bits of the result.
2431
2432 // If the input sign bit is known zero, convert this into a zero extension.
2433 if (Known.Zero[ExVTBits - 1])
2434 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2435
2436 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2437 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2438 Known.One.setBitsFrom(ExVTBits);
2439 Known.Zero &= Mask;
2440 } else { // Input sign bit unknown
2441 Known.Zero &= Mask;
2442 Known.One &= Mask;
2443 }
2444 break;
2445 }
2446 case ISD::BUILD_PAIR: {
2447 EVT HalfVT = Op.getOperand(0).getValueType();
2448 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2449
2450 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2451 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2452
2453 KnownBits KnownLo, KnownHi;
2454
2455 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2456 return true;
2457
2458 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2459 return true;
2460
2461 Known = KnownHi.concat(KnownLo);
2462 break;
2463 }
2465 if (VT.isScalableVector())
2466 return false;
2467 [[fallthrough]];
2468 case ISD::ZERO_EXTEND: {
2469 SDValue Src = Op.getOperand(0);
2470 EVT SrcVT = Src.getValueType();
2471 unsigned InBits = SrcVT.getScalarSizeInBits();
2472 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2474
2475 // If none of the top bits are demanded, convert this into an any_extend.
2476 if (DemandedBits.getActiveBits() <= InBits) {
2477 // If we only need the non-extended bits of the bottom element
2478 // then we can just bitcast to the result.
2479 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2480 VT.getSizeInBits() == SrcVT.getSizeInBits())
2481 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2482
2483 unsigned Opc =
2485 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2486 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2487 }
2488
2489 APInt InDemandedBits = DemandedBits.trunc(InBits);
2490 APInt InDemandedElts = DemandedElts.zext(InElts);
2491 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2492 Depth + 1)) {
2493 Op->dropFlags(SDNodeFlags::NonNeg);
2494 return true;
2495 }
2496 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2497 Known = Known.zext(BitWidth);
2498
2499 // Attempt to avoid multi-use ops if we don't need anything from them.
2500 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2501 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2502 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2503 break;
2504 }
2506 if (VT.isScalableVector())
2507 return false;
2508 [[fallthrough]];
2509 case ISD::SIGN_EXTEND: {
2510 SDValue Src = Op.getOperand(0);
2511 EVT SrcVT = Src.getValueType();
2512 unsigned InBits = SrcVT.getScalarSizeInBits();
2513 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2514 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2515
2516 APInt InDemandedElts = DemandedElts.zext(InElts);
2517 APInt InDemandedBits = DemandedBits.trunc(InBits);
2518
2519 // Since some of the sign extended bits are demanded, we know that the sign
2520 // bit is demanded.
2521 InDemandedBits.setBit(InBits - 1);
2522
2523 // If none of the top bits are demanded, convert this into an any_extend.
2524 if (DemandedBits.getActiveBits() <= InBits) {
2525 // If we only need the non-extended bits of the bottom element
2526 // then we can just bitcast to the result.
2527 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2528 VT.getSizeInBits() == SrcVT.getSizeInBits())
2529 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2530
2531 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2533 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2534 InBits) {
2535 unsigned Opc =
2537 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2538 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2539 }
2540 }
2541
2542 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2543 Depth + 1))
2544 return true;
2545 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2546
2547 // If the sign bit is known one, the top bits match.
2548 Known = Known.sext(BitWidth);
2549
2550 // If the sign bit is known zero, convert this to a zero extend.
2551 if (Known.isNonNegative()) {
2552 unsigned Opc =
2554 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2555 SDNodeFlags Flags;
2556 if (!IsVecInReg)
2557 Flags |= SDNodeFlags::NonNeg;
2558 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2559 }
2560 }
2561
2562 // Attempt to avoid multi-use ops if we don't need anything from them.
2563 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2564 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2565 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2566 break;
2567 }
2569 if (VT.isScalableVector())
2570 return false;
2571 [[fallthrough]];
2572 case ISD::ANY_EXTEND: {
2573 SDValue Src = Op.getOperand(0);
2574 EVT SrcVT = Src.getValueType();
2575 unsigned InBits = SrcVT.getScalarSizeInBits();
2576 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2577 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2578
2579 // If we only need the bottom element then we can just bitcast.
2580 // TODO: Handle ANY_EXTEND?
2581 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2582 VT.getSizeInBits() == SrcVT.getSizeInBits())
2583 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2584
2585 APInt InDemandedBits = DemandedBits.trunc(InBits);
2586 APInt InDemandedElts = DemandedElts.zext(InElts);
2587 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2588 Depth + 1))
2589 return true;
2590 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2591 Known = Known.anyext(BitWidth);
2592
2593 // Attempt to avoid multi-use ops if we don't need anything from them.
2594 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2595 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2596 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2597 break;
2598 }
2599 case ISD::TRUNCATE: {
2600 SDValue Src = Op.getOperand(0);
2601
2602 // Simplify the input, using demanded bit information, and compute the known
2603 // zero/one bits live out.
2604 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2605 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2606 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2607 Depth + 1)) {
2608 // Disable the nsw and nuw flags. We can no longer guarantee that we
2609 // won't wrap after simplification.
2610 Op->dropFlags(SDNodeFlags::NoWrap);
2611 return true;
2612 }
2613 Known = Known.trunc(BitWidth);
2614
2615 // Attempt to avoid multi-use ops if we don't need anything from them.
2616 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2617 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2618 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2619
2620 // If the input is only used by this truncate, see if we can shrink it based
2621 // on the known demanded bits.
2622 switch (Src.getOpcode()) {
2623 default:
2624 break;
2625 case ISD::SRL:
2626 // Shrink SRL by a constant if none of the high bits shifted in are
2627 // demanded.
2628 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2629 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2630 // undesirable.
2631 break;
2632
2633 if (Src.getNode()->hasOneUse()) {
2634 if (isTruncateFree(Src, VT) &&
2635 !isTruncateFree(Src.getValueType(), VT)) {
2636 // If truncate is only free at trunc(srl), do not turn it into
2637 // srl(trunc). The check is done by first check the truncate is free
2638 // at Src's opcode(srl), then check the truncate is not done by
2639 // referencing sub-register. In test, if both trunc(srl) and
2640 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2641 // trunc(srl)'s trunc is free, trunc(srl) is better.
2642 break;
2643 }
2644
2645 std::optional<uint64_t> ShAmtC =
2646 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2647 if (!ShAmtC || *ShAmtC >= BitWidth)
2648 break;
2649 uint64_t ShVal = *ShAmtC;
2650
2651 APInt HighBits =
2652 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2653 HighBits.lshrInPlace(ShVal);
2654 HighBits = HighBits.trunc(BitWidth);
2655 if (!(HighBits & DemandedBits)) {
2656 // None of the shifted in bits are needed. Add a truncate of the
2657 // shift input, then shift it.
2658 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2659 SDValue NewTrunc =
2660 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2661 return TLO.CombineTo(
2662 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2663 }
2664 }
2665 break;
2666 }
2667
2668 break;
2669 }
2670 case ISD::AssertZext: {
2671 // AssertZext demands all of the high bits, plus any of the low bits
2672 // demanded by its users.
2673 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2675 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2676 TLO, Depth + 1))
2677 return true;
2678
2679 Known.Zero |= ~InMask;
2680 Known.One &= (~Known.Zero);
2681 break;
2682 }
2684 SDValue Src = Op.getOperand(0);
2685 SDValue Idx = Op.getOperand(1);
2686 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2687 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2688
2689 if (SrcEltCnt.isScalable())
2690 return false;
2691
2692 // Demand the bits from every vector element without a constant index.
2693 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2694 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2695 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2696 if (CIdx->getAPIntValue().ult(NumSrcElts))
2697 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2698
2699 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2700 // anything about the extended bits.
2701 APInt DemandedSrcBits = DemandedBits;
2702 if (BitWidth > EltBitWidth)
2703 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2704
2705 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2706 Depth + 1))
2707 return true;
2708
2709 // Attempt to avoid multi-use ops if we don't need anything from them.
2710 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2711 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2712 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2713 SDValue NewOp =
2714 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2715 return TLO.CombineTo(Op, NewOp);
2716 }
2717 }
2718
2719 Known = Known2;
2720 if (BitWidth > EltBitWidth)
2721 Known = Known.anyext(BitWidth);
2722 break;
2723 }
2724 case ISD::BITCAST: {
2725 if (VT.isScalableVector())
2726 return false;
2727 SDValue Src = Op.getOperand(0);
2728 EVT SrcVT = Src.getValueType();
2729 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2730
2731 // If this is an FP->Int bitcast and if the sign bit is the only
2732 // thing demanded, turn this into a FGETSIGN.
2733 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2734 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2735 SrcVT.isFloatingPoint()) {
2736 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2737 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2738 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2739 SrcVT != MVT::f128) {
2740 // Cannot eliminate/lower SHL for f128 yet.
2741 EVT Ty = OpVTLegal ? VT : MVT::i32;
2742 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2743 // place. We expect the SHL to be eliminated by other optimizations.
2744 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2745 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2746 if (!OpVTLegal && OpVTSizeInBits > 32)
2747 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2748 unsigned ShVal = Op.getValueSizeInBits() - 1;
2749 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2750 return TLO.CombineTo(Op,
2751 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2752 }
2753 }
2754
2755 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2756 // Demand the elt/bit if any of the original elts/bits are demanded.
2757 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2758 unsigned Scale = BitWidth / NumSrcEltBits;
2759 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2760 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2761 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2762 for (unsigned i = 0; i != Scale; ++i) {
2763 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2764 unsigned BitOffset = EltOffset * NumSrcEltBits;
2765 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2766 if (!Sub.isZero()) {
2767 DemandedSrcBits |= Sub;
2768 for (unsigned j = 0; j != NumElts; ++j)
2769 if (DemandedElts[j])
2770 DemandedSrcElts.setBit((j * Scale) + i);
2771 }
2772 }
2773
2774 APInt KnownSrcUndef, KnownSrcZero;
2775 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2776 KnownSrcZero, TLO, Depth + 1))
2777 return true;
2778
2779 KnownBits KnownSrcBits;
2780 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2781 KnownSrcBits, TLO, Depth + 1))
2782 return true;
2783 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2784 // TODO - bigendian once we have test coverage.
2785 unsigned Scale = NumSrcEltBits / BitWidth;
2786 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2787 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2788 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2789 for (unsigned i = 0; i != NumElts; ++i)
2790 if (DemandedElts[i]) {
2791 unsigned Offset = (i % Scale) * BitWidth;
2792 DemandedSrcBits.insertBits(DemandedBits, Offset);
2793 DemandedSrcElts.setBit(i / Scale);
2794 }
2795
2796 if (SrcVT.isVector()) {
2797 APInt KnownSrcUndef, KnownSrcZero;
2798 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2799 KnownSrcZero, TLO, Depth + 1))
2800 return true;
2801 }
2802
2803 KnownBits KnownSrcBits;
2804 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2805 KnownSrcBits, TLO, Depth + 1))
2806 return true;
2807
2808 // Attempt to avoid multi-use ops if we don't need anything from them.
2809 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2810 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2811 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2812 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2813 return TLO.CombineTo(Op, NewOp);
2814 }
2815 }
2816 }
2817
2818 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2819 // recursive call where Known may be useful to the caller.
2820 if (Depth > 0) {
2821 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2822 return false;
2823 }
2824 break;
2825 }
2826 case ISD::MUL:
2827 if (DemandedBits.isPowerOf2()) {
2828 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2829 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2830 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2831 unsigned CTZ = DemandedBits.countr_zero();
2832 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2833 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2834 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2835 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2836 return TLO.CombineTo(Op, Shl);
2837 }
2838 }
2839 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2840 // X * X is odd iff X is odd.
2841 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2842 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2843 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2844 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2845 return TLO.CombineTo(Op, And1);
2846 }
2847 [[fallthrough]];
2848 case ISD::ADD:
2849 case ISD::SUB: {
2850 // Add, Sub, and Mul don't demand any bits in positions beyond that
2851 // of the highest bit demanded of them.
2852 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2853 SDNodeFlags Flags = Op.getNode()->getFlags();
2854 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2855 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2856 KnownBits KnownOp0, KnownOp1;
2857 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2858 const KnownBits &KnownRHS) {
2859 if (Op.getOpcode() == ISD::MUL)
2860 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2861 return Demanded;
2862 };
2863 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2864 Depth + 1) ||
2865 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2866 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2867 // See if the operation should be performed at a smaller bit width.
2868 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2869 // Disable the nsw and nuw flags. We can no longer guarantee that we
2870 // won't wrap after simplification.
2871 Op->dropFlags(SDNodeFlags::NoWrap);
2872 return true;
2873 }
2874
2875 // neg x with only low bit demanded is simply x.
2876 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2877 isNullConstant(Op0))
2878 return TLO.CombineTo(Op, Op1);
2879
2880 // Attempt to avoid multi-use ops if we don't need anything from them.
2881 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2882 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2883 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2884 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2885 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2886 if (DemandedOp0 || DemandedOp1) {
2887 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2888 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2889 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2890 Flags & ~SDNodeFlags::NoWrap);
2891 return TLO.CombineTo(Op, NewOp);
2892 }
2893 }
2894
2895 // If we have a constant operand, we may be able to turn it into -1 if we
2896 // do not demand the high bits. This can make the constant smaller to
2897 // encode, allow more general folding, or match specialized instruction
2898 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2899 // is probably not useful (and could be detrimental).
2901 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2902 if (C && !C->isAllOnes() && !C->isOne() &&
2903 (C->getAPIntValue() | HighMask).isAllOnes()) {
2904 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2905 // Disable the nsw and nuw flags. We can no longer guarantee that we
2906 // won't wrap after simplification.
2907 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2908 Flags & ~SDNodeFlags::NoWrap);
2909 return TLO.CombineTo(Op, NewOp);
2910 }
2911
2912 // Match a multiply with a disguised negated-power-of-2 and convert to a
2913 // an equivalent shift-left amount.
2914 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2915 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2916 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2917 return 0;
2918
2919 // Don't touch opaque constants. Also, ignore zero and power-of-2
2920 // multiplies. Those will get folded later.
2921 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2922 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2923 !MulC->getAPIntValue().isPowerOf2()) {
2924 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2925 if (UnmaskedC.isNegatedPowerOf2())
2926 return (-UnmaskedC).logBase2();
2927 }
2928 return 0;
2929 };
2930
2931 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2932 unsigned ShlAmt) {
2933 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2934 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2935 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2936 return TLO.CombineTo(Op, Res);
2937 };
2938
2940 if (Op.getOpcode() == ISD::ADD) {
2941 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2942 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2943 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2944 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2945 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2946 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2947 }
2948 if (Op.getOpcode() == ISD::SUB) {
2949 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2950 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2951 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2952 }
2953 }
2954
2955 if (Op.getOpcode() == ISD::MUL) {
2956 Known = KnownBits::mul(KnownOp0, KnownOp1);
2957 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2959 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2960 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2961 }
2962 break;
2963 }
2964 default:
2965 // We also ask the target about intrinsics (which could be specific to it).
2966 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2967 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2968 // TODO: Probably okay to remove after audit; here to reduce change size
2969 // in initial enablement patch for scalable vectors
2970 if (Op.getValueType().isScalableVector())
2971 break;
2972 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2973 Known, TLO, Depth))
2974 return true;
2975 break;
2976 }
2977
2978 // Just use computeKnownBits to compute output bits.
2979 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2980 break;
2981 }
2982
2983 // If we know the value of all of the demanded bits, return this as a
2984 // constant.
2985 if (!isTargetCanonicalConstantNode(Op) &&
2986 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2987 // Avoid folding to a constant if any OpaqueConstant is involved.
2988 const SDNode *N = Op.getNode();
2989 for (SDNode *Op :
2991 if (auto *C = dyn_cast<ConstantSDNode>(Op))
2992 if (C->isOpaque())
2993 return false;
2994 }
2995 if (VT.isInteger())
2996 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2997 if (VT.isFloatingPoint())
2998 return TLO.CombineTo(
2999 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3000 dl, VT));
3001 }
3002
3003 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3004 // Try again just for the original demanded elts.
3005 // Ensure we do this AFTER constant folding above.
3006 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3007 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3008
3009 return false;
3010}
3011
3013 const APInt &DemandedElts,
3014 DAGCombinerInfo &DCI) const {
3015 SelectionDAG &DAG = DCI.DAG;
3016 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3017 !DCI.isBeforeLegalizeOps());
3018
3019 APInt KnownUndef, KnownZero;
3020 bool Simplified =
3021 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3022 if (Simplified) {
3023 DCI.AddToWorklist(Op.getNode());
3024 DCI.CommitTargetLoweringOpt(TLO);
3025 }
3026
3027 return Simplified;
3028}
3029
3030/// Given a vector binary operation and known undefined elements for each input
3031/// operand, compute whether each element of the output is undefined.
3033 const APInt &UndefOp0,
3034 const APInt &UndefOp1) {
3035 EVT VT = BO.getValueType();
3037 "Vector binop only");
3038
3039 EVT EltVT = VT.getVectorElementType();
3040 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3041 assert(UndefOp0.getBitWidth() == NumElts &&
3042 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3043
3044 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3045 const APInt &UndefVals) {
3046 if (UndefVals[Index])
3047 return DAG.getUNDEF(EltVT);
3048
3049 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3050 // Try hard to make sure that the getNode() call is not creating temporary
3051 // nodes. Ignore opaque integers because they do not constant fold.
3052 SDValue Elt = BV->getOperand(Index);
3053 auto *C = dyn_cast<ConstantSDNode>(Elt);
3054 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3055 return Elt;
3056 }
3057
3058 return SDValue();
3059 };
3060
3061 APInt KnownUndef = APInt::getZero(NumElts);
3062 for (unsigned i = 0; i != NumElts; ++i) {
3063 // If both inputs for this element are either constant or undef and match
3064 // the element type, compute the constant/undef result for this element of
3065 // the vector.
3066 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3067 // not handle FP constants. The code within getNode() should be refactored
3068 // to avoid the danger of creating a bogus temporary node here.
3069 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3070 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3071 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3072 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3073 KnownUndef.setBit(i);
3074 }
3075 return KnownUndef;
3076}
3077
3079 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3080 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3081 bool AssumeSingleUse) const {
3082 EVT VT = Op.getValueType();
3083 unsigned Opcode = Op.getOpcode();
3084 APInt DemandedElts = OriginalDemandedElts;
3085 unsigned NumElts = DemandedElts.getBitWidth();
3086 assert(VT.isVector() && "Expected vector op");
3087
3088 KnownUndef = KnownZero = APInt::getZero(NumElts);
3089
3090 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3091 return false;
3092
3093 // TODO: For now we assume we know nothing about scalable vectors.
3094 if (VT.isScalableVector())
3095 return false;
3096
3097 assert(VT.getVectorNumElements() == NumElts &&
3098 "Mask size mismatches value type element count!");
3099
3100 // Undef operand.
3101 if (Op.isUndef()) {
3102 KnownUndef.setAllBits();
3103 return false;
3104 }
3105
3106 // If Op has other users, assume that all elements are needed.
3107 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3108 DemandedElts.setAllBits();
3109
3110 // Not demanding any elements from Op.
3111 if (DemandedElts == 0) {
3112 KnownUndef.setAllBits();
3113 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3114 }
3115
3116 // Limit search depth.
3118 return false;
3119
3120 SDLoc DL(Op);
3121 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3122 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3123
3124 // Helper for demanding the specified elements and all the bits of both binary
3125 // operands.
3126 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3127 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3128 TLO.DAG, Depth + 1);
3129 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3130 TLO.DAG, Depth + 1);
3131 if (NewOp0 || NewOp1) {
3132 SDValue NewOp =
3133 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3134 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3135 return TLO.CombineTo(Op, NewOp);
3136 }
3137 return false;
3138 };
3139
3140 switch (Opcode) {
3141 case ISD::SCALAR_TO_VECTOR: {
3142 if (!DemandedElts[0]) {
3143 KnownUndef.setAllBits();
3144 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3145 }
3146 SDValue ScalarSrc = Op.getOperand(0);
3147 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3148 SDValue Src = ScalarSrc.getOperand(0);
3149 SDValue Idx = ScalarSrc.getOperand(1);
3150 EVT SrcVT = Src.getValueType();
3151
3152 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3153
3154 if (SrcEltCnt.isScalable())
3155 return false;
3156
3157 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3158 if (isNullConstant(Idx)) {
3159 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3160 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3161 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3162 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3163 TLO, Depth + 1))
3164 return true;
3165 }
3166 }
3167 KnownUndef.setHighBits(NumElts - 1);
3168 break;
3169 }
3170 case ISD::BITCAST: {
3171 SDValue Src = Op.getOperand(0);
3172 EVT SrcVT = Src.getValueType();
3173
3174 // We only handle vectors here.
3175 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3176 if (!SrcVT.isVector())
3177 break;
3178
3179 // Fast handling of 'identity' bitcasts.
3180 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3181 if (NumSrcElts == NumElts)
3182 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3183 KnownZero, TLO, Depth + 1);
3184
3185 APInt SrcDemandedElts, SrcZero, SrcUndef;
3186
3187 // Bitcast from 'large element' src vector to 'small element' vector, we
3188 // must demand a source element if any DemandedElt maps to it.
3189 if ((NumElts % NumSrcElts) == 0) {
3190 unsigned Scale = NumElts / NumSrcElts;
3191 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3192 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3193 TLO, Depth + 1))
3194 return true;
3195
3196 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3197 // of the large element.
3198 // TODO - bigendian once we have test coverage.
3199 if (IsLE) {
3200 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3201 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3202 for (unsigned i = 0; i != NumElts; ++i)
3203 if (DemandedElts[i]) {
3204 unsigned Ofs = (i % Scale) * EltSizeInBits;
3205 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3206 }
3207
3208 KnownBits Known;
3209 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3210 TLO, Depth + 1))
3211 return true;
3212
3213 // The bitcast has split each wide element into a number of
3214 // narrow subelements. We have just computed the Known bits
3215 // for wide elements. See if element splitting results in
3216 // some subelements being zero. Only for demanded elements!
3217 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3218 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3219 .isAllOnes())
3220 continue;
3221 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3222 unsigned Elt = Scale * SrcElt + SubElt;
3223 if (DemandedElts[Elt])
3224 KnownZero.setBit(Elt);
3225 }
3226 }
3227 }
3228
3229 // If the src element is zero/undef then all the output elements will be -
3230 // only demanded elements are guaranteed to be correct.
3231 for (unsigned i = 0; i != NumSrcElts; ++i) {
3232 if (SrcDemandedElts[i]) {
3233 if (SrcZero[i])
3234 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3235 if (SrcUndef[i])
3236 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3237 }
3238 }
3239 }
3240
3241 // Bitcast from 'small element' src vector to 'large element' vector, we
3242 // demand all smaller source elements covered by the larger demanded element
3243 // of this vector.
3244 if ((NumSrcElts % NumElts) == 0) {
3245 unsigned Scale = NumSrcElts / NumElts;
3246 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3247 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3248 TLO, Depth + 1))
3249 return true;
3250
3251 // If all the src elements covering an output element are zero/undef, then
3252 // the output element will be as well, assuming it was demanded.
3253 for (unsigned i = 0; i != NumElts; ++i) {
3254 if (DemandedElts[i]) {
3255 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3256 KnownZero.setBit(i);
3257 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3258 KnownUndef.setBit(i);
3259 }
3260 }
3261 }
3262 break;
3263 }
3264 case ISD::FREEZE: {
3265 SDValue N0 = Op.getOperand(0);
3266 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3267 /*PoisonOnly=*/false))
3268 return TLO.CombineTo(Op, N0);
3269
3270 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3271 // freeze(op(x, ...)) -> op(freeze(x), ...).
3272 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3273 return TLO.CombineTo(
3275 TLO.DAG.getFreeze(N0.getOperand(0))));
3276 break;
3277 }
3278 case ISD::BUILD_VECTOR: {
3279 // Check all elements and simplify any unused elements with UNDEF.
3280 if (!DemandedElts.isAllOnes()) {
3281 // Don't simplify BROADCASTS.
3282 if (llvm::any_of(Op->op_values(),
3283 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3284 SmallVector<SDValue, 32> Ops(Op->ops());
3285 bool Updated = false;
3286 for (unsigned i = 0; i != NumElts; ++i) {
3287 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3288 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3289 KnownUndef.setBit(i);
3290 Updated = true;
3291 }
3292 }
3293 if (Updated)
3294 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3295 }
3296 }
3297 for (unsigned i = 0; i != NumElts; ++i) {
3298 SDValue SrcOp = Op.getOperand(i);
3299 if (SrcOp.isUndef()) {
3300 KnownUndef.setBit(i);
3301 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3303 KnownZero.setBit(i);
3304 }
3305 }
3306 break;
3307 }
3308 case ISD::CONCAT_VECTORS: {
3309 EVT SubVT = Op.getOperand(0).getValueType();
3310 unsigned NumSubVecs = Op.getNumOperands();
3311 unsigned NumSubElts = SubVT.getVectorNumElements();
3312 for (unsigned i = 0; i != NumSubVecs; ++i) {
3313 SDValue SubOp = Op.getOperand(i);
3314 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3315 APInt SubUndef, SubZero;
3316 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3317 Depth + 1))
3318 return true;
3319 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3320 KnownZero.insertBits(SubZero, i * NumSubElts);
3321 }
3322
3323 // Attempt to avoid multi-use ops if we don't need anything from them.
3324 if (!DemandedElts.isAllOnes()) {
3325 bool FoundNewSub = false;
3326 SmallVector<SDValue, 2> DemandedSubOps;
3327 for (unsigned i = 0; i != NumSubVecs; ++i) {
3328 SDValue SubOp = Op.getOperand(i);
3329 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3330 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3331 SubOp, SubElts, TLO.DAG, Depth + 1);
3332 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3333 FoundNewSub = NewSubOp ? true : FoundNewSub;
3334 }
3335 if (FoundNewSub) {
3336 SDValue NewOp =
3337 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3338 return TLO.CombineTo(Op, NewOp);
3339 }
3340 }
3341 break;
3342 }
3343 case ISD::INSERT_SUBVECTOR: {
3344 // Demand any elements from the subvector and the remainder from the src its
3345 // inserted into.
3346 SDValue Src = Op.getOperand(0);
3347 SDValue Sub = Op.getOperand(1);
3348 uint64_t Idx = Op.getConstantOperandVal(2);
3349 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3350 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3351 APInt DemandedSrcElts = DemandedElts;
3352 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3353
3354 APInt SubUndef, SubZero;
3355 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3356 Depth + 1))
3357 return true;
3358
3359 // If none of the src operand elements are demanded, replace it with undef.
3360 if (!DemandedSrcElts && !Src.isUndef())
3361 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3362 TLO.DAG.getUNDEF(VT), Sub,
3363 Op.getOperand(2)));
3364
3365 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3366 TLO, Depth + 1))
3367 return true;
3368 KnownUndef.insertBits(SubUndef, Idx);
3369 KnownZero.insertBits(SubZero, Idx);
3370
3371 // Attempt to avoid multi-use ops if we don't need anything from them.
3372 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3373 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3374 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3375 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3376 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3377 if (NewSrc || NewSub) {
3378 NewSrc = NewSrc ? NewSrc : Src;
3379 NewSub = NewSub ? NewSub : Sub;
3380 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3381 NewSub, Op.getOperand(2));
3382 return TLO.CombineTo(Op, NewOp);
3383 }
3384 }
3385 break;
3386 }
3388 // Offset the demanded elts by the subvector index.
3389 SDValue Src = Op.getOperand(0);
3390 if (Src.getValueType().isScalableVector())
3391 break;
3392 uint64_t Idx = Op.getConstantOperandVal(1);
3393 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3394 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3395
3396 APInt SrcUndef, SrcZero;
3397 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3398 Depth + 1))
3399 return true;
3400 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3401 KnownZero = SrcZero.extractBits(NumElts, Idx);
3402
3403 // Attempt to avoid multi-use ops if we don't need anything from them.
3404 if (!DemandedElts.isAllOnes()) {
3405 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3406 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3407 if (NewSrc) {
3408 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3409 Op.getOperand(1));
3410 return TLO.CombineTo(Op, NewOp);
3411 }
3412 }
3413 break;
3414 }
3416 SDValue Vec = Op.getOperand(0);
3417 SDValue Scl = Op.getOperand(1);
3418 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3419
3420 // For a legal, constant insertion index, if we don't need this insertion
3421 // then strip it, else remove it from the demanded elts.
3422 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3423 unsigned Idx = CIdx->getZExtValue();
3424 if (!DemandedElts[Idx])
3425 return TLO.CombineTo(Op, Vec);
3426
3427 APInt DemandedVecElts(DemandedElts);
3428 DemandedVecElts.clearBit(Idx);
3429 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3430 KnownZero, TLO, Depth + 1))
3431 return true;
3432
3433 KnownUndef.setBitVal(Idx, Scl.isUndef());
3434
3435 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3436 break;
3437 }
3438
3439 APInt VecUndef, VecZero;
3440 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3441 Depth + 1))
3442 return true;
3443 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3444 break;
3445 }
3446 case ISD::VSELECT: {
3447 SDValue Sel = Op.getOperand(0);
3448 SDValue LHS = Op.getOperand(1);
3449 SDValue RHS = Op.getOperand(2);
3450
3451 // Try to transform the select condition based on the current demanded
3452 // elements.
3453 APInt UndefSel, ZeroSel;
3454 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3455 Depth + 1))
3456 return true;
3457
3458 // See if we can simplify either vselect operand.
3459 APInt DemandedLHS(DemandedElts);
3460 APInt DemandedRHS(DemandedElts);
3461 APInt UndefLHS, ZeroLHS;
3462 APInt UndefRHS, ZeroRHS;
3463 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3464 Depth + 1))
3465 return true;
3466 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3467 Depth + 1))
3468 return true;
3469
3470 KnownUndef = UndefLHS & UndefRHS;
3471 KnownZero = ZeroLHS & ZeroRHS;
3472
3473 // If we know that the selected element is always zero, we don't need the
3474 // select value element.
3475 APInt DemandedSel = DemandedElts & ~KnownZero;
3476 if (DemandedSel != DemandedElts)
3477 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3478 Depth + 1))
3479 return true;
3480
3481 break;
3482 }
3483 case ISD::VECTOR_SHUFFLE: {
3484 SDValue LHS = Op.getOperand(0);
3485 SDValue RHS = Op.getOperand(1);
3486 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3487
3488 // Collect demanded elements from shuffle operands..
3489 APInt DemandedLHS(NumElts, 0);
3490 APInt DemandedRHS(NumElts, 0);
3491 for (unsigned i = 0; i != NumElts; ++i) {
3492 int M = ShuffleMask[i];
3493 if (M < 0 || !DemandedElts[i])
3494 continue;
3495 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3496 if (M < (int)NumElts)
3497 DemandedLHS.setBit(M);
3498 else
3499 DemandedRHS.setBit(M - NumElts);
3500 }
3501
3502 // See if we can simplify either shuffle operand.
3503 APInt UndefLHS, ZeroLHS;
3504 APInt UndefRHS, ZeroRHS;
3505 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3506 Depth + 1))
3507 return true;
3508 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3509 Depth + 1))
3510 return true;
3511
3512 // Simplify mask using undef elements from LHS/RHS.
3513 bool Updated = false;
3514 bool IdentityLHS = true, IdentityRHS = true;
3515 SmallVector<int, 32> NewMask(ShuffleMask);
3516 for (unsigned i = 0; i != NumElts; ++i) {
3517 int &M = NewMask[i];
3518 if (M < 0)
3519 continue;
3520 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3521 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3522 Updated = true;
3523 M = -1;
3524 }
3525 IdentityLHS &= (M < 0) || (M == (int)i);
3526 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3527 }
3528
3529 // Update legal shuffle masks based on demanded elements if it won't reduce
3530 // to Identity which can cause premature removal of the shuffle mask.
3531 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3532 SDValue LegalShuffle =
3533 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3534 if (LegalShuffle)
3535 return TLO.CombineTo(Op, LegalShuffle);
3536 }
3537
3538 // Propagate undef/zero elements from LHS/RHS.
3539 for (unsigned i = 0; i != NumElts; ++i) {
3540 int M = ShuffleMask[i];
3541 if (M < 0) {
3542 KnownUndef.setBit(i);
3543 } else if (M < (int)NumElts) {
3544 if (UndefLHS[M])
3545 KnownUndef.setBit(i);
3546 if (ZeroLHS[M])
3547 KnownZero.setBit(i);
3548 } else {
3549 if (UndefRHS[M - NumElts])
3550 KnownUndef.setBit(i);
3551 if (ZeroRHS[M - NumElts])
3552 KnownZero.setBit(i);
3553 }
3554 }
3555 break;
3556 }
3560 APInt SrcUndef, SrcZero;
3561 SDValue Src = Op.getOperand(0);
3562 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3563 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3564 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3565 Depth + 1))
3566 return true;
3567 KnownZero = SrcZero.zextOrTrunc(NumElts);
3568 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3569
3570 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3571 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3572 DemandedSrcElts == 1) {
3573 // aext - if we just need the bottom element then we can bitcast.
3574 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3575 }
3576
3577 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3578 // zext(undef) upper bits are guaranteed to be zero.
3579 if (DemandedElts.isSubsetOf(KnownUndef))
3580 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3581 KnownUndef.clearAllBits();
3582
3583 // zext - if we just need the bottom element then we can mask:
3584 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3585 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3586 Op->isOnlyUserOf(Src.getNode()) &&
3587 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3588 SDLoc DL(Op);
3589 EVT SrcVT = Src.getValueType();
3590 EVT SrcSVT = SrcVT.getScalarType();
3591 SmallVector<SDValue> MaskElts;
3592 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3593 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3594 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3595 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3596 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3597 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3598 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3599 }
3600 }
3601 }
3602 break;
3603 }
3604
3605 // TODO: There are more binop opcodes that could be handled here - MIN,
3606 // MAX, saturated math, etc.
3607 case ISD::ADD: {
3608 SDValue Op0 = Op.getOperand(0);
3609 SDValue Op1 = Op.getOperand(1);
3610 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3611 APInt UndefLHS, ZeroLHS;
3612 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3613 Depth + 1, /*AssumeSingleUse*/ true))
3614 return true;
3615 }
3616 [[fallthrough]];
3617 }
3618 case ISD::AVGCEILS:
3619 case ISD::AVGCEILU:
3620 case ISD::AVGFLOORS:
3621 case ISD::AVGFLOORU:
3622 case ISD::OR:
3623 case ISD::XOR:
3624 case ISD::SUB:
3625 case ISD::FADD:
3626 case ISD::FSUB:
3627 case ISD::FMUL:
3628 case ISD::FDIV:
3629 case ISD::FREM: {
3630 SDValue Op0 = Op.getOperand(0);
3631 SDValue Op1 = Op.getOperand(1);
3632
3633 APInt UndefRHS, ZeroRHS;
3634 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3635 Depth + 1))
3636 return true;
3637 APInt UndefLHS, ZeroLHS;
3638 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3639 Depth + 1))
3640 return true;
3641
3642 KnownZero = ZeroLHS & ZeroRHS;
3643 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3644
3645 // Attempt to avoid multi-use ops if we don't need anything from them.
3646 // TODO - use KnownUndef to relax the demandedelts?
3647 if (!DemandedElts.isAllOnes())
3648 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3649 return true;
3650 break;
3651 }
3652 case ISD::SHL:
3653 case ISD::SRL:
3654 case ISD::SRA:
3655 case ISD::ROTL:
3656 case ISD::ROTR: {
3657 SDValue Op0 = Op.getOperand(0);
3658 SDValue Op1 = Op.getOperand(1);
3659
3660 APInt UndefRHS, ZeroRHS;
3661 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3662 Depth + 1))
3663 return true;
3664 APInt UndefLHS, ZeroLHS;
3665 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3666 Depth + 1))
3667 return true;
3668
3669 KnownZero = ZeroLHS;
3670 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3671
3672 // Attempt to avoid multi-use ops if we don't need anything from them.
3673 // TODO - use KnownUndef to relax the demandedelts?
3674 if (!DemandedElts.isAllOnes())
3675 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3676 return true;
3677 break;
3678 }
3679 case ISD::MUL:
3680 case ISD::MULHU:
3681 case ISD::MULHS:
3682 case ISD::AND: {
3683 SDValue Op0 = Op.getOperand(0);
3684 SDValue Op1 = Op.getOperand(1);
3685
3686 APInt SrcUndef, SrcZero;
3687 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3688 Depth + 1))
3689 return true;
3690 // If we know that a demanded element was zero in Op1 we don't need to
3691 // demand it in Op0 - its guaranteed to be zero.
3692 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3693 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3694 TLO, Depth + 1))
3695 return true;
3696
3697 KnownUndef &= DemandedElts0;
3698 KnownZero &= DemandedElts0;
3699
3700 // If every element pair has a zero/undef then just fold to zero.
3701 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3702 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3703 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3704 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3705
3706 // If either side has a zero element, then the result element is zero, even
3707 // if the other is an UNDEF.
3708 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3709 // and then handle 'and' nodes with the rest of the binop opcodes.
3710 KnownZero |= SrcZero;
3711 KnownUndef &= SrcUndef;
3712 KnownUndef &= ~KnownZero;
3713
3714 // Attempt to avoid multi-use ops if we don't need anything from them.
3715 if (!DemandedElts.isAllOnes())
3716 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3717 return true;
3718 break;
3719 }
3720 case ISD::TRUNCATE:
3721 case ISD::SIGN_EXTEND:
3722 case ISD::ZERO_EXTEND:
3723 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3724 KnownZero, TLO, Depth + 1))
3725 return true;
3726
3727 if (!DemandedElts.isAllOnes())
3728 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3729 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3730 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3731
3732 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3733 // zext(undef) upper bits are guaranteed to be zero.
3734 if (DemandedElts.isSubsetOf(KnownUndef))
3735 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3736 KnownUndef.clearAllBits();
3737 }
3738 break;
3739 case ISD::SINT_TO_FP:
3740 case ISD::UINT_TO_FP:
3741 case ISD::FP_TO_SINT:
3742 case ISD::FP_TO_UINT:
3743 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3744 KnownZero, TLO, Depth + 1))
3745 return true;
3746 // Don't fall through to generic undef -> undef handling.
3747 return false;
3748 default: {
3749 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3750 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3751 KnownZero, TLO, Depth))
3752 return true;
3753 } else {
3754 KnownBits Known;
3755 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3756 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3757 TLO, Depth, AssumeSingleUse))
3758 return true;
3759 }
3760 break;
3761 }
3762 }
3763 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3764
3765 // Constant fold all undef cases.
3766 // TODO: Handle zero cases as well.
3767 if (DemandedElts.isSubsetOf(KnownUndef))
3768 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3769
3770 return false;
3771}
3772
3773/// Determine which of the bits specified in Mask are known to be either zero or
3774/// one and return them in the Known.
3776 KnownBits &Known,
3777 const APInt &DemandedElts,
3778 const SelectionDAG &DAG,
3779 unsigned Depth) const {
3780 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3781 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3782 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3783 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3784 "Should use MaskedValueIsZero if you don't know whether Op"
3785 " is a target node!");
3786 Known.resetAll();
3787}
3788
3791 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3792 unsigned Depth) const {
3793 Known.resetAll();
3794}
3795
3797 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3798 // The low bits are known zero if the pointer is aligned.
3799 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3800}
3801
3804 unsigned Depth) const {
3805 return Align(1);
3806}
3807
3808/// This method can be implemented by targets that want to expose additional
3809/// information about sign bits to the DAG Combiner.
3811 const APInt &,
3812 const SelectionDAG &,
3813 unsigned Depth) const {
3814 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3815 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3816 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3817 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3818 "Should use ComputeNumSignBits if you don't know whether Op"
3819 " is a target node!");
3820 return 1;
3821}
3822
3824 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3825 const MachineRegisterInfo &MRI, unsigned Depth) const {
3826 return 1;
3827}
3828
3830 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3831 TargetLoweringOpt &TLO, unsigned Depth) const {
3832 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3833 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3834 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3835 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3836 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3837 " is a target node!");
3838 return false;
3839}
3840
3842 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3843 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3844 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3845 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3846 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3847 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3848 "Should use SimplifyDemandedBits if you don't know whether Op"
3849 " is a target node!");
3850 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3851 return false;
3852}
3853
3855 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3856 SelectionDAG &DAG, unsigned Depth) const {
3857 assert(
3858 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3859 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3860 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3861 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3862 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3863 " is a target node!");
3864 return SDValue();
3865}
3866
3867SDValue
3870 SelectionDAG &DAG) const {
3871 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3872 if (!LegalMask) {
3873 std::swap(N0, N1);
3875 LegalMask = isShuffleMaskLegal(Mask, VT);
3876 }
3877
3878 if (!LegalMask)
3879 return SDValue();
3880
3881 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3882}
3883
3885 return nullptr;
3886}
3887
3889 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3890 bool PoisonOnly, unsigned Depth) const {
3891 assert(
3892 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3893 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3894 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3895 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3896 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3897 " is a target node!");
3898
3899 // If Op can't create undef/poison and none of its operands are undef/poison
3900 // then Op is never undef/poison.
3901 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3902 /*ConsiderFlags*/ true, Depth) &&
3903 all_of(Op->ops(), [&](SDValue V) {
3904 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3905 Depth + 1);
3906 });
3907}
3908
3910 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3911 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3912 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3913 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3914 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3915 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3916 "Should use canCreateUndefOrPoison if you don't know whether Op"
3917 " is a target node!");
3918 // Be conservative and return true.
3919 return true;
3920}
3921
3923 const SelectionDAG &DAG,
3924 bool SNaN,
3925 unsigned Depth) const {
3926 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3927 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3928 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3929 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3930 "Should use isKnownNeverNaN if you don't know whether Op"
3931 " is a target node!");
3932 return false;
3933}
3934
3936 const APInt &DemandedElts,
3937 APInt &UndefElts,
3938 const SelectionDAG &DAG,
3939 unsigned Depth) const {
3940 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3941 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3942 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3943 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3944 "Should use isSplatValue if you don't know whether Op"
3945 " is a target node!");
3946 return false;
3947}
3948
3949// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3950// work with truncating build vectors and vectors with elements of less than
3951// 8 bits.
3953 if (!N)
3954 return false;
3955
3956 unsigned EltWidth;
3957 APInt CVal;
3958 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3959 /*AllowTruncation=*/true)) {
3960 CVal = CN->getAPIntValue();
3961 EltWidth = N.getValueType().getScalarSizeInBits();
3962 } else
3963 return false;
3964
3965 // If this is a truncating splat, truncate the splat value.
3966 // Otherwise, we may fail to match the expected values below.
3967 if (EltWidth < CVal.getBitWidth())
3968 CVal = CVal.trunc(EltWidth);
3969
3970 switch (getBooleanContents(N.getValueType())) {
3972 return CVal[0];
3974 return CVal.isOne();
3976 return CVal.isAllOnes();
3977 }
3978
3979 llvm_unreachable("Invalid boolean contents");
3980}
3981
3983 if (!N)
3984 return false;
3985
3986 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3987 if (!CN) {
3988 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3989 if (!BV)
3990 return false;
3991
3992 // Only interested in constant splats, we don't care about undef
3993 // elements in identifying boolean constants and getConstantSplatNode
3994 // returns NULL if all ops are undef;
3995 CN = BV->getConstantSplatNode();
3996 if (!CN)
3997 return false;
3998 }
3999
4000 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4001 return !CN->getAPIntValue()[0];
4002
4003 return CN->isZero();
4004}
4005
4007 bool SExt) const {
4008 if (VT == MVT::i1)
4009 return N->isOne();
4010
4012 switch (Cnt) {
4014 // An extended value of 1 is always true, unless its original type is i1,
4015 // in which case it will be sign extended to -1.
4016 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4019 return N->isAllOnes() && SExt;
4020 }
4021 llvm_unreachable("Unexpected enumeration.");
4022}
4023
4024/// This helper function of SimplifySetCC tries to optimize the comparison when
4025/// either operand of the SetCC node is a bitwise-and instruction.
4026SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4027 ISD::CondCode Cond, const SDLoc &DL,
4028 DAGCombinerInfo &DCI) const {
4029 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4030 std::swap(N0, N1);
4031
4032 SelectionDAG &DAG = DCI.DAG;
4033 EVT OpVT = N0.getValueType();
4034 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4035 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4036 return SDValue();
4037
4038 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4039 // iff everything but LSB is known zero:
4040 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4043 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4044 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4045 if (DAG.MaskedValueIsZero(N0, UpperBits))
4046 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4047 }
4048
4049 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4050 // test in a narrow type that we can truncate to with no cost. Examples:
4051 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4052 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4053 // TODO: This conservatively checks for type legality on the source and
4054 // destination types. That may inhibit optimizations, but it also
4055 // allows setcc->shift transforms that may be more beneficial.
4056 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4057 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4058 isTypeLegal(OpVT) && N0.hasOneUse()) {
4059 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4060 AndC->getAPIntValue().getActiveBits());
4061 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4062 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4063 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4064 return DAG.getSetCC(DL, VT, Trunc, Zero,
4066 }
4067 }
4068
4069 // Match these patterns in any of their permutations:
4070 // (X & Y) == Y
4071 // (X & Y) != Y
4072 SDValue X, Y;
4073 if (N0.getOperand(0) == N1) {
4074 X = N0.getOperand(1);
4075 Y = N0.getOperand(0);
4076 } else if (N0.getOperand(1) == N1) {
4077 X = N0.getOperand(0);
4078 Y = N0.getOperand(1);
4079 } else {
4080 return SDValue();
4081 }
4082
4083 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4084 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4085 // its liable to create and infinite loop.
4086 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4087 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4089 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4090 // Note that where Y is variable and is known to have at most one bit set
4091 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4092 // equivalent when Y == 0.
4093 assert(OpVT.isInteger());
4095 if (DCI.isBeforeLegalizeOps() ||
4097 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4098 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4099 // If the target supports an 'and-not' or 'and-complement' logic operation,
4100 // try to use that to make a comparison operation more efficient.
4101 // But don't do this transform if the mask is a single bit because there are
4102 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4103 // 'rlwinm' on PPC).
4104
4105 // Bail out if the compare operand that we want to turn into a zero is
4106 // already a zero (otherwise, infinite loop).
4107 if (isNullConstant(Y))
4108 return SDValue();
4109
4110 // Transform this into: ~X & Y == 0.
4111 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4112 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4113 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4114 }
4115
4116 return SDValue();
4117}
4118
4119/// There are multiple IR patterns that could be checking whether certain
4120/// truncation of a signed number would be lossy or not. The pattern which is
4121/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4122/// We are looking for the following pattern: (KeptBits is a constant)
4123/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4124/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4125/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4126/// We will unfold it into the natural trunc+sext pattern:
4127/// ((%x << C) a>> C) dstcond %x
4128/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4129SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4130 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4131 const SDLoc &DL) const {
4132 // We must be comparing with a constant.
4133 ConstantSDNode *C1;
4134 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4135 return SDValue();
4136
4137 // N0 should be: add %x, (1 << (KeptBits-1))
4138 if (N0->getOpcode() != ISD::ADD)
4139 return SDValue();
4140
4141 // And we must be 'add'ing a constant.
4142 ConstantSDNode *C01;
4143 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4144 return SDValue();
4145
4146 SDValue X = N0->getOperand(0);
4147 EVT XVT = X.getValueType();
4148
4149 // Validate constants ...
4150
4151 APInt I1 = C1->getAPIntValue();
4152
4153 ISD::CondCode NewCond;
4154 if (Cond == ISD::CondCode::SETULT) {
4155 NewCond = ISD::CondCode::SETEQ;
4156 } else if (Cond == ISD::CondCode::SETULE) {
4157 NewCond = ISD::CondCode::SETEQ;
4158 // But need to 'canonicalize' the constant.
4159 I1 += 1;
4160 } else if (Cond == ISD::CondCode::SETUGT) {
4161 NewCond = ISD::CondCode::SETNE;
4162 // But need to 'canonicalize' the constant.
4163 I1 += 1;
4164 } else if (Cond == ISD::CondCode::SETUGE) {
4165 NewCond = ISD::CondCode::SETNE;
4166 } else
4167 return SDValue();
4168
4169 APInt I01 = C01->getAPIntValue();
4170
4171 auto checkConstants = [&I1, &I01]() -> bool {
4172 // Both of them must be power-of-two, and the constant from setcc is bigger.
4173 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4174 };
4175
4176 if (checkConstants()) {
4177 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4178 } else {
4179 // What if we invert constants? (and the target predicate)
4180 I1.negate();
4181 I01.negate();
4182 assert(XVT.isInteger());
4183 NewCond = getSetCCInverse(NewCond, XVT);
4184 if (!checkConstants())
4185 return SDValue();
4186 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4187 }
4188
4189 // They are power-of-two, so which bit is set?
4190 const unsigned KeptBits = I1.logBase2();
4191 const unsigned KeptBitsMinusOne = I01.logBase2();
4192
4193 // Magic!
4194 if (KeptBits != (KeptBitsMinusOne + 1))
4195 return SDValue();
4196 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4197
4198 // We don't want to do this in every single case.
4199 SelectionDAG &DAG = DCI.DAG;
4200 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4201 return SDValue();
4202
4203 // Unfold into: sext_inreg(%x) cond %x
4204 // Where 'cond' will be either 'eq' or 'ne'.
4205 SDValue SExtInReg = DAG.getNode(
4207 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4208 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4209}
4210
4211// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4212SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4213 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4214 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4216 "Should be a comparison with 0.");
4217 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4218 "Valid only for [in]equality comparisons.");
4219
4220 unsigned NewShiftOpcode;
4221 SDValue X, C, Y;
4222
4223 SelectionDAG &DAG = DCI.DAG;
4224
4225 // Look for '(C l>>/<< Y)'.
4226 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4227 // The shift should be one-use.
4228 if (!V.hasOneUse())
4229 return false;
4230 unsigned OldShiftOpcode = V.getOpcode();
4231 switch (OldShiftOpcode) {
4232 case ISD::SHL:
4233 NewShiftOpcode = ISD::SRL;
4234 break;
4235 case ISD::SRL:
4236 NewShiftOpcode = ISD::SHL;
4237 break;
4238 default:
4239 return false; // must be a logical shift.
4240 }
4241 // We should be shifting a constant.
4242 // FIXME: best to use isConstantOrConstantVector().
4243 C = V.getOperand(0);
4245 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4246 if (!CC)
4247 return false;
4248 Y = V.getOperand(1);
4249
4251 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4253 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4254 };
4255
4256 // LHS of comparison should be an one-use 'and'.
4257 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4258 return SDValue();
4259
4260 X = N0.getOperand(0);
4261 SDValue Mask = N0.getOperand(1);
4262
4263 // 'and' is commutative!
4264 if (!Match(Mask)) {
4265 std::swap(X, Mask);
4266 if (!Match(Mask))
4267 return SDValue();
4268 }
4269
4270 EVT VT = X.getValueType();
4271
4272 // Produce:
4273 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4274 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4275 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4276 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4277 return T2;
4278}
4279
4280/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4281/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4282/// handle the commuted versions of these patterns.
4283SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4284 ISD::CondCode Cond, const SDLoc &DL,
4285 DAGCombinerInfo &DCI) const {
4286 unsigned BOpcode = N0.getOpcode();
4287 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4288 "Unexpected binop");
4289 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4290
4291 // (X + Y) == X --> Y == 0
4292 // (X - Y) == X --> Y == 0
4293 // (X ^ Y) == X --> Y == 0
4294 SelectionDAG &DAG = DCI.DAG;
4295 EVT OpVT = N0.getValueType();
4296 SDValue X = N0.getOperand(0);
4297 SDValue Y = N0.getOperand(1);
4298 if (X == N1)
4299 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4300
4301 if (Y != N1)
4302 return SDValue();
4303
4304 // (X + Y) == Y --> X == 0
4305 // (X ^ Y) == Y --> X == 0
4306 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4307 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4308
4309 // The shift would not be valid if the operands are boolean (i1).
4310 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4311 return SDValue();
4312
4313 // (X - Y) == Y --> X == Y << 1
4314 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4315 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4316 if (!DCI.isCalledByLegalizer())
4317 DCI.AddToWorklist(YShl1.getNode());
4318 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4319}
4320
4322 SDValue N0, const APInt &C1,
4323 ISD::CondCode Cond, const SDLoc &dl,
4324 SelectionDAG &DAG) {
4325 // Look through truncs that don't change the value of a ctpop.
4326 // FIXME: Add vector support? Need to be careful with setcc result type below.
4327 SDValue CTPOP = N0;
4328 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4330 CTPOP = N0.getOperand(0);
4331
4332 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4333 return SDValue();
4334
4335 EVT CTVT = CTPOP.getValueType();
4336 SDValue CTOp = CTPOP.getOperand(0);
4337
4338 // Expand a power-of-2-or-zero comparison based on ctpop:
4339 // (ctpop x) u< 2 -> (x & x-1) == 0
4340 // (ctpop x) u> 1 -> (x & x-1) != 0
4341 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4342 // Keep the CTPOP if it is a cheap vector op.
4343 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4344 return SDValue();
4345
4346 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4347 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4348 return SDValue();
4349 if (C1 == 0 && (Cond == ISD::SETULT))
4350 return SDValue(); // This is handled elsewhere.
4351
4352 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4353
4354 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4355 SDValue Result = CTOp;
4356 for (unsigned i = 0; i < Passes; i++) {
4357 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4358 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4359 }
4361 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4362 }
4363
4364 // Expand a power-of-2 comparison based on ctpop
4365 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4366 // Keep the CTPOP if it is cheap.
4367 if (TLI.isCtpopFast(CTVT))
4368 return SDValue();
4369
4370 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4371 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4372 assert(CTVT.isInteger());
4373 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4374
4375 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4376 // check before emitting a potentially unnecessary op.
4377 if (DAG.isKnownNeverZero(CTOp)) {
4378 // (ctpop x) == 1 --> (x & x-1) == 0
4379 // (ctpop x) != 1 --> (x & x-1) != 0
4380 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4381 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4382 return RHS;
4383 }
4384
4385 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4386 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4387 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4389 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4390 }
4391
4392 return SDValue();
4393}
4394
4396 ISD::CondCode Cond, const SDLoc &dl,
4397 SelectionDAG &DAG) {
4398 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4399 return SDValue();
4400
4401 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4402 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4403 return SDValue();
4404
4405 auto getRotateSource = [](SDValue X) {
4406 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4407 return X.getOperand(0);
4408 return SDValue();
4409 };
4410
4411 // Peek through a rotated value compared against 0 or -1:
4412 // (rot X, Y) == 0/-1 --> X == 0/-1
4413 // (rot X, Y) != 0/-1 --> X != 0/-1
4414 if (SDValue R = getRotateSource(N0))
4415 return DAG.getSetCC(dl, VT, R, N1, Cond);
4416
4417 // Peek through an 'or' of a rotated value compared against 0:
4418 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4419 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4420 //
4421 // TODO: Add the 'and' with -1 sibling.
4422 // TODO: Recurse through a series of 'or' ops to find the rotate.
4423 EVT OpVT = N0.getValueType();
4424 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4425 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4426 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4427 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4428 }
4429 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4430 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4431 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4432 }
4433 }
4434
4435 return SDValue();
4436}
4437
4439 ISD::CondCode Cond, const SDLoc &dl,
4440 SelectionDAG &DAG) {
4441 // If we are testing for all-bits-clear, we might be able to do that with
4442 // less shifting since bit-order does not matter.
4443 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4444 return SDValue();
4445
4446 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4447 if (!C1 || !C1->isZero())
4448 return SDValue();
4449
4450 if (!N0.hasOneUse() ||
4451 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4452 return SDValue();
4453
4454 unsigned BitWidth = N0.getScalarValueSizeInBits();
4455 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4456 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4457 return SDValue();
4458
4459 // Canonicalize fshr as fshl to reduce pattern-matching.
4460 unsigned ShAmt = ShAmtC->getZExtValue();
4461 if (N0.getOpcode() == ISD::FSHR)
4462 ShAmt = BitWidth - ShAmt;
4463
4464 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4465 SDValue X, Y;
4466 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4467 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4468 return false;
4469 if (Or.getOperand(0) == Other) {
4470 X = Or.getOperand(0);
4471 Y = Or.getOperand(1);
4472 return true;
4473 }
4474 if (Or.getOperand(1) == Other) {
4475 X = Or.getOperand(1);
4476 Y = Or.getOperand(0);
4477 return true;
4478 }
4479 return false;
4480 };
4481
4482 EVT OpVT = N0.getValueType();
4483 EVT ShAmtVT = N0.getOperand(2).getValueType();
4484 SDValue F0 = N0.getOperand(0);
4485 SDValue F1 = N0.getOperand(1);
4486 if (matchOr(F0, F1)) {
4487 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4488 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4489 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4490 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4491 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4492 }
4493 if (matchOr(F1, F0)) {
4494 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4495 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4496 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4497 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4498 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4499 }
4500
4501 return SDValue();
4502}
4503
4504/// Try to simplify a setcc built with the specified operands and cc. If it is
4505/// unable to simplify it, return a null SDValue.
4507 ISD::CondCode Cond, bool foldBooleans,
4508 DAGCombinerInfo &DCI,
4509 const SDLoc &dl) const {
4510 SelectionDAG &DAG = DCI.DAG;
4511 const DataLayout &Layout = DAG.getDataLayout();
4512 EVT OpVT = N0.getValueType();
4514
4515 // Constant fold or commute setcc.
4516 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4517 return Fold;
4518
4519 bool N0ConstOrSplat =
4520 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4521 bool N1ConstOrSplat =
4522 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4523
4524 // Canonicalize toward having the constant on the RHS.
4525 // TODO: Handle non-splat vector constants. All undef causes trouble.
4526 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4527 // infinite loop here when we encounter one.
4529 if (N0ConstOrSplat && !N1ConstOrSplat &&
4530 (DCI.isBeforeLegalizeOps() ||
4531 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4532 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4533
4534 // If we have a subtract with the same 2 non-constant operands as this setcc
4535 // -- but in reverse order -- then try to commute the operands of this setcc
4536 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4537 // instruction on some targets.
4538 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4539 (DCI.isBeforeLegalizeOps() ||
4540 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4541 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4542 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4543 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4544
4545 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4546 return V;
4547
4548 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4549 return V;
4550
4551 if (auto *N1C = isConstOrConstSplat(N1)) {
4552 const APInt &C1 = N1C->getAPIntValue();
4553
4554 // Optimize some CTPOP cases.
4555 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4556 return V;
4557
4558 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4559 // X * Y == 0 --> (X == 0) || (Y == 0)
4560 // X * Y != 0 --> (X != 0) && (Y != 0)
4561 // TODO: This bails out if minsize is set, but if the target doesn't have a
4562 // single instruction multiply for this type, it would likely be
4563 // smaller to decompose.
4564 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4565 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4566 (N0->getFlags().hasNoUnsignedWrap() ||
4567 N0->getFlags().hasNoSignedWrap()) &&
4568 !Attr.hasFnAttr(Attribute::MinSize)) {
4569 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4570 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4571 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4572 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4573 }
4574
4575 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4576 // equality comparison, then we're just comparing whether X itself is
4577 // zero.
4578 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4579 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4580 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4581 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4582 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4583 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4584 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4585 // (srl (ctlz x), 5) == 0 -> X != 0
4586 // (srl (ctlz x), 5) != 1 -> X != 0
4587 Cond = ISD::SETNE;
4588 } else {
4589 // (srl (ctlz x), 5) != 0 -> X == 0
4590 // (srl (ctlz x), 5) == 1 -> X == 0
4591 Cond = ISD::SETEQ;
4592 }
4593 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4594 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4595 Cond);
4596 }
4597 }
4598 }
4599 }
4600
4601 // FIXME: Support vectors.
4602 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4603 const APInt &C1 = N1C->getAPIntValue();
4604
4605 // (zext x) == C --> x == (trunc C)
4606 // (sext x) == C --> x == (trunc C)
4607 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4608 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4609 unsigned MinBits = N0.getValueSizeInBits();
4610 SDValue PreExt;
4611 bool Signed = false;
4612 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4613 // ZExt
4614 MinBits = N0->getOperand(0).getValueSizeInBits();
4615 PreExt = N0->getOperand(0);
4616 } else if (N0->getOpcode() == ISD::AND) {
4617 // DAGCombine turns costly ZExts into ANDs
4618 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4619 if ((C->getAPIntValue()+1).isPowerOf2()) {
4620 MinBits = C->getAPIntValue().countr_one();
4621 PreExt = N0->getOperand(0);
4622 }
4623 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4624 // SExt
4625 MinBits = N0->getOperand(0).getValueSizeInBits();
4626 PreExt = N0->getOperand(0);
4627 Signed = true;
4628 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4629 // ZEXTLOAD / SEXTLOAD
4630 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4631 MinBits = LN0->getMemoryVT().getSizeInBits();
4632 PreExt = N0;
4633 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4634 Signed = true;
4635 MinBits = LN0->getMemoryVT().getSizeInBits();
4636 PreExt = N0;
4637 }
4638 }
4639
4640 // Figure out how many bits we need to preserve this constant.
4641 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4642
4643 // Make sure we're not losing bits from the constant.
4644 if (MinBits > 0 &&
4645 MinBits < C1.getBitWidth() &&
4646 MinBits >= ReqdBits) {
4647 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4648 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4649 // Will get folded away.
4650 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4651 if (MinBits == 1 && C1 == 1)
4652 // Invert the condition.
4653 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4655 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4656 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4657 }
4658
4659 // If truncating the setcc operands is not desirable, we can still
4660 // simplify the expression in some cases:
4661 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4662 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4663 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4664 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4665 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4666 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4667 SDValue TopSetCC = N0->getOperand(0);
4668 unsigned N0Opc = N0->getOpcode();
4669 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4670 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4671 TopSetCC.getOpcode() == ISD::SETCC &&
4672 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4673 (isConstFalseVal(N1) ||
4674 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4675
4676 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4677 (!N1C->isZero() && Cond == ISD::SETNE);
4678
4679 if (!Inverse)
4680 return TopSetCC;
4681
4683 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4684 TopSetCC.getOperand(0).getValueType());
4685 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4686 TopSetCC.getOperand(1),
4687 InvCond);
4688 }
4689 }
4690 }
4691
4692 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4693 // equality or unsigned, and all 1 bits of the const are in the same
4694 // partial word, see if we can shorten the load.
4695 if (DCI.isBeforeLegalize() &&
4697 N0.getOpcode() == ISD::AND && C1 == 0 &&
4698 N0.getNode()->hasOneUse() &&
4699 isa<LoadSDNode>(N0.getOperand(0)) &&
4700 N0.getOperand(0).getNode()->hasOneUse() &&
4701 isa<ConstantSDNode>(N0.getOperand(1))) {
4702 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4703 APInt bestMask;
4704 unsigned bestWidth = 0, bestOffset = 0;
4705 if (Lod->isSimple() && Lod->isUnindexed() &&
4706 (Lod->getMemoryVT().isByteSized() ||
4707 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4708 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4709 unsigned origWidth = N0.getValueSizeInBits();
4710 unsigned maskWidth = origWidth;
4711 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4712 // 8 bits, but have to be careful...
4713 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4714 origWidth = Lod->getMemoryVT().getSizeInBits();
4715 const APInt &Mask = N0.getConstantOperandAPInt(1);
 4716 // Only consider power-of-2 widths (and at least one byte) as candidates
 4717 // for the narrowed load.
4718 for (unsigned width = 8; width < origWidth; width *= 2) {
4719 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4720 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4721 continue;
4722 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4723 // Avoid accessing any padding here for now (we could use memWidth
4724 // instead of origWidth here otherwise).
4725 unsigned maxOffset = origWidth - width;
4726 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4727 if (Mask.isSubsetOf(newMask)) {
4728 unsigned ptrOffset =
4729 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4730 unsigned IsFast = 0;
4731 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4733 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4734 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4735 IsFast) {
4736 bestOffset = ptrOffset / 8;
4737 bestMask = Mask.lshr(offset);
4738 bestWidth = width;
4739 break;
4740 }
4741 }
4742 newMask <<= 8;
4743 }
4744 if (bestWidth)
4745 break;
4746 }
4747 }
4748 if (bestWidth) {
4749 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4750 SDValue Ptr = Lod->getBasePtr();
4751 if (bestOffset != 0)
4752 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4753 SDValue NewLoad =
4754 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4755 Lod->getPointerInfo().getWithOffset(bestOffset),
4756 Lod->getOriginalAlign());
4757 SDValue And =
4758 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4759 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4760 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4761 }
4762 }
4763
4764 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4765 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4766 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4767
4768 // If the comparison constant has bits in the upper part, the
4769 // zero-extended value could never match.
4771 C1.getBitWidth() - InSize))) {
4772 switch (Cond) {
4773 case ISD::SETUGT:
4774 case ISD::SETUGE:
4775 case ISD::SETEQ:
4776 return DAG.getConstant(0, dl, VT);
4777 case ISD::SETULT:
4778 case ISD::SETULE:
4779 case ISD::SETNE:
4780 return DAG.getConstant(1, dl, VT);
4781 case ISD::SETGT:
4782 case ISD::SETGE:
4783 // True if the sign bit of C1 is set.
4784 return DAG.getConstant(C1.isNegative(), dl, VT);
4785 case ISD::SETLT:
4786 case ISD::SETLE:
4787 // True if the sign bit of C1 isn't set.
4788 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4789 default:
4790 break;
4791 }
4792 }
4793
4794 // Otherwise, we can perform the comparison with the low bits.
4795 switch (Cond) {
4796 case ISD::SETEQ:
4797 case ISD::SETNE:
4798 case ISD::SETUGT:
4799 case ISD::SETUGE:
4800 case ISD::SETULT:
4801 case ISD::SETULE: {
4802 EVT newVT = N0.getOperand(0).getValueType();
4803 // FIXME: Should use isNarrowingProfitable.
4804 if (DCI.isBeforeLegalizeOps() ||
4805 (isOperationLegal(ISD::SETCC, newVT) &&
4806 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4807 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4808 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4809 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4810
4811 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4812 NewConst, Cond);
4813 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4814 }
4815 break;
4816 }
4817 default:
4818 break; // todo, be more careful with signed comparisons
4819 }
4820 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4821 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4822 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4823 OpVT)) {
4824 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4825 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4826 EVT ExtDstTy = N0.getValueType();
4827 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4828
4829 // If the constant doesn't fit into the number of bits for the source of
4830 // the sign extension, it is impossible for both sides to be equal.
4831 if (C1.getSignificantBits() > ExtSrcTyBits)
4832 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4833
4834 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4835 ExtDstTy != ExtSrcTy && "Unexpected types!");
4836 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4837 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4838 DAG.getConstant(Imm, dl, ExtDstTy));
4839 if (!DCI.isCalledByLegalizer())
4840 DCI.AddToWorklist(ZextOp.getNode());
4841 // Otherwise, make this a use of a zext.
4842 return DAG.getSetCC(dl, VT, ZextOp,
4843 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4844 } else if ((N1C->isZero() || N1C->isOne()) &&
4845 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4846 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4847 // excluded as they are handled below whilst checking for foldBooleans.
4848 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4849 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4850 (N0.getValueType() == MVT::i1 ||
4854 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4855 if (TrueWhenTrue)
4856 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4857 // Invert the condition.
4858 if (N0.getOpcode() == ISD::SETCC) {
4859 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4861 if (DCI.isBeforeLegalizeOps() ||
4863 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4864 }
4865 }
4866
4867 if ((N0.getOpcode() == ISD::XOR ||
4868 (N0.getOpcode() == ISD::AND &&
4869 N0.getOperand(0).getOpcode() == ISD::XOR &&
4870 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4871 isOneConstant(N0.getOperand(1))) {
4872 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4873 // can only do this if the top bits are known zero.
4874 unsigned BitWidth = N0.getValueSizeInBits();
4875 if (DAG.MaskedValueIsZero(N0,
4877 BitWidth-1))) {
4878 // Okay, get the un-inverted input value.
4879 SDValue Val;
4880 if (N0.getOpcode() == ISD::XOR) {
4881 Val = N0.getOperand(0);
4882 } else {
4883 assert(N0.getOpcode() == ISD::AND &&
4884 N0.getOperand(0).getOpcode() == ISD::XOR);
4885 // ((X^1)&1)^1 -> X & 1
4886 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4887 N0.getOperand(0).getOperand(0),
4888 N0.getOperand(1));
4889 }
4890
4891 return DAG.getSetCC(dl, VT, Val, N1,
4893 }
4894 } else if (N1C->isOne()) {
4895 SDValue Op0 = N0;
4896 if (Op0.getOpcode() == ISD::TRUNCATE)
4897 Op0 = Op0.getOperand(0);
4898
4899 if ((Op0.getOpcode() == ISD::XOR) &&
4900 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4901 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4902 SDValue XorLHS = Op0.getOperand(0);
4903 SDValue XorRHS = Op0.getOperand(1);
4904 // Ensure that the input setccs return an i1 type or 0/1 value.
4905 if (Op0.getValueType() == MVT::i1 ||
4910 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4912 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4913 }
4914 }
4915 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4916 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4917 if (Op0.getValueType().bitsGT(VT))
4918 Op0 = DAG.getNode(ISD::AND, dl, VT,
4919 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4920 DAG.getConstant(1, dl, VT));
4921 else if (Op0.getValueType().bitsLT(VT))
4922 Op0 = DAG.getNode(ISD::AND, dl, VT,
4923 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4924 DAG.getConstant(1, dl, VT));
4925
4926 return DAG.getSetCC(dl, VT, Op0,
4927 DAG.getConstant(0, dl, Op0.getValueType()),
4929 }
4930 if (Op0.getOpcode() == ISD::AssertZext &&
4931 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4932 return DAG.getSetCC(dl, VT, Op0,
4933 DAG.getConstant(0, dl, Op0.getValueType()),
4935 }
4936 }
4937
4938 // Given:
4939 // icmp eq/ne (urem %x, %y), 0
4940 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4941 // icmp eq/ne %x, 0
4942 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4943 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4944 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4945 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4946 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4947 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4948 }
4949
4950 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4951 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4952 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4953 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4954 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4955 N1C->isAllOnes()) {
4956 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4957 DAG.getConstant(0, dl, OpVT),
4959 }
4960
4961 if (SDValue V =
4962 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4963 return V;
4964 }
4965
4966 // These simplifications apply to splat vectors as well.
4967 // TODO: Handle more splat vector cases.
4968 if (auto *N1C = isConstOrConstSplat(N1)) {
4969 const APInt &C1 = N1C->getAPIntValue();
4970
4971 APInt MinVal, MaxVal;
4972 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4974 MinVal = APInt::getSignedMinValue(OperandBitSize);
4975 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4976 } else {
4977 MinVal = APInt::getMinValue(OperandBitSize);
4978 MaxVal = APInt::getMaxValue(OperandBitSize);
4979 }
4980
4981 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4982 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4983 // X >= MIN --> true
4984 if (C1 == MinVal)
4985 return DAG.getBoolConstant(true, dl, VT, OpVT);
4986
4987 if (!VT.isVector()) { // TODO: Support this for vectors.
4988 // X >= C0 --> X > (C0 - 1)
4989 APInt C = C1 - 1;
4991 if ((DCI.isBeforeLegalizeOps() ||
4992 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4993 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4994 isLegalICmpImmediate(C.getSExtValue())))) {
4995 return DAG.getSetCC(dl, VT, N0,
4996 DAG.getConstant(C, dl, N1.getValueType()),
4997 NewCC);
4998 }
4999 }
5000 }
5001
5002 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5003 // X <= MAX --> true
5004 if (C1 == MaxVal)
5005 return DAG.getBoolConstant(true, dl, VT, OpVT);
5006
5007 // X <= C0 --> X < (C0 + 1)
5008 if (!VT.isVector()) { // TODO: Support this for vectors.
5009 APInt C = C1 + 1;
5011 if ((DCI.isBeforeLegalizeOps() ||
5012 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5013 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5014 isLegalICmpImmediate(C.getSExtValue())))) {
5015 return DAG.getSetCC(dl, VT, N0,
5016 DAG.getConstant(C, dl, N1.getValueType()),
5017 NewCC);
5018 }
5019 }
5020 }
5021
5022 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5023 if (C1 == MinVal)
5024 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5025
5026 // TODO: Support this for vectors after legalize ops.
5027 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5028 // Canonicalize setlt X, Max --> setne X, Max
5029 if (C1 == MaxVal)
5030 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5031
5032 // If we have setult X, 1, turn it into seteq X, 0
5033 if (C1 == MinVal+1)
5034 return DAG.getSetCC(dl, VT, N0,
5035 DAG.getConstant(MinVal, dl, N0.getValueType()),
5036 ISD::SETEQ);
5037 }
5038 }
5039
5040 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5041 if (C1 == MaxVal)
5042 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5043
5044 // TODO: Support this for vectors after legalize ops.
5045 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5046 // Canonicalize setgt X, Min --> setne X, Min
5047 if (C1 == MinVal)
5048 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5049
5050 // If we have setugt X, Max-1, turn it into seteq X, Max
5051 if (C1 == MaxVal-1)
5052 return DAG.getSetCC(dl, VT, N0,
5053 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5054 ISD::SETEQ);
5055 }
5056 }
5057
5058 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5059 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5060 if (C1.isZero())
5061 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5062 VT, N0, N1, Cond, DCI, dl))
5063 return CC;
5064
5065 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5066 // For example, when high 32-bits of i64 X are known clear:
5067 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5068 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5069 bool CmpZero = N1C->isZero();
5070 bool CmpNegOne = N1C->isAllOnes();
5071 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5072 // Match or(lo,shl(hi,bw/2)) pattern.
5073 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5074 unsigned EltBits = V.getScalarValueSizeInBits();
5075 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5076 return false;
5077 SDValue LHS = V.getOperand(0);
5078 SDValue RHS = V.getOperand(1);
5079 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5080 // Unshifted element must have zero upperbits.
5081 if (RHS.getOpcode() == ISD::SHL &&
5082 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5083 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5084 DAG.MaskedValueIsZero(LHS, HiBits)) {
5085 Lo = LHS;
5086 Hi = RHS.getOperand(0);
5087 return true;
5088 }
5089 if (LHS.getOpcode() == ISD::SHL &&
5090 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5091 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5092 DAG.MaskedValueIsZero(RHS, HiBits)) {
5093 Lo = RHS;
5094 Hi = LHS.getOperand(0);
5095 return true;
5096 }
5097 return false;
5098 };
5099
5100 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5101 unsigned EltBits = N0.getScalarValueSizeInBits();
5102 unsigned HalfBits = EltBits / 2;
5103 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5104 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5105 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5106 SDValue NewN0 =
5107 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5108 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5109 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5110 };
5111
5112 SDValue Lo, Hi;
5113 if (IsConcat(N0, Lo, Hi))
5114 return MergeConcat(Lo, Hi);
5115
5116 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5117 SDValue Lo0, Lo1, Hi0, Hi1;
5118 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5119 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5120 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5121 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5122 }
5123 }
5124 }
5125 }
5126
5127 // If we have "setcc X, C0", check to see if we can shrink the immediate
5128 // by changing cc.
5129 // TODO: Support this for vectors after legalize ops.
5130 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5131 // SETUGT X, SINTMAX -> SETLT X, 0
5132 // SETUGE X, SINTMIN -> SETLT X, 0
5133 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5134 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5135 return DAG.getSetCC(dl, VT, N0,
5136 DAG.getConstant(0, dl, N1.getValueType()),
5137 ISD::SETLT);
5138
5139 // SETULT X, SINTMIN -> SETGT X, -1
5140 // SETULE X, SINTMAX -> SETGT X, -1
5141 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5142 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5143 return DAG.getSetCC(dl, VT, N0,
5144 DAG.getAllOnesConstant(dl, N1.getValueType()),
5145 ISD::SETGT);
5146 }
5147 }
5148
5149 // Back to non-vector simplifications.
5150 // TODO: Can we do these for vector splats?
5151 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5152 const APInt &C1 = N1C->getAPIntValue();
5153 EVT ShValTy = N0.getValueType();
5154
5155 // Fold bit comparisons when we can. This will result in an
5156 // incorrect value when boolean false is negative one, unless
5157 // the bitsize is 1 in which case the false value is the same
5158 // in practice regardless of the representation.
5159 if ((VT.getSizeInBits() == 1 ||
5161 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5162 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5163 N0.getOpcode() == ISD::AND) {
5164 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5165 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5166 // Perform the xform if the AND RHS is a single bit.
5167 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5168 if (AndRHS->getAPIntValue().isPowerOf2() &&
5169 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5170 return DAG.getNode(
5171 ISD::TRUNCATE, dl, VT,
5172 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5173 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5174 }
5175 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5176 // (X & 8) == 8 --> (X & 8) >> 3
5177 // Perform the xform if C1 is a single bit.
5178 unsigned ShCt = C1.logBase2();
5179 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5180 return DAG.getNode(
5181 ISD::TRUNCATE, dl, VT,
5182 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5183 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5184 }
5185 }
5186 }
5187 }
5188
5189 if (C1.getSignificantBits() <= 64 &&
5191 // (X & -256) == 256 -> (X >> 8) == 1
5192 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5193 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5194 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5195 const APInt &AndRHSC = AndRHS->getAPIntValue();
5196 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5197 unsigned ShiftBits = AndRHSC.countr_zero();
5198 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5199 SDValue Shift = DAG.getNode(
5200 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5201 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5202 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5203 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5204 }
5205 }
5206 }
5207 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5208 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5209 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5210 // X < 0x100000000 -> (X >> 32) < 1
5211 // X >= 0x100000000 -> (X >> 32) >= 1
5212 // X <= 0x0ffffffff -> (X >> 32) < 1
5213 // X > 0x0ffffffff -> (X >> 32) >= 1
5214 unsigned ShiftBits;
5215 APInt NewC = C1;
5216 ISD::CondCode NewCond = Cond;
5217 if (AdjOne) {
5218 ShiftBits = C1.countr_one();
5219 NewC = NewC + 1;
5220 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5221 } else {
5222 ShiftBits = C1.countr_zero();
5223 }
5224 NewC.lshrInPlace(ShiftBits);
5225 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5227 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5228 SDValue Shift =
5229 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5230 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5231 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5232 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5233 }
5234 }
5235 }
5236 }
5237
5238 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5239 auto *CFP = cast<ConstantFPSDNode>(N1);
5240 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5241
5242 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5243 // constant if knowing that the operand is non-nan is enough. We prefer to
5244 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5245 // materialize 0.0.
5246 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5247 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5248
5249 // setcc (fneg x), C -> setcc swap(pred) x, -C
5250 if (N0.getOpcode() == ISD::FNEG) {
5252 if (DCI.isBeforeLegalizeOps() ||
5253 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5254 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5255 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5256 }
5257 }
5258
5259 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5261 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5262 bool IsFabs = N0.getOpcode() == ISD::FABS;
5263 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5264 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5265 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5266 : (IsFabs ? fcInf : fcPosInf);
5267 if (Cond == ISD::SETUEQ)
5268 Flag |= fcNan;
5269 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5270 DAG.getTargetConstant(Flag, dl, MVT::i32));
5271 }
5272 }
5273
5274 // If the condition is not legal, see if we can find an equivalent one
5275 // which is legal.
5277 // If the comparison was an awkward floating-point == or != and one of
5278 // the comparison operands is infinity or negative infinity, convert the
5279 // condition to a less-awkward <= or >=.
5280 if (CFP->getValueAPF().isInfinity()) {
5281 bool IsNegInf = CFP->getValueAPF().isNegative();
5283 switch (Cond) {
5284 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5285 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5286 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5287 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5288 default: break;
5289 }
5290 if (NewCond != ISD::SETCC_INVALID &&
5291 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5292 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5293 }
5294 }
5295 }
5296
5297 if (N0 == N1) {
5298 // The sext(setcc()) => setcc() optimization relies on the appropriate
5299 // constant being emitted.
5300 assert(!N0.getValueType().isInteger() &&
5301 "Integer types should be handled by FoldSetCC");
5302
5303 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5304 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5305 if (UOF == 2) // FP operators that are undefined on NaNs.
5306 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5307 if (UOF == unsigned(EqTrue))
5308 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5309 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5310 // if it is not already.
5311 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5312 if (NewCond != Cond &&
5313 (DCI.isBeforeLegalizeOps() ||
5314 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5315 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5316 }
5317
5318 // ~X > ~Y --> Y > X
5319 // ~X < ~Y --> Y < X
5320 // ~X < C --> X > ~C
5321 // ~X > C --> X < ~C
5322 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5323 N0.getValueType().isInteger()) {
5324 if (isBitwiseNot(N0)) {
5325 if (isBitwiseNot(N1))
5326 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5327
5330 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5331 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5332 }
5333 }
5334 }
5335
5336 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5337 N0.getValueType().isInteger()) {
5338 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5339 N0.getOpcode() == ISD::XOR) {
5340 // Simplify (X+Y) == (X+Z) --> Y == Z
5341 if (N0.getOpcode() == N1.getOpcode()) {
5342 if (N0.getOperand(0) == N1.getOperand(0))
5343 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5344 if (N0.getOperand(1) == N1.getOperand(1))
5345 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5346 if (isCommutativeBinOp(N0.getOpcode())) {
5347 // If X op Y == Y op X, try other combinations.
5348 if (N0.getOperand(0) == N1.getOperand(1))
5349 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5350 Cond);
5351 if (N0.getOperand(1) == N1.getOperand(0))
5352 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5353 Cond);
5354 }
5355 }
5356
5357 // If RHS is a legal immediate value for a compare instruction, we need
5358 // to be careful about increasing register pressure needlessly.
5359 bool LegalRHSImm = false;
5360
5361 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5362 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5363 // Turn (X+C1) == C2 --> X == C2-C1
5364 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5365 return DAG.getSetCC(
5366 dl, VT, N0.getOperand(0),
5367 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5368 dl, N0.getValueType()),
5369 Cond);
5370
5371 // Turn (X^C1) == C2 --> X == C1^C2
5372 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5373 return DAG.getSetCC(
5374 dl, VT, N0.getOperand(0),
5375 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5376 dl, N0.getValueType()),
5377 Cond);
5378 }
5379
5380 // Turn (C1-X) == C2 --> X == C1-C2
5381 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5382 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5383 return DAG.getSetCC(
5384 dl, VT, N0.getOperand(1),
5385 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5386 dl, N0.getValueType()),
5387 Cond);
5388
5389 // Could RHSC fold directly into a compare?
5390 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5391 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5392 }
5393
5394 // (X+Y) == X --> Y == 0 and similar folds.
5395 // Don't do this if X is an immediate that can fold into a cmp
5396 // instruction and X+Y has other uses. It could be an induction variable
5397 // chain, and the transform would increase register pressure.
5398 if (!LegalRHSImm || N0.hasOneUse())
5399 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5400 return V;
5401 }
5402
5403 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5404 N1.getOpcode() == ISD::XOR)
5405 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5406 return V;
5407
5408 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5409 return V;
5410 }
5411
5412 // Fold remainder of division by a constant.
5413 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5414 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5415 // When division is cheap or optimizing for minimum size,
5416 // fall through to DIVREM creation by skipping this fold.
5417 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5418 if (N0.getOpcode() == ISD::UREM) {
5419 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5420 return Folded;
5421 } else if (N0.getOpcode() == ISD::SREM) {
5422 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5423 return Folded;
5424 }
5425 }
5426 }
5427
5428 // Fold away ALL boolean setcc's.
5429 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5430 SDValue Temp;
5431 switch (Cond) {
5432 default: llvm_unreachable("Unknown integer setcc!");
5433 case ISD::SETEQ: // X == Y -> ~(X^Y)
5434 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5435 N0 = DAG.getNOT(dl, Temp, OpVT);
5436 if (!DCI.isCalledByLegalizer())
5437 DCI.AddToWorklist(Temp.getNode());
5438 break;
5439 case ISD::SETNE: // X != Y --> (X^Y)
5440 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5441 break;
5442 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5443 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5444 Temp = DAG.getNOT(dl, N0, OpVT);
5445 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5446 if (!DCI.isCalledByLegalizer())
5447 DCI.AddToWorklist(Temp.getNode());
5448 break;
5449 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5450 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5451 Temp = DAG.getNOT(dl, N1, OpVT);
5452 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5453 if (!DCI.isCalledByLegalizer())
5454 DCI.AddToWorklist(Temp.getNode());
5455 break;
5456 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5457 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5458 Temp = DAG.getNOT(dl, N0, OpVT);
5459 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5460 if (!DCI.isCalledByLegalizer())
5461 DCI.AddToWorklist(Temp.getNode());
5462 break;
5463 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5464 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5465 Temp = DAG.getNOT(dl, N1, OpVT);
5466 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5467 break;
5468 }
5469 if (VT.getScalarType() != MVT::i1) {
5470 if (!DCI.isCalledByLegalizer())
5471 DCI.AddToWorklist(N0.getNode());
5472 // FIXME: If running after legalize, we probably can't do this.
5474 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5475 }
5476 return N0;
5477 }
5478
5479 // Could not fold it.
5480 return SDValue();
5481}
5482
5483/// Returns true (and the GlobalValue and the offset) if the node is a
5484/// GlobalAddress + offset.
5486 int64_t &Offset) const {
5487
5488 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5489
5490 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5491 GA = GASD->getGlobal();
5492 Offset += GASD->getOffset();
5493 return true;
5494 }
5495
5496 if (N->getOpcode() == ISD::ADD) {
5497 SDValue N1 = N->getOperand(0);
5498 SDValue N2 = N->getOperand(1);
5499 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5500 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5501 Offset += V->getSExtValue();
5502 return true;
5503 }
5504 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5505 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5506 Offset += V->getSExtValue();
5507 return true;
5508 }
5509 }
5510 }
5511
5512 return false;
5513}
5514
5516 DAGCombinerInfo &DCI) const {
5517 // Default implementation: no optimization.
5518 return SDValue();
5519}
5520
5521//===----------------------------------------------------------------------===//
5522// Inline Assembler Implementation Methods
5523//===----------------------------------------------------------------------===//
5524
5527 unsigned S = Constraint.size();
5528
5529 if (S == 1) {
5530 switch (Constraint[0]) {
5531 default: break;
5532 case 'r':
5533 return C_RegisterClass;
5534 case 'm': // memory
5535 case 'o': // offsetable
5536 case 'V': // not offsetable
5537 return C_Memory;
5538 case 'p': // Address.
5539 return C_Address;
5540 case 'n': // Simple Integer
5541 case 'E': // Floating Point Constant
5542 case 'F': // Floating Point Constant
5543 return C_Immediate;
5544 case 'i': // Simple Integer or Relocatable Constant
5545 case 's': // Relocatable Constant
5546 case 'X': // Allow ANY value.
5547 case 'I': // Target registers.
5548 case 'J':
5549 case 'K':
5550 case 'L':
5551 case 'M':
5552 case 'N':
5553 case 'O':
5554 case 'P':
5555 case '<':
5556 case '>':
5557 return C_Other;
5558 }
5559 }
5560
5561 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5562 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5563 return C_Memory;
5564 return C_Register;
5565 }
5566 return C_Unknown;
5567}
5568
5569/// Try to replace an X constraint, which matches anything, with another that
5570/// has more specific requirements based on the type of the corresponding
5571/// operand.
5572const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5573 if (ConstraintVT.isInteger())
5574 return "r";
5575 if (ConstraintVT.isFloatingPoint())
5576 return "f"; // works for many targets
5577 return nullptr;
5578}
5579
5581 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5582 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5583 return SDValue();
5584}
5585
5586/// Lower the specified operand into the Ops vector.
5587/// If it is invalid, don't add anything to Ops.
5589 StringRef Constraint,
5590 std::vector<SDValue> &Ops,
5591 SelectionDAG &DAG) const {
5592
5593 if (Constraint.size() > 1)
5594 return;
5595
5596 char ConstraintLetter = Constraint[0];
5597 switch (ConstraintLetter) {
5598 default: break;
5599 case 'X': // Allows any operand
5600 case 'i': // Simple Integer or Relocatable Constant
5601 case 'n': // Simple Integer
5602 case 's': { // Relocatable Constant
5603
5605 uint64_t Offset = 0;
5606
5607 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5608 // etc., since getelementpointer is variadic. We can't use
5609 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5610 // while in this case the GA may be furthest from the root node which is
5611 // likely an ISD::ADD.
5612 while (true) {
5613 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5614 // gcc prints these as sign extended. Sign extend value to 64 bits
5615 // now; without this it would get ZExt'd later in
5616 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5617 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5618 BooleanContent BCont = getBooleanContents(MVT::i64);
5619 ISD::NodeType ExtOpc =
5620 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5621 int64_t ExtVal =
5622 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5623 Ops.push_back(
5624 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5625 return;
5626 }
5627 if (ConstraintLetter != 'n') {
5628 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5629 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5630 GA->getValueType(0),
5631 Offset + GA->getOffset()));
5632 return;
5633 }
5634 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5635 Ops.push_back(DAG.getTargetBlockAddress(
5636 BA->getBlockAddress(), BA->getValueType(0),
5637 Offset + BA->getOffset(), BA->getTargetFlags()));
5638 return;
5639 }
5640 if (isa<BasicBlockSDNode>(Op)) {
5641 Ops.push_back(Op);
5642 return;
5643 }
5644 }
5645 const unsigned OpCode = Op.getOpcode();
5646 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5647 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5648 Op = Op.getOperand(1);
5649 // Subtraction is not commutative.
5650 else if (OpCode == ISD::ADD &&
5651 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5652 Op = Op.getOperand(0);
5653 else
5654 return;
5655 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5656 continue;
5657 }
5658 return;
5659 }
5660 break;
5661 }
5662 }
5663}
5664
5666 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5667}
5668
5669std::pair<unsigned, const TargetRegisterClass *>
5671 StringRef Constraint,
5672 MVT VT) const {
5673 if (!Constraint.starts_with("{"))
5674 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5675 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5676
5677 // Remove the braces from around the name.
5678 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5679
5680 std::pair<unsigned, const TargetRegisterClass *> R =
5681 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5682
5683 // Figure out which register class contains this reg.
5684 for (const TargetRegisterClass *RC : RI->regclasses()) {
5685 // If none of the value types for this register class are valid, we
5686 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5687 if (!isLegalRC(*RI, *RC))
5688 continue;
5689
5690 for (const MCPhysReg &PR : *RC) {
5691 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5692 std::pair<unsigned, const TargetRegisterClass *> S =
5693 std::make_pair(PR, RC);
5694
5695 // If this register class has the requested value type, return it,
5696 // otherwise keep searching and return the first class found
5697 // if no other is found which explicitly has the requested type.
5698 if (RI->isTypeLegalForClass(*RC, VT))
5699 return S;
5700 if (!R.second)
5701 R = S;
5702 }
5703 }
5704 }
5705
5706 return R;
5707}
5708
5709//===----------------------------------------------------------------------===//
5710// Constraint Selection.
5711
5712/// Return true of this is an input operand that is a matching constraint like
5713/// "4".
5715 assert(!ConstraintCode.empty() && "No known constraint!");
5716 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5717}
5718
5719/// If this is an input matching constraint, this method returns the output
5720/// operand it matches.
5722 assert(!ConstraintCode.empty() && "No known constraint!");
5723 return atoi(ConstraintCode.c_str());
5724}
5725
5726/// Split up the constraint string from the inline assembly value into the
5727/// specific constraints and their prefixes, and also tie in the associated
5728/// operand values.
5729/// If this returns an empty vector, and if the constraint string itself
5730/// isn't empty, there was an error parsing.
5733 const TargetRegisterInfo *TRI,
5734 const CallBase &Call) const {
5735 /// Information about all of the constraints.
5736 AsmOperandInfoVector ConstraintOperands;
5737 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5738 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5739
5740 // Do a prepass over the constraints, canonicalizing them, and building up the
5741 // ConstraintOperands list.
5742 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5743 unsigned ResNo = 0; // ResNo - The result number of the next output.
5744 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5745
5746 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5747 ConstraintOperands.emplace_back(std::move(CI));
5748 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5749
5750 // Update multiple alternative constraint count.
5751 if (OpInfo.multipleAlternatives.size() > maCount)
5752 maCount = OpInfo.multipleAlternatives.size();
5753
5754 OpInfo.ConstraintVT = MVT::Other;
5755
5756 // Compute the value type for each operand.
5757 switch (OpInfo.Type) {
5759 // Indirect outputs just consume an argument.
5760 if (OpInfo.isIndirect) {
5761 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5762 break;
5763 }
5764
5765 // The return value of the call is this value. As such, there is no
5766 // corresponding argument.
5767 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5768 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5769 OpInfo.ConstraintVT =
5770 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5771 .getSimpleVT();
5772 } else {
5773 assert(ResNo == 0 && "Asm only has one result!");
5774 OpInfo.ConstraintVT =
5775 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5776 }
5777 ++ResNo;
5778 break;
5779 case InlineAsm::isInput:
5780 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5781 break;
5782 case InlineAsm::isLabel:
5783 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5784 ++LabelNo;
5785 continue;
5787 // Nothing to do.
5788 break;
5789 }
5790
5791 if (OpInfo.CallOperandVal) {
5792 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5793 if (OpInfo.isIndirect) {
5794 OpTy = Call.getParamElementType(ArgNo);
5795 assert(OpTy && "Indirect operand must have elementtype attribute");
5796 }
5797
5798 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5799 if (StructType *STy = dyn_cast<StructType>(OpTy))
5800 if (STy->getNumElements() == 1)
5801 OpTy = STy->getElementType(0);
5802
5803 // If OpTy is not a single value, it may be a struct/union that we
5804 // can tile with integers.
5805 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5806 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5807 switch (BitSize) {
5808 default: break;
5809 case 1:
5810 case 8:
5811 case 16:
5812 case 32:
5813 case 64:
5814 case 128:
5815 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5816 break;
5817 }
5818 }
5819
5820 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5821 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5822 ArgNo++;
5823 }
5824 }
5825
5826 // If we have multiple alternative constraints, select the best alternative.
5827 if (!ConstraintOperands.empty()) {
5828 if (maCount) {
5829 unsigned bestMAIndex = 0;
5830 int bestWeight = -1;
5831 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5832 int weight = -1;
5833 unsigned maIndex;
5834 // Compute the sums of the weights for each alternative, keeping track
5835 // of the best (highest weight) one so far.
5836 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5837 int weightSum = 0;
5838 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5839 cIndex != eIndex; ++cIndex) {
5840 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5841 if (OpInfo.Type == InlineAsm::isClobber)
5842 continue;
5843
5844 // If this is an output operand with a matching input operand,
5845 // look up the matching input. If their types mismatch, e.g. one
5846 // is an integer, the other is floating point, or their sizes are
5847 // different, flag it as an maCantMatch.
5848 if (OpInfo.hasMatchingInput()) {
5849 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5850 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5851 if ((OpInfo.ConstraintVT.isInteger() !=
5852 Input.ConstraintVT.isInteger()) ||
5853 (OpInfo.ConstraintVT.getSizeInBits() !=
5854 Input.ConstraintVT.getSizeInBits())) {
5855 weightSum = -1; // Can't match.
5856 break;
5857 }
5858 }
5859 }
5860 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5861 if (weight == -1) {
5862 weightSum = -1;
5863 break;
5864 }
5865 weightSum += weight;
5866 }
5867 // Update best.
5868 if (weightSum > bestWeight) {
5869 bestWeight = weightSum;
5870 bestMAIndex = maIndex;
5871 }
5872 }
5873
5874 // Now select chosen alternative in each constraint.
5875 for (AsmOperandInfo &cInfo : ConstraintOperands)
5876 if (cInfo.Type != InlineAsm::isClobber)
5877 cInfo.selectAlternative(bestMAIndex);
5878 }
5879 }
5880
5881 // Check and hook up tied operands, choose constraint code to use.
5882 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5883 cIndex != eIndex; ++cIndex) {
5884 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5885
5886 // If this is an output operand with a matching input operand, look up the
5887 // matching input. If their types mismatch, e.g. one is an integer, the
5888 // other is floating point, or their sizes are different, flag it as an
5889 // error.
5890 if (OpInfo.hasMatchingInput()) {
5891 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5892
5893 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5894 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5895 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5896 OpInfo.ConstraintVT);
5897 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5898 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5899 Input.ConstraintVT);
5900 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
5902 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
5904 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
5905 (MatchRC.second != InputRC.second)) {
5906 report_fatal_error("Unsupported asm: input constraint"
5907 " with a matching output constraint of"
5908 " incompatible type!");
5909 }
5910 }
5911 }
5912 }
5913
5914 return ConstraintOperands;
5915}
5916
5917/// Return a number indicating our preference for chosing a type of constraint
5918/// over another, for the purpose of sorting them. Immediates are almost always
5919/// preferrable (when they can be emitted). A higher return value means a
5920/// stronger preference for one constraint type relative to another.
5921/// FIXME: We should prefer registers over memory but doing so may lead to
5922/// unrecoverable register exhaustion later.
5923/// https://github.com/llvm/llvm-project/issues/20571
5925 switch (CT) {
5928 return 4;
5931 return 3;
5933 return 2;
5935 return 1;
5937 return 0;
5938 }
5939 llvm_unreachable("Invalid constraint type");
5940}
5941
5942/// Examine constraint type and operand type and determine a weight value.
5943/// This object must already have been set up with the operand type
5944/// and the current alternative constraint selected.
5947 AsmOperandInfo &info, int maIndex) const {
5949 if (maIndex >= (int)info.multipleAlternatives.size())
5950 rCodes = &info.Codes;
5951 else
5952 rCodes = &info.multipleAlternatives[maIndex].Codes;
5953 ConstraintWeight BestWeight = CW_Invalid;
5954
5955 // Loop over the options, keeping track of the most general one.
5956 for (const std::string &rCode : *rCodes) {
5957 ConstraintWeight weight =
5958 getSingleConstraintMatchWeight(info, rCode.c_str());
5959 if (weight > BestWeight)
5960 BestWeight = weight;
5961 }
5962
5963 return BestWeight;
5964}
5965
5966/// Examine constraint type and operand type and determine a weight value.
5967/// This object must already have been set up with the operand type
5968/// and the current alternative constraint selected.
5971 AsmOperandInfo &info, const char *constraint) const {
5972 ConstraintWeight weight = CW_Invalid;
5973 Value *CallOperandVal = info.CallOperandVal;
5974 // If we don't have a value, we can't do a match,
5975 // but allow it at the lowest weight.
5976 if (!CallOperandVal)
5977 return CW_Default;
5978 // Look at the constraint type.
5979 switch (*constraint) {
5980 case 'i': // immediate integer.
5981 case 'n': // immediate integer with a known value.
5982 if (isa<ConstantInt>(CallOperandVal))
5983 weight = CW_Constant;
5984 break;
5985 case 's': // non-explicit intregal immediate.
5986 if (isa<GlobalValue>(CallOperandVal))
5987 weight = CW_Constant;
5988 break;
5989 case 'E': // immediate float if host format.
5990 case 'F': // immediate float.
5991 if (isa<ConstantFP>(CallOperandVal))
5992 weight = CW_Constant;
5993 break;
5994 case '<': // memory operand with autodecrement.
5995 case '>': // memory operand with autoincrement.
5996 case 'm': // memory operand.
5997 case 'o': // offsettable memory operand
5998 case 'V': // non-offsettable memory operand
5999 weight = CW_Memory;
6000 break;
6001 case 'r': // general register.
6002 case 'g': // general register, memory operand or immediate integer.
6003 // note: Clang converts "g" to "imr".
6004 if (CallOperandVal->getType()->isIntegerTy())
6005 weight = CW_Register;
6006 break;
6007 case 'X': // any operand.
6008 default:
6009 weight = CW_Default;
6010 break;
6011 }
6012 return weight;
6013}
6014
6015/// If there are multiple different constraints that we could pick for this
6016/// operand (e.g. "imr") try to pick the 'best' one.
6017/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6018/// into seven classes:
6019/// Register -> one specific register
6020/// RegisterClass -> a group of regs
6021/// Memory -> memory
6022/// Address -> a symbolic memory reference
6023/// Immediate -> immediate values
6024/// Other -> magic values (such as "Flag Output Operands")
6025/// Unknown -> something we don't recognize yet and can't handle
6026/// Ideally, we would pick the most specific constraint possible: if we have
6027/// something that fits into a register, we would pick it. The problem here
6028/// is that if we have something that could either be in a register or in
6029/// memory that use of the register could cause selection of *other*
6030/// operands to fail: they might only succeed if we pick memory. Because of
6031/// this the heuristic we use is:
6032///
6033/// 1) If there is an 'other' constraint, and if the operand is valid for
6034/// that constraint, use it. This makes us take advantage of 'i'
6035/// constraints when available.
6036/// 2) Otherwise, pick the most general constraint present. This prefers
6037/// 'm' over 'r', for example.
6038///
6040 TargetLowering::AsmOperandInfo &OpInfo) const {
6041 ConstraintGroup Ret;
6042
6043 Ret.reserve(OpInfo.Codes.size());
6044 for (StringRef Code : OpInfo.Codes) {
6045 TargetLowering::ConstraintType CType = getConstraintType(Code);
6046
6047 // Indirect 'other' or 'immediate' constraints are not allowed.
6048 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6049 CType == TargetLowering::C_Register ||
6051 continue;
6052
6053 // Things with matching constraints can only be registers, per gcc
6054 // documentation. This mainly affects "g" constraints.
6055 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6056 continue;
6057
6058 Ret.emplace_back(Code, CType);
6059 }
6060
6061 std::stable_sort(
6062 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6063 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6064 });
6065
6066 return Ret;
6067}
6068
6069/// If we have an immediate, see if we can lower it. Return true if we can,
6070/// false otherwise.
6072 SDValue Op, SelectionDAG *DAG,
6073 const TargetLowering &TLI) {
6074
6075 assert((P.second == TargetLowering::C_Other ||
6076 P.second == TargetLowering::C_Immediate) &&
6077 "need immediate or other");
6078
6079 if (!Op.getNode())
6080 return false;
6081
6082 std::vector<SDValue> ResultOps;
6083 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6084 return !ResultOps.empty();
6085}
6086
6087/// Determines the constraint code and constraint type to use for the specific
6088/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6090 SDValue Op,
6091 SelectionDAG *DAG) const {
6092 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6093
6094 // Single-letter constraints ('r') are very common.
6095 if (OpInfo.Codes.size() == 1) {
6096 OpInfo.ConstraintCode = OpInfo.Codes[0];
6097 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6098 } else {
6099 ConstraintGroup G = getConstraintPreferences(OpInfo);
6100 if (G.empty())
6101 return;
6102
6103 unsigned BestIdx = 0;
6104 for (const unsigned E = G.size();
6105 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6106 G[BestIdx].second == TargetLowering::C_Immediate);
6107 ++BestIdx) {
6108 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6109 break;
6110 // If we're out of constraints, just pick the first one.
6111 if (BestIdx + 1 == E) {
6112 BestIdx = 0;
6113 break;
6114 }
6115 }
6116
6117 OpInfo.ConstraintCode = G[BestIdx].first;
6118 OpInfo.ConstraintType = G[BestIdx].second;
6119 }
6120
6121 // 'X' matches anything.
6122 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6123 // Constants are handled elsewhere. For Functions, the type here is the
6124 // type of the result, which is not what we want to look at; leave them
6125 // alone.
6126 Value *v = OpInfo.CallOperandVal;
6127 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6128 return;
6129 }
6130
6131 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6132 OpInfo.ConstraintCode = "i";
6133 return;
6134 }
6135
6136 // Otherwise, try to resolve it to something we know about by looking at
6137 // the actual operand type.
6138 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6139 OpInfo.ConstraintCode = Repl;
6140 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6141 }
6142 }
6143}
6144
6145/// Given an exact SDIV by a constant, create a multiplication
6146/// with the multiplicative inverse of the constant.
6147/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6149 const SDLoc &dl, SelectionDAG &DAG,
6150 SmallVectorImpl<SDNode *> &Created) {
6151 SDValue Op0 = N->getOperand(0);
6152 SDValue Op1 = N->getOperand(1);
6153 EVT VT = N->getValueType(0);
6154 EVT SVT = VT.getScalarType();
6155 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6156 EVT ShSVT = ShVT.getScalarType();
6157
6158 bool UseSRA = false;
6159 SmallVector<SDValue, 16> Shifts, Factors;
6160
6161 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6162 if (C->isZero())
6163 return false;
6164 APInt Divisor = C->getAPIntValue();
6165 unsigned Shift = Divisor.countr_zero();
6166 if (Shift) {
6167 Divisor.ashrInPlace(Shift);
6168 UseSRA = true;
6169 }
6170 APInt Factor = Divisor.multiplicativeInverse();
6171 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6172 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6173 return true;
6174 };
6175
6176 // Collect all magic values from the build vector.
6177 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6178 return SDValue();
6179
6180 SDValue Shift, Factor;
6181 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6182 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6183 Factor = DAG.getBuildVector(VT, dl, Factors);
6184 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6185 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6186 "Expected matchUnaryPredicate to return one element for scalable "
6187 "vectors");
6188 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6189 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6190 } else {
6191 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6192 Shift = Shifts[0];
6193 Factor = Factors[0];
6194 }
6195
6196 SDValue Res = Op0;
6197 if (UseSRA) {
6198 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6199 Created.push_back(Res.getNode());
6200 }
6201
6202 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6203}
6204
6205/// Given an exact UDIV by a constant, create a multiplication
6206/// with the multiplicative inverse of the constant.
6207/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6209 const SDLoc &dl, SelectionDAG &DAG,
6210 SmallVectorImpl<SDNode *> &Created) {
6211 EVT VT = N->getValueType(0);
6212 EVT SVT = VT.getScalarType();
6213 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6214 EVT ShSVT = ShVT.getScalarType();
6215
6216 bool UseSRL = false;
6217 SmallVector<SDValue, 16> Shifts, Factors;
6218
6219 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6220 if (C->isZero())
6221 return false;
6222 APInt Divisor = C->getAPIntValue();
6223 unsigned Shift = Divisor.countr_zero();
6224 if (Shift) {
6225 Divisor.lshrInPlace(Shift);
6226 UseSRL = true;
6227 }
6228 // Calculate the multiplicative inverse modulo BW.
6229 APInt Factor = Divisor.multiplicativeInverse();
6230 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6231 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6232 return true;
6233 };
6234
6235 SDValue Op1 = N->getOperand(1);
6236
6237 // Collect all magic values from the build vector.
6238 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6239 return SDValue();
6240
6241 SDValue Shift, Factor;
6242 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6243 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6244 Factor = DAG.getBuildVector(VT, dl, Factors);
6245 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6246 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6247 "Expected matchUnaryPredicate to return one element for scalable "
6248 "vectors");
6249 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6250 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6251 } else {
6252 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6253 Shift = Shifts[0];
6254 Factor = Factors[0];
6255 }
6256
6257 SDValue Res = N->getOperand(0);
6258 if (UseSRL) {
6259 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6260 Created.push_back(Res.getNode());
6261 }
6262
6263 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6264}
6265
6267 SelectionDAG &DAG,
6268 SmallVectorImpl<SDNode *> &Created) const {
6270 if (isIntDivCheap(N->getValueType(0), Attr))
6271 return SDValue(N, 0); // Lower SDIV as SDIV
6272 return SDValue();
6273}
6274
6275SDValue
6277 SelectionDAG &DAG,
6278 SmallVectorImpl<SDNode *> &Created) const {
6280 if (isIntDivCheap(N->getValueType(0), Attr))
6281 return SDValue(N, 0); // Lower SREM as SREM
6282 return SDValue();
6283}
6284
6285/// Build sdiv by power-of-2 with conditional move instructions
6286/// Ref: "Hacker's Delight" by Henry Warren 10-1
6287/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6288/// bgez x, label
6289/// add x, x, 2**k-1
6290/// label:
6291/// sra res, x, k
6292/// neg res, res (when the divisor is negative)
6294 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6295 SmallVectorImpl<SDNode *> &Created) const {
6296 unsigned Lg2 = Divisor.countr_zero();
6297 EVT VT = N->getValueType(0);
6298
6299 SDLoc DL(N);
6300 SDValue N0 = N->getOperand(0);
6301 SDValue Zero = DAG.getConstant(0, DL, VT);
6302 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6303 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6304
6305 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6306 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6307 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6308 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6309 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6310
6311 Created.push_back(Cmp.getNode());
6312 Created.push_back(Add.getNode());
6313 Created.push_back(CMov.getNode());
6314
6315 // Divide by pow2.
6316 SDValue SRA =
6317 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6318
6319 // If we're dividing by a positive value, we're done. Otherwise, we must
6320 // negate the result.
6321 if (Divisor.isNonNegative())
6322 return SRA;
6323
6324 Created.push_back(SRA.getNode());
6325 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6326}
6327
6328/// Given an ISD::SDIV node expressing a divide by constant,
6329/// return a DAG expression to select that will generate the same value by
6330/// multiplying by a magic number.
6331/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6333 bool IsAfterLegalization,
6334 bool IsAfterLegalTypes,
6335 SmallVectorImpl<SDNode *> &Created) const {
6336 SDLoc dl(N);
6337 EVT VT = N->getValueType(0);
6338 EVT SVT = VT.getScalarType();
6339 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6340 EVT ShSVT = ShVT.getScalarType();
6341 unsigned EltBits = VT.getScalarSizeInBits();
6342 EVT MulVT;
6343
6344 // Check to see if we can do this.
6345 // FIXME: We should be more aggressive here.
6346 if (!isTypeLegal(VT)) {
6347 // Limit this to simple scalars for now.
6348 if (VT.isVector() || !VT.isSimple())
6349 return SDValue();
6350
6351 // If this type will be promoted to a large enough type with a legal
6352 // multiply operation, we can go ahead and do this transform.
6354 return SDValue();
6355
6356 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6357 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6358 !isOperationLegal(ISD::MUL, MulVT))
6359 return SDValue();
6360 }
6361
6362 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6363 if (N->getFlags().hasExact())
6364 return BuildExactSDIV(*this, N, dl, DAG, Created);
6365
6366 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6367
6368 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6369 if (C->isZero())
6370 return false;
6371
6372 const APInt &Divisor = C->getAPIntValue();
6374 int NumeratorFactor = 0;
6375 int ShiftMask = -1;
6376
6377 if (Divisor.isOne() || Divisor.isAllOnes()) {
6378 // If d is +1/-1, we just multiply the numerator by +1/-1.
6379 NumeratorFactor = Divisor.getSExtValue();
6380 magics.Magic = 0;
6381 magics.ShiftAmount = 0;
6382 ShiftMask = 0;
6383 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6384 // If d > 0 and m < 0, add the numerator.
6385 NumeratorFactor = 1;
6386 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6387 // If d < 0 and m > 0, subtract the numerator.
6388 NumeratorFactor = -1;
6389 }
6390
6391 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6392 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6393 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6394 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6395 return true;
6396 };
6397
6398 SDValue N0 = N->getOperand(0);
6399 SDValue N1 = N->getOperand(1);
6400
6401 // Collect the shifts / magic values from each element.
6402 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6403 return SDValue();
6404
6405 SDValue MagicFactor, Factor, Shift, ShiftMask;
6406 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6407 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6408 Factor = DAG.getBuildVector(VT, dl, Factors);
6409 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6410 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6411 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6412 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6413 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6414 "Expected matchUnaryPredicate to return one element for scalable "
6415 "vectors");
6416 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6417 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6418 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6419 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6420 } else {
6421 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6422 MagicFactor = MagicFactors[0];
6423 Factor = Factors[0];
6424 Shift = Shifts[0];
6425 ShiftMask = ShiftMasks[0];
6426 }
6427
6428 // Multiply the numerator (operand 0) by the magic value.
6429 // FIXME: We should support doing a MUL in a wider type.
6430 auto GetMULHS = [&](SDValue X, SDValue Y) {
6431 // If the type isn't legal, use a wider mul of the type calculated
6432 // earlier.
6433 if (!isTypeLegal(VT)) {
6434 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6435 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6436 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6437 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6438 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6439 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6440 }
6441
6442 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6443 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6444 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6445 SDValue LoHi =
6446 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6447 return SDValue(LoHi.getNode(), 1);
6448 }
6449 // If type twice as wide legal, widen and use a mul plus a shift.
6450 unsigned Size = VT.getScalarSizeInBits();
6451 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6452 if (VT.isVector())
6453 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6455 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6456 // custom lowered. This is very expensive so avoid it at all costs for
6457 // constant divisors.
6458 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6461 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6462 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6463 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6464 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6465 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6466 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6467 }
6468 return SDValue();
6469 };
6470
6471 SDValue Q = GetMULHS(N0, MagicFactor);
6472 if (!Q)
6473 return SDValue();
6474
6475 Created.push_back(Q.getNode());
6476
6477 // (Optionally) Add/subtract the numerator using Factor.
6478 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6479 Created.push_back(Factor.getNode());
6480 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6481 Created.push_back(Q.getNode());
6482
6483 // Shift right algebraic by shift value.
6484 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6485 Created.push_back(Q.getNode());
6486
6487 // Extract the sign bit, mask it and add it to the quotient.
6488 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6489 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6490 Created.push_back(T.getNode());
6491 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6492 Created.push_back(T.getNode());
6493 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6494}
6495
6496/// Given an ISD::UDIV node expressing a divide by constant,
6497/// return a DAG expression to select that will generate the same value by
6498/// multiplying by a magic number.
6499/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6501 bool IsAfterLegalization,
6502 bool IsAfterLegalTypes,
6503 SmallVectorImpl<SDNode *> &Created) const {
6504 SDLoc dl(N);
6505 EVT VT = N->getValueType(0);
6506 EVT SVT = VT.getScalarType();
6507 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6508 EVT ShSVT = ShVT.getScalarType();
6509 unsigned EltBits = VT.getScalarSizeInBits();
6510 EVT MulVT;
6511
6512 // Check to see if we can do this.
6513 // FIXME: We should be more aggressive here.
6514 if (!isTypeLegal(VT)) {
6515 // Limit this to simple scalars for now.
6516 if (VT.isVector() || !VT.isSimple())
6517 return SDValue();
6518
6519 // If this type will be promoted to a large enough type with a legal
6520 // multiply operation, we can go ahead and do this transform.
6522 return SDValue();
6523
6524 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6525 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6526 !isOperationLegal(ISD::MUL, MulVT))
6527 return SDValue();
6528 }
6529
6530 // If the udiv has an 'exact' bit we can use a simpler lowering.
6531 if (N->getFlags().hasExact())
6532 return BuildExactUDIV(*this, N, dl, DAG, Created);
6533
6534 SDValue N0 = N->getOperand(0);
6535 SDValue N1 = N->getOperand(1);
6536
6537 // Try to use leading zeros of the dividend to reduce the multiplier and
6538 // avoid expensive fixups.
6539 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6540
6541 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6542 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6543
6544 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6545 if (C->isZero())
6546 return false;
6547 const APInt& Divisor = C->getAPIntValue();
6548
6549 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6550
6551 // Magic algorithm doesn't work for division by 1. We need to emit a select
6552 // at the end.
6553 if (Divisor.isOne()) {
6554 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6555 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6556 } else {
6559 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6560
6561 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6562
6563 assert(magics.PreShift < Divisor.getBitWidth() &&
6564 "We shouldn't generate an undefined shift!");
6565 assert(magics.PostShift < Divisor.getBitWidth() &&
6566 "We shouldn't generate an undefined shift!");
6567 assert((!magics.IsAdd || magics.PreShift == 0) &&
6568 "Unexpected pre-shift");
6569 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6570 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6571 NPQFactor = DAG.getConstant(
6572 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6573 : APInt::getZero(EltBits),
6574 dl, SVT);
6575 UseNPQ |= magics.IsAdd;
6576 UsePreShift |= magics.PreShift != 0;
6577 UsePostShift |= magics.PostShift != 0;
6578 }
6579
6580 PreShifts.push_back(PreShift);
6581 MagicFactors.push_back(MagicFactor);
6582 NPQFactors.push_back(NPQFactor);
6583 PostShifts.push_back(PostShift);
6584 return true;
6585 };
6586
6587 // Collect the shifts/magic values from each element.
6588 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6589 return SDValue();
6590
6591 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6592 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6593 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6594 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6595 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6596 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6597 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6598 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6599 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6600 "Expected matchUnaryPredicate to return one for scalable vectors");
6601 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6602 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6603 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6604 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6605 } else {
6606 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6607 PreShift = PreShifts[0];
6608 MagicFactor = MagicFactors[0];
6609 PostShift = PostShifts[0];
6610 }
6611
6612 SDValue Q = N0;
6613 if (UsePreShift) {
6614 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6615 Created.push_back(Q.getNode());
6616 }
6617
6618 // FIXME: We should support doing a MUL in a wider type.
6619 auto GetMULHU = [&](SDValue X, SDValue Y) {
6620 // If the type isn't legal, use a wider mul of the type calculated
6621 // earlier.
6622 if (!isTypeLegal(VT)) {
6623 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6624 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6625 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6626 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6627 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6628 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6629 }
6630
6631 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6632 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6633 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6634 SDValue LoHi =
6635 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6636 return SDValue(LoHi.getNode(), 1);
6637 }
6638 // If type twice as wide legal, widen and use a mul plus a shift.
6639 unsigned Size = VT.getScalarSizeInBits();
6640 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6641 if (VT.isVector())
6642 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6644 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6645 // custom lowered. This is very expensive so avoid it at all costs for
6646 // constant divisors.
6647 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6650 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6651 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6652 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6653 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6654 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6655 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6656 }
6657 return SDValue(); // No mulhu or equivalent
6658 };
6659
6660 // Multiply the numerator (operand 0) by the magic value.
6661 Q = GetMULHU(Q, MagicFactor);
6662 if (!Q)
6663 return SDValue();
6664
6665 Created.push_back(Q.getNode());
6666
6667 if (UseNPQ) {
6668 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6669 Created.push_back(NPQ.getNode());
6670
6671 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6672 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6673 if (VT.isVector())
6674 NPQ = GetMULHU(NPQ, NPQFactor);
6675 else
6676 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6677
6678 Created.push_back(NPQ.getNode());
6679
6680 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6681 Created.push_back(Q.getNode());
6682 }
6683
6684 if (UsePostShift) {
6685 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6686 Created.push_back(Q.getNode());
6687 }
6688
6689 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6690
6691 SDValue One = DAG.getConstant(1, dl, VT);
6692 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6693 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6694}
6695
6696/// If all values in Values that *don't* match the predicate are same 'splat'
6697/// value, then replace all values with that splat value.
6698/// Else, if AlternativeReplacement was provided, then replace all values that
6699/// do match predicate with AlternativeReplacement value.
6700static void
6702 std::function<bool(SDValue)> Predicate,
6703 SDValue AlternativeReplacement = SDValue()) {
6704 SDValue Replacement;
6705 // Is there a value for which the Predicate does *NOT* match? What is it?
6706 auto SplatValue = llvm::find_if_not(Values, Predicate);
6707 if (SplatValue != Values.end()) {
6708 // Does Values consist only of SplatValue's and values matching Predicate?
6709 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6710 return Value == *SplatValue || Predicate(Value);
6711 })) // Then we shall replace values matching predicate with SplatValue.
6712 Replacement = *SplatValue;
6713 }
6714 if (!Replacement) {
6715 // Oops, we did not find the "baseline" splat value.
6716 if (!AlternativeReplacement)
6717 return; // Nothing to do.
6718 // Let's replace with provided value then.
6719 Replacement = AlternativeReplacement;
6720 }
6721 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6722}
6723
6724/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6725/// where the divisor is constant and the comparison target is zero,
6726/// return a DAG expression that will generate the same comparison result
6727/// using only multiplications, additions and shifts/rotations.
6728/// Ref: "Hacker's Delight" 10-17.
6729SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6730 SDValue CompTargetNode,
6732 DAGCombinerInfo &DCI,
6733 const SDLoc &DL) const {
6735 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6736 DCI, DL, Built)) {
6737 for (SDNode *N : Built)
6738 DCI.AddToWorklist(N);
6739 return Folded;
6740 }
6741
6742 return SDValue();
6743}
6744
6745SDValue
6746TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6747 SDValue CompTargetNode, ISD::CondCode Cond,
6748 DAGCombinerInfo &DCI, const SDLoc &DL,
6749 SmallVectorImpl<SDNode *> &Created) const {
6750 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6751 // - D must be constant, with D = D0 * 2^K where D0 is odd
6752 // - P is the multiplicative inverse of D0 modulo 2^W
6753 // - Q = floor(((2^W) - 1) / D)
6754 // where W is the width of the common type of N and D.
6755 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6756 "Only applicable for (in)equality comparisons.");
6757
6758 SelectionDAG &DAG = DCI.DAG;
6759
6760 EVT VT = REMNode.getValueType();
6761 EVT SVT = VT.getScalarType();
6762 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6763 EVT ShSVT = ShVT.getScalarType();
6764
6765 // If MUL is unavailable, we cannot proceed in any case.
6766 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6767 return SDValue();
6768
6769 bool ComparingWithAllZeros = true;
6770 bool AllComparisonsWithNonZerosAreTautological = true;
6771 bool HadTautologicalLanes = false;
6772 bool AllLanesAreTautological = true;
6773 bool HadEvenDivisor = false;
6774 bool AllDivisorsArePowerOfTwo = true;
6775 bool HadTautologicalInvertedLanes = false;
6776 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6777
6778 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6779 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6780 if (CDiv->isZero())
6781 return false;
6782
6783 const APInt &D = CDiv->getAPIntValue();
6784 const APInt &Cmp = CCmp->getAPIntValue();
6785
6786 ComparingWithAllZeros &= Cmp.isZero();
6787
6788 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6789 // if C2 is not less than C1, the comparison is always false.
6790 // But we will only be able to produce the comparison that will give the
6791 // opposive tautological answer. So this lane would need to be fixed up.
6792 bool TautologicalInvertedLane = D.ule(Cmp);
6793 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6794
6795 // If all lanes are tautological (either all divisors are ones, or divisor
6796 // is not greater than the constant we are comparing with),
6797 // we will prefer to avoid the fold.
6798 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6799 HadTautologicalLanes |= TautologicalLane;
6800 AllLanesAreTautological &= TautologicalLane;
6801
6802 // If we are comparing with non-zero, we need'll need to subtract said
6803 // comparison value from the LHS. But there is no point in doing that if
6804 // every lane where we are comparing with non-zero is tautological..
6805 if (!Cmp.isZero())
6806 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6807
6808 // Decompose D into D0 * 2^K
6809 unsigned K = D.countr_zero();
6810 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6811 APInt D0 = D.lshr(K);
6812
6813 // D is even if it has trailing zeros.
6814 HadEvenDivisor |= (K != 0);
6815 // D is a power-of-two if D0 is one.
6816 // If all divisors are power-of-two, we will prefer to avoid the fold.
6817 AllDivisorsArePowerOfTwo &= D0.isOne();
6818
6819 // P = inv(D0, 2^W)
6820 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6821 unsigned W = D.getBitWidth();
6823 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6824
6825 // Q = floor((2^W - 1) u/ D)
6826 // R = ((2^W - 1) u% D)
6827 APInt Q, R;
6829
6830 // If we are comparing with zero, then that comparison constant is okay,
6831 // else it may need to be one less than that.
6832 if (Cmp.ugt(R))
6833 Q -= 1;
6834
6836 "We are expecting that K is always less than all-ones for ShSVT");
6837
6838 // If the lane is tautological the result can be constant-folded.
6839 if (TautologicalLane) {
6840 // Set P and K amount to a bogus values so we can try to splat them.
6841 P = 0;
6842 K = -1;
6843 // And ensure that comparison constant is tautological,
6844 // it will always compare true/false.
6845 Q = -1;
6846 }
6847
6848 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6849 KAmts.push_back(
6850 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
6851 /*implicitTrunc=*/true),
6852 DL, ShSVT));
6853 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6854 return true;
6855 };
6856
6857 SDValue N = REMNode.getOperand(0);
6858 SDValue D = REMNode.getOperand(1);
6859
6860 // Collect the values from each element.
6861 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6862 return SDValue();
6863
6864 // If all lanes are tautological, the result can be constant-folded.
6865 if (AllLanesAreTautological)
6866 return SDValue();
6867
6868 // If this is a urem by a powers-of-two, avoid the fold since it can be
6869 // best implemented as a bit test.
6870 if (AllDivisorsArePowerOfTwo)
6871 return SDValue();
6872
6873 SDValue PVal, KVal, QVal;
6874 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6875 if (HadTautologicalLanes) {
6876 // Try to turn PAmts into a splat, since we don't care about the values
6877 // that are currently '0'. If we can't, just keep '0'`s.
6879 // Try to turn KAmts into a splat, since we don't care about the values
6880 // that are currently '-1'. If we can't, change them to '0'`s.
6882 DAG.getConstant(0, DL, ShSVT));
6883 }
6884
6885 PVal = DAG.getBuildVector(VT, DL, PAmts);
6886 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6887 QVal = DAG.getBuildVector(VT, DL, QAmts);
6888 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6889 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6890 "Expected matchBinaryPredicate to return one element for "
6891 "SPLAT_VECTORs");
6892 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6893 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6894 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6895 } else {
6896 PVal = PAmts[0];
6897 KVal = KAmts[0];
6898 QVal = QAmts[0];
6899 }
6900
6901 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6902 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6903 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6904 assert(CompTargetNode.getValueType() == N.getValueType() &&
6905 "Expecting that the types on LHS and RHS of comparisons match.");
6906 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6907 }
6908
6909 // (mul N, P)
6910 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6911 Created.push_back(Op0.getNode());
6912
6913 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6914 // divisors as a performance improvement, since rotating by 0 is a no-op.
6915 if (HadEvenDivisor) {
6916 // We need ROTR to do this.
6917 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6918 return SDValue();
6919 // UREM: (rotr (mul N, P), K)
6920 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6921 Created.push_back(Op0.getNode());
6922 }
6923
6924 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6925 SDValue NewCC =
6926 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6928 if (!HadTautologicalInvertedLanes)
6929 return NewCC;
6930
6931 // If any lanes previously compared always-false, the NewCC will give
6932 // always-true result for them, so we need to fixup those lanes.
6933 // Or the other way around for inequality predicate.
6934 assert(VT.isVector() && "Can/should only get here for vectors.");
6935 Created.push_back(NewCC.getNode());
6936
6937 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6938 // if C2 is not less than C1, the comparison is always false.
6939 // But we have produced the comparison that will give the
6940 // opposive tautological answer. So these lanes would need to be fixed up.
6941 SDValue TautologicalInvertedChannels =
6942 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6943 Created.push_back(TautologicalInvertedChannels.getNode());
6944
6945 // NOTE: we avoid letting illegal types through even if we're before legalize
6946 // ops – legalization has a hard time producing good code for this.
6947 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6948 // If we have a vector select, let's replace the comparison results in the
6949 // affected lanes with the correct tautological result.
6950 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6951 DL, SETCCVT, SETCCVT);
6952 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6953 Replacement, NewCC);
6954 }
6955
6956 // Else, we can just invert the comparison result in the appropriate lanes.
6957 //
6958 // NOTE: see the note above VSELECT above.
6959 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6960 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6961 TautologicalInvertedChannels);
6962
6963 return SDValue(); // Don't know how to lower.
6964}
6965
6966/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6967/// where the divisor is constant and the comparison target is zero,
6968/// return a DAG expression that will generate the same comparison result
6969/// using only multiplications, additions and shifts/rotations.
6970/// Ref: "Hacker's Delight" 10-17.
6971SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6972 SDValue CompTargetNode,
6974 DAGCombinerInfo &DCI,
6975 const SDLoc &DL) const {
6977 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6978 DCI, DL, Built)) {
6979 assert(Built.size() <= 7 && "Max size prediction failed.");
6980 for (SDNode *N : Built)
6981 DCI.AddToWorklist(N);
6982 return Folded;
6983 }
6984
6985 return SDValue();
6986}
6987
// NOTE(review): This file is a doxygen-listing extraction; each line begins
// with its original line number, and lines that were hyperlinks in the
// rendering (e.g. 7079 'APInt P = ...', 7083 'APInt A = ...', 7094-7097 assert
// bodies, 7103, 7151/7154/7158 splat-rebuild calls, 7211 condition-code pick,
// 7226-7228 legality checks, 7234-7238 constant operands) were DROPPED.
// The code below is therefore incomplete; restore the elided statements from
// upstream llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp before compiling.
//
// Purpose (per the in-body comments): turns (seteq/ne (srem N, D), 0) into a
// mul/add/rotr/setcc sequence derived from Hacker's Delight 10-17, building
// per-lane P/A/K/Q constants, with special handling for INT_MIN divisors via
// a constant-folded VSELECT blend. New nodes are appended to 'Created'.
6988SDValue
6989TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6990 SDValue CompTargetNode, ISD::CondCode Cond,
6991 DAGCombinerInfo &DCI, const SDLoc &DL,
6992 SmallVectorImpl<SDNode *> &Created) const {
6993 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6994 // Fold:
6995 // (seteq/ne (srem N, D), 0)
6996 // To:
6997 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6998 //
6999 // - D must be constant, with D = D0 * 2^K where D0 is odd
7000 // - P is the multiplicative inverse of D0 modulo 2^W
7001 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7002 // - Q = floor((2 * A) / (2^K))
7003 // where W is the width of the common type of N and D.
7004 //
7005 // When D is a power of two (and thus D0 is 1), the normal
7006 // formula for A and Q don't apply, because the derivation
7007 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7008 // does not apply. This specifically fails when N = INT_MIN.
7009 //
7010 // Instead, for power-of-two D, we use:
7011 // - A = 2^(W-1)
7012 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7013 // - Q = 2^(W-K) - 1
7014 // |-> Test that the top K bits are zero after rotation
7015 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7016 "Only applicable for (in)equality comparisons.");
7017
7018 SelectionDAG &DAG = DCI.DAG;
7019
7020 EVT VT = REMNode.getValueType();
7021 EVT SVT = VT.getScalarType();
7022 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7023 EVT ShSVT = ShVT.getScalarType();
7024
7025 // If we are after ops legalization, and MUL is unavailable, we can not
7026 // proceed.
7027 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7028 return SDValue();
7029
7030 // TODO: Could support comparing with non-zero too.
7031 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7032 if (!CompTarget || !CompTarget->isZero())
7033 return SDValue();
7034
7035 bool HadIntMinDivisor = false;
7036 bool HadOneDivisor = false;
7037 bool AllDivisorsAreOnes = true;
7038 bool HadEvenDivisor = false;
7039 bool NeedToApplyOffset = false;
7040 bool AllDivisorsArePowerOfTwo = true;
7041 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7042
7043 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7044 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7045 if (C->isZero())
7046 return false;
7047
7048 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7049
7050 // WARNING: this fold is only valid for positive divisors!
7051 APInt D = C->getAPIntValue();
7052 if (D.isNegative())
7053 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7054
7055 HadIntMinDivisor |= D.isMinSignedValue();
7056
7057 // If all divisors are ones, we will prefer to avoid the fold.
7058 HadOneDivisor |= D.isOne();
7059 AllDivisorsAreOnes &= D.isOne();
7060
7061 // Decompose D into D0 * 2^K
7062 unsigned K = D.countr_zero();
7063 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7064 APInt D0 = D.lshr(K);
7065
7066 if (!D.isMinSignedValue()) {
7067 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7068 // we don't care about this lane in this fold, we'll special-handle it.
7069 HadEvenDivisor |= (K != 0);
7070 }
7071
7072 // D is a power-of-two if D0 is one. This includes INT_MIN.
7073 // If all divisors are power-of-two, we will prefer to avoid the fold.
7074 AllDivisorsArePowerOfTwo &= D0.isOne();
7075
7076 // P = inv(D0, 2^W)
7077 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7078 unsigned W = D.getBitWidth();
// NOTE(review): line 7079 (computation of 'P', presumably via
// D0.multiplicativeInverse()) was elided by the extraction — 'P' is used
// below but never declared here. TODO: restore from upstream.
7080 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7081
7082 // A = floor((2^(W - 1) - 1) / D0) & -2^K
// NOTE(review): line 7083 (declaration/initialization of 'A') was elided.
7084 A.clearLowBits(K);
7085
7086 if (!D.isMinSignedValue()) {
7087 // If divisor INT_MIN, then we don't care about this lane in this fold,
7088 // we'll special-handle it.
7089 NeedToApplyOffset |= A != 0;
7090 }
7091
7092 // Q = floor((2 * A) / (2^K))
7093 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7094
// NOTE(review): the assert condition lines 7095 and 7097 were elided; only
// their message strings survive below.
7096 "We are expecting that A is always less than all-ones for SVT");
7098 "We are expecting that K is always less than all-ones for ShSVT");
7099
7100 // If D was a power of two, apply the alternate constant derivation.
7101 if (D0.isOne()) {
7102 // A = 2^(W-1)
// NOTE(review): line 7103 (assignment of A for the power-of-two case) was
// elided.
7104 // - Q = 2^(W-K) - 1
7105 Q = APInt::getAllOnes(W - K).zext(W);
7106 }
7107
7108 // If the divisor is 1 the result can be constant-folded. Likewise, we
7109 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7110 if (D.isOne()) {
7111 // Set P, A and K to a bogus values so we can try to splat them.
7112 P = 0;
7113 A = -1;
7114 K = -1;
7115
7116 // x ?% 1 == 0 <--> true <--> x u<= -1
7117 Q = -1;
7118 }
7119
7120 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7121 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7122 KAmts.push_back(
7123 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7124 /*implicitTrunc=*/true),
7125 DL, ShSVT));
7126 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7127 return true;
7128 };
7129
7130 SDValue N = REMNode.getOperand(0);
7131 SDValue D = REMNode.getOperand(1);
7132
7133 // Collect the values from each element.
7134 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7135 return SDValue();
7136
7137 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7138 if (AllDivisorsAreOnes)
7139 return SDValue();
7140
7141 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7142 // since it can be best implemented as a bit test.
7143 if (AllDivisorsArePowerOfTwo)
7144 return SDValue();
7145
7146 SDValue PVal, AVal, KVal, QVal;
7147 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7148 if (HadOneDivisor) {
7149 // Try to turn PAmts into a splat, since we don't care about the values
7150 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): lines 7151, 7154 and 7158 (the turnVectorIntoSplatVector
// calls whose trailing arguments survive below) were elided.
7152 // Try to turn AAmts into a splat, since we don't care about the
7153 // values that are currently '-1'. If we can't, change them to '0'`s.
7155 DAG.getConstant(0, DL, SVT));
7156 // Try to turn KAmts into a splat, since we don't care about the values
7157 // that are currently '-1'. If we can't, change them to '0'`s.
7159 DAG.getConstant(0, DL, ShSVT));
7160 }
7161
7162 PVal = DAG.getBuildVector(VT, DL, PAmts);
7163 AVal = DAG.getBuildVector(VT, DL, AAmts);
7164 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7165 QVal = DAG.getBuildVector(VT, DL, QAmts);
7166 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7167 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7168 QAmts.size() == 1 &&
7169 "Expected matchUnaryPredicate to return one element for scalable "
7170 "vectors");
7171 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7172 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7173 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7174 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7175 } else {
7176 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7177 PVal = PAmts[0];
7178 AVal = AAmts[0];
7179 KVal = KAmts[0];
7180 QVal = QAmts[0];
7181 }
7182
7183 // (mul N, P)
7184 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7185 Created.push_back(Op0.getNode());
7186
7187 if (NeedToApplyOffset) {
7188 // We need ADD to do this.
7189 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7190 return SDValue();
7191
7192 // (add (mul N, P), A)
7193 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7194 Created.push_back(Op0.getNode());
7195 }
7196
7197 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7198 // divisors as a performance improvement, since rotating by 0 is a no-op.
7199 if (HadEvenDivisor) {
7200 // We need ROTR to do this.
7201 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7202 return SDValue();
7203 // SREM: (rotr (add (mul N, P), A), K)
7204 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7205 Created.push_back(Op0.getNode());
7206 }
7207
7208 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7209 SDValue Fold =
7210 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
// NOTE(review): line 7211 (the condition-code argument of getSetCC) was
// elided.
7212
7213 // If we didn't have lanes with INT_MIN divisor, then we're done.
7214 if (!HadIntMinDivisor)
7215 return Fold;
7216
7217 // That fold is only valid for positive divisors. Which effectively means,
7218 // it is invalid for INT_MIN divisors. So if we have such a lane,
7219 // we must fix-up results for said lanes.
7220 assert(VT.isVector() && "Can/should only get here for vectors.");
7221
7222 // NOTE: we avoid letting illegal types through even if we're before legalize
7223 // ops – legalization has a hard time producing good code for the code that
7224 // follows.
7225 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
// NOTE(review): lines 7226-7228 (the remaining legality conditions, e.g. for
// AND/VSELECT) were elided.
7229 return SDValue();
7230
7231 Created.push_back(Fold.getNode());
7232
7233 SDValue IntMin = DAG.getConstant(
// NOTE(review): lines 7234, 7236 and 7238 (the APInt constant arguments for
// IntMin/IntMax/Zero) were elided.
7235 SDValue IntMax = DAG.getConstant(
7237 SDValue Zero =
7239
7240 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7241 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7242 Created.push_back(DivisorIsIntMin.getNode());
7243
7244 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7245 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7246 Created.push_back(Masked.getNode());
7247 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7248 Created.push_back(MaskedIsZero.getNode());
7249
7250 // To produce final result we need to blend 2 vectors: 'SetCC' and
7251 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7252 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7253 // constant-folded, select can get lowered to a shuffle with constant mask.
7254 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7255 MaskedIsZero, Fold);
7256
7257 return Blended;
7258}
7259
// NOTE(review): the signature lines (7260-7261) of this function were elided
// by the extraction; this fragment matches the body of
// TargetLowering::verifyReturnAddressArgumentIsConstant(SDValue Op,
// SelectionDAG &DAG) in upstream LLVM — confirm before restoring.
// Returns true (and emits a frontend-visible error) when operand 0 is not a
// constant, i.e. when the __builtin_return_address argument is invalid.
7262 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7263 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7264 "be a constant integer");
7265 return true;
7266 }
7267
7268 return false;
7269}
7270
// NOTE(review): the first signature line (7271) was elided by the extraction;
// in upstream LLVM this is TargetLowering::getSqrtInputTest(SDValue Op,
// SelectionDAG &DAG, const DenormalMode &Mode) — confirm before restoring.
// Builds a setcc that is true when Op would be treated as zero/denormal by a
// sqrt estimate: a plain ==0.0 test when denormal inputs are flushed,
// otherwise fabs(Op) < smallest-normal so denormals are caught too.
7272 const DenormalMode &Mode) const {
7273 SDLoc DL(Op);
7274 EVT VT = Op.getValueType();
7275 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7276 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7277
7278 // This is specifically a check for the handling of denormal inputs, not the
7279 // result.
7280 if (Mode.Input == DenormalMode::PreserveSign ||
7281 Mode.Input == DenormalMode::PositiveZero) {
7282 // Test = X == 0.0
7283 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7284 }
7285
7286 // Testing it with denormal inputs to avoid wrong estimate.
7287 //
7288 // Test = fabs(X) < SmallestNormal
7289 const fltSemantics &FltSem = VT.getFltSemantics();
7290 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7291 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7292 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7293 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7294}
7295
// NOTE(review): the signature start (line 7296, presumably
// 'SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,')
// and line 7298 (the 'NegatibleCost &Cost,' parameter) were elided, as were
// several interior lines (7302, 7307, 7342, 7357, 7368-7369, 7379, 7389,
// 7402, 7410, 7445, 7450, 7458, 7466, 7503, 7514, 7522, 7565, 7577) — the
// local NegatibleCost declarations (CostX/CostY/CostZ/CostLHS/CostRHS), the
// depth limit check, and the 'Ops' vector declaration among them. Restore
// from upstream before compiling.
//
// Purpose (from the surviving code): recursively computes the negated form of
// Op (constants, FADD/FSUB/FMUL/FDIV/FMA/FMAD, extends/rounds, selects),
// reporting via the out-parameter 'Cost' whether negation is cheaper than,
// equal to, or more expensive than inserting an FNEG; returns SDValue() when
// negation is not profitable or not possible.
7297 bool LegalOps, bool OptForSize,
7299 unsigned Depth) const {
7300 // fneg is removable even if it has multiple uses.
7301 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7303 return Op.getOperand(0);
7304 }
7305
7306 // Don't recurse exponentially.
7308 return SDValue();
7309
7310 // Pre-increment recursion depth for use in recursive calls.
7311 ++Depth;
7312 const SDNodeFlags Flags = Op->getFlags();
7313 const TargetOptions &Options = DAG.getTarget().Options;
7314 EVT VT = Op.getValueType();
7315 unsigned Opcode = Op.getOpcode();
7316
7317 // Don't allow anything with multiple uses unless we know it is free.
7318 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7319 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7320 isFPExtFree(VT, Op.getOperand(0).getValueType());
7321 if (!IsFreeExtend)
7322 return SDValue();
7323 }
7324
7325 auto RemoveDeadNode = [&](SDValue N) {
7326 if (N && N.getNode()->use_empty())
7327 DAG.RemoveDeadNode(N.getNode());
7328 };
7329
7330 SDLoc DL(Op);
7331
7332 // Because getNegatedExpression can delete nodes we need a handle to keep
7333 // temporary nodes alive in case the recursion manages to create an identical
7334 // node.
7335 std::list<HandleSDNode> Handles;
7336
7337 switch (Opcode) {
7338 case ISD::ConstantFP: {
7339 // Don't invert constant FP values after legalization unless the target says
7340 // the negated constant is legal.
7341 bool IsOpLegal =
7343 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7344 OptForSize);
7345
7346 if (LegalOps && !IsOpLegal)
7347 break;
7348
7349 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7350 V.changeSign();
7351 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7352
7353 // If we already have the use of the negated floating constant, it is free
7354 // to negate it even it has multiple uses.
7355 if (!Op.hasOneUse() && CFP.use_empty())
7356 break;
7358 return CFP;
7359 }
7360 case ISD::BUILD_VECTOR: {
7361 // Only permit BUILD_VECTOR of constants.
7362 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7363 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7364 }))
7365 break;
7366
7367 bool IsOpLegal =
7370 llvm::all_of(Op->op_values(), [&](SDValue N) {
7371 return N.isUndef() ||
7372 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7373 OptForSize);
7374 });
7375
7376 if (LegalOps && !IsOpLegal)
7377 break;
7378
7380 for (SDValue C : Op->op_values()) {
7381 if (C.isUndef()) {
7382 Ops.push_back(C);
7383 continue;
7384 }
7385 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7386 V.changeSign();
7387 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7388 }
7390 return DAG.getBuildVector(VT, DL, Ops);
7391 }
7392 case ISD::FADD: {
7393 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7394 break;
7395
7396 // After operation legalization, it might not be legal to create new FSUBs.
7397 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7398 break;
7399 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7400
7401 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7403 SDValue NegX =
7404 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7405 // Prevent this node from being deleted by the next call.
7406 if (NegX)
7407 Handles.emplace_back(NegX);
7408
7409 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7411 SDValue NegY =
7412 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7413
7414 // We're done with the handles.
7415 Handles.clear();
7416
7417 // Negate the X if its cost is less or equal than Y.
7418 if (NegX && (CostX <= CostY)) {
7419 Cost = CostX;
7420 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7421 if (NegY != N)
7422 RemoveDeadNode(NegY);
7423 return N;
7424 }
7425
7426 // Negate the Y if it is not expensive.
7427 if (NegY) {
7428 Cost = CostY;
7429 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7430 if (NegX != N)
7431 RemoveDeadNode(NegX);
7432 return N;
7433 }
7434 break;
7435 }
7436 case ISD::FSUB: {
7437 // We can't turn -(A-B) into B-A when we honor signed zeros.
7438 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7439 break;
7440
7441 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7442 // fold (fneg (fsub 0, Y)) -> Y
7443 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7444 if (C->isZero()) {
7446 return Y;
7447 }
7448
7449 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7451 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7452 }
7453 case ISD::FMUL:
7454 case ISD::FDIV: {
7455 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7456
7457 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7459 SDValue NegX =
7460 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7461 // Prevent this node from being deleted by the next call.
7462 if (NegX)
7463 Handles.emplace_back(NegX);
7464
7465 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7467 SDValue NegY =
7468 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7469
7470 // We're done with the handles.
7471 Handles.clear();
7472
7473 // Negate the X if its cost is less or equal than Y.
7474 if (NegX && (CostX <= CostY)) {
7475 Cost = CostX;
7476 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7477 if (NegY != N)
7478 RemoveDeadNode(NegY);
7479 return N;
7480 }
7481
7482 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7483 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7484 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7485 break;
7486
7487 // Negate the Y if it is not expensive.
7488 if (NegY) {
7489 Cost = CostY;
7490 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7491 if (NegX != N)
7492 RemoveDeadNode(NegX);
7493 return N;
7494 }
7495 break;
7496 }
7497 case ISD::FMA:
7498 case ISD::FMAD: {
7499 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7500 break;
7501
7502 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7504 SDValue NegZ =
7505 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7506 // Give up if fail to negate the Z.
7507 if (!NegZ)
7508 break;
7509
7510 // Prevent this node from being deleted by the next two calls.
7511 Handles.emplace_back(NegZ);
7512
7513 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7515 SDValue NegX =
7516 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7517 // Prevent this node from being deleted by the next call.
7518 if (NegX)
7519 Handles.emplace_back(NegX);
7520
7521 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7523 SDValue NegY =
7524 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7525
7526 // We're done with the handles.
7527 Handles.clear();
7528
7529 // Negate the X if its cost is less or equal than Y.
7530 if (NegX && (CostX <= CostY)) {
7531 Cost = std::min(CostX, CostZ);
7532 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7533 if (NegY != N)
7534 RemoveDeadNode(NegY);
7535 return N;
7536 }
7537
7538 // Negate the Y if it is not expensive.
7539 if (NegY) {
7540 Cost = std::min(CostY, CostZ);
7541 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7542 if (NegX != N)
7543 RemoveDeadNode(NegX);
7544 return N;
7545 }
7546 break;
7547 }
7548
7549 case ISD::FP_EXTEND:
7550 case ISD::FSIN:
7551 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7552 OptForSize, Cost, Depth))
7553 return DAG.getNode(Opcode, DL, VT, NegV);
7554 break;
7555 case ISD::FP_ROUND:
7556 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7557 OptForSize, Cost, Depth))
7558 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7559 break;
7560 case ISD::SELECT:
7561 case ISD::VSELECT: {
7562 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7563 // iff at least one cost is cheaper and the other is neutral/cheaper
7564 SDValue LHS = Op.getOperand(1);
7566 SDValue NegLHS =
7567 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7568 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7569 RemoveDeadNode(NegLHS);
7570 break;
7571 }
7572
7573 // Prevent this node from being deleted by the next call.
7574 Handles.emplace_back(NegLHS);
7575
7576 SDValue RHS = Op.getOperand(2);
7578 SDValue NegRHS =
7579 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7580
7581 // We're done with the handles.
7582 Handles.clear();
7583
7584 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7585 (CostLHS != NegatibleCost::Cheaper &&
7586 CostRHS != NegatibleCost::Cheaper)) {
7587 RemoveDeadNode(NegLHS);
7588 RemoveDeadNode(NegRHS);
7589 break;
7590 }
7591
7592 Cost = std::min(CostLHS, CostRHS);
7593 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7594 }
7595 }
7596
7597 return SDValue();
7598}
7599
7600//===----------------------------------------------------------------------===//
7601// Legalization Utilities
7602//===----------------------------------------------------------------------===//
7603
// NOTE(review): several lines of this function were elided by the extraction
// (7606 'SmallVectorImpl<SDValue> &Result,', 7614/7616/7618/7620 the
// isOperationLegalOrCustom operands of the Has* flags, 7651 the truncate
// legality condition, 7691-7692 the SRL/truncate legality condition, 7739 the
// ADDE legality half of UseGlue). Restore from upstream before compiling.
//
// Expands a wide MUL / [SU]MUL_LOHI into operations on the half-width type
// HiLoVT, pushing the half-width result pieces into 'Result'. Callers may
// pre-split the operands via LL/LH/RL/RH (all four set, or none). Returns
// false when no usable half-width multiply (MULH* / *MUL_LOHI) is available.
7604bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7605 SDValue LHS, SDValue RHS,
7607 EVT HiLoVT, SelectionDAG &DAG,
7608 MulExpansionKind Kind, SDValue LL,
7609 SDValue LH, SDValue RL, SDValue RH) const {
7610 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7611 Opcode == ISD::SMUL_LOHI);
7612
7613 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7615 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7617 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7619 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7621
7622 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7623 return false;
7624
7625 unsigned OuterBitSize = VT.getScalarSizeInBits();
7626 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7627
7628 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7629 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7630 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7631
7632 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7633 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7634 bool Signed) -> bool {
7635 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7636 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7637 Hi = SDValue(Lo.getNode(), 1);
7638 return true;
7639 }
7640 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7641 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7642 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7643 return true;
7644 }
7645 return false;
7646 };
7647
7648 SDValue Lo, Hi;
7649
7650 if (!LL.getNode() && !RL.getNode() &&
7652 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7653 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7654 }
7655
7656 if (!LL.getNode())
7657 return false;
7658
7659 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7660 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7661 DAG.MaskedValueIsZero(RHS, HighMask)) {
7662 // The inputs are both zero-extended.
7663 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7664 Result.push_back(Lo);
7665 Result.push_back(Hi);
7666 if (Opcode != ISD::MUL) {
7667 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7668 Result.push_back(Zero);
7669 Result.push_back(Zero);
7670 }
7671 return true;
7672 }
7673 }
7674
7675 if (!VT.isVector() && Opcode == ISD::MUL &&
7676 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7677 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7678 // The input values are both sign-extended.
7679 // TODO non-MUL case?
7680 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7681 Result.push_back(Lo);
7682 Result.push_back(Hi);
7683 return true;
7684 }
7685 }
7686
7687 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7688 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7689
7690 if (!LH.getNode() && !RH.getNode() &&
7693 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7694 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7695 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7696 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7697 }
7698
7699 if (!LH.getNode())
7700 return false;
7701
7702 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7703 return false;
7704
7705 Result.push_back(Lo);
7706
7707 if (Opcode == ISD::MUL) {
7708 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7709 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7710 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7711 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7712 Result.push_back(Hi);
7713 return true;
7714 }
7715
7716 // Compute the full width result.
7717 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7718 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7719 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7720 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7721 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7722 };
7723
7724 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7725 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7726 return false;
7727
7728 // This is effectively the add part of a multiply-add of half-sized operands,
7729 // so it cannot overflow.
7730 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7731
7732 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7733 return false;
7734
7735 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7736 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7737
7738 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7740 if (UseGlue)
7741 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7742 Merge(Lo, Hi));
7743 else
7744 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7745 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7746
7747 SDValue Carry = Next.getValue(1);
7748 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7749 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7750
7751 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7752 return false;
7753
7754 if (UseGlue)
7755 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7756 Carry);
7757 else
7758 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7759 Zero, Carry);
7760
7761 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7762
7763 if (Opcode == ISD::SMUL_LOHI) {
7764 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7765 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7766 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7767
7768 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7769 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7770 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7771 }
7772
7773 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7774 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7775 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7776 return true;
7777}
7778
// NOTE(review): the signature start (line 7779, presumably
// 'bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,')
// and line 7783 (the local 'SmallVector<SDValue, 2> Result;') were elided by
// the extraction — confirm against upstream before restoring.
// Convenience wrapper: delegates to the value-based expandMUL_LOHI above and,
// on success, unpacks the two half-width results into Lo and Hi.
7780 SelectionDAG &DAG, MulExpansionKind Kind,
7781 SDValue LL, SDValue LH, SDValue RL,
7782 SDValue RH) const {
7784 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7785 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7786 DAG, Kind, LL, LH, RL, RH);
7787 if (Ok) {
7788 assert(Result.size() == 2);
7789 Lo = Result[0];
7790 Hi = Result[1];
7791 }
7792 return Ok;
7793}
7794
7795// Optimize unsigned division or remainder by constants for types twice as large
7796// as a legal VT.
7797//
7798// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7799// can be computed
7800// as:
7801// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7802// Remainder = Sum % Constant
7803// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7804//
7805// For division, we can compute the remainder using the algorithm described
7806// above, subtract it from the dividend to get an exact multiple of Constant.
7807// Then multiply that exact multiply by the multiplicative inverse modulo
7808// (1 << (BitWidth / 2)) to get the quotient.
7809
7810// If Constant is even, we can shift right the dividend and the divisor by the
7811// number of trailing zeros in Constant before applying the remainder algorithm.
7812// If we're after the quotient, we can subtract this value from the shifted
7813// dividend and multiply by the multiplicative inverse of the shifted divisor.
7814// If we want the remainder, we shift the value left by the number of trailing
7815// zeros and add the bits that were shifted out of the dividend.
// NOTE(review): the signature lines (7816-7817, presumably
// 'bool TargetLowering::expandDIVREMByConstant(SDNode *N,
//  SmallVectorImpl<SDValue> &Result,') and interior lines 7837 (first half of
// an assert), 7848 (second MULHU/UMUL_LOHI legality operand), 7903 (the
// UADDO/UADDO_CARRY legality condition) and 7914 (the ZeroOrOneBooleanContent
// comparand) were elided by the extraction. Restore from upstream before
// compiling. On success, quotient halves and/or remainder halves (low then
// high, in HiLoVT) are appended to 'Result' depending on Opcode.
7818 EVT HiLoVT, SelectionDAG &DAG,
7819 SDValue LL, SDValue LH) const {
7820 unsigned Opcode = N->getOpcode();
7821 EVT VT = N->getValueType(0);
7822
7823 // TODO: Support signed division/remainder.
7824 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7825 return false;
7826 assert(
7827 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7828 "Unexpected opcode");
7829
7830 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7831 if (!CN)
7832 return false;
7833
7834 APInt Divisor = CN->getAPIntValue();
7835 unsigned BitWidth = Divisor.getBitWidth();
7836 unsigned HBitWidth = BitWidth / 2;
7838 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7839
7840 // Divisor needs to less than (1 << HBitWidth).
7841 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7842 if (Divisor.uge(HalfMaxPlus1))
7843 return false;
7844
7845 // We depend on the UREM by constant optimization in DAGCombiner that requires
7846 // high multiply.
7847 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7849 return false;
7850
7851 // Don't expand if optimizing for size.
7852 if (DAG.shouldOptForSize())
7853 return false;
7854
7855 // Early out for 0 or 1 divisors.
7856 if (Divisor.ule(1))
7857 return false;
7858
7859 // If the divisor is even, shift it until it becomes odd.
7860 unsigned TrailingZeros = 0;
7861 if (!Divisor[0]) {
7862 TrailingZeros = Divisor.countr_zero();
7863 Divisor.lshrInPlace(TrailingZeros);
7864 }
7865
7866 SDLoc dl(N);
7867 SDValue Sum;
7868 SDValue PartialRem;
7869
7870 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7871 // then add in the carry.
7872 // TODO: If we can't split it in half, we might be able to split into 3 or
7873 // more pieces using a smaller bit width.
7874 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7875 assert(!LL == !LH && "Expected both input halves or no input halves!");
7876 if (!LL)
7877 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7878
7879 // Shift the input by the number of TrailingZeros in the divisor. The
7880 // shifted out bits will be added to the remainder later.
7881 if (TrailingZeros) {
7882 // Save the shifted off bits if we need the remainder.
7883 if (Opcode != ISD::UDIV) {
7884 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7885 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7886 DAG.getConstant(Mask, dl, HiLoVT));
7887 }
7888
7889 LL = DAG.getNode(
7890 ISD::OR, dl, HiLoVT,
7891 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7892 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7893 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7894 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7895 HiLoVT, dl)));
7896 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7897 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7898 }
7899
7900 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7901 EVT SetCCType =
7902 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7904 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7905 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7906 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7907 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7908 } else {
7909 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7910 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7911 // If the boolean for the target is 0 or 1, we can add the setcc result
7912 // directly.
7913 if (getBooleanContents(HiLoVT) ==
7915 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7916 else
7917 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7918 DAG.getConstant(0, dl, HiLoVT));
7919 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7920 }
7921 }
7922
7923 // If we didn't find a sum, we can't do the expansion.
7924 if (!Sum)
7925 return false;
7926
7927 // Perform a HiLoVT urem on the Sum using truncated divisor.
7928 SDValue RemL =
7929 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7930 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7931 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7932
7933 if (Opcode != ISD::UREM) {
7934 // Subtract the remainder from the shifted dividend.
7935 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7936 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7937
7938 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7939
7940 // Multiply by the multiplicative inverse of the divisor modulo
7941 // (1 << BitWidth).
7942 APInt MulFactor = Divisor.multiplicativeInverse();
7943
7944 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7945 DAG.getConstant(MulFactor, dl, VT));
7946
7947 // Split the quotient into low and high parts.
7948 SDValue QuotL, QuotH;
7949 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7950 Result.push_back(QuotL);
7951 Result.push_back(QuotH);
7952 }
7953
7954 if (Opcode != ISD::UDIV) {
7955 // If we shifted the input, shift the remainder left and add the bits we
7956 // shifted off the input.
7957 if (TrailingZeros) {
7958 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7959 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7960 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7961 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7962 }
7963 Result.push_back(RemL);
7964 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7965 }
7966
7967 return true;
7968}
7969
7970// Check that (every element of) Z is undef or not an exact multiple of BW.
// NOTE(review): line 7972 (presumably 'return ISD::matchUnaryPredicate(') was
// elided by the extraction — the call whose arguments survive below has lost
// its callee. Restore from upstream before compiling. The trailing 'true'
// is matchUnaryPredicate's AllowUndefs flag, matching the comment above.
7971static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7973 Z,
7974 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7975 true);
7976}
7977
// NOTE(review): the signature line (7978) of this function was elided by the
// extraction; by its use of ISD::VP_FSHL and the (value, mask, EVL) operand
// layout this is the VP funnel-shift expansion (upstream:
// TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)) —
// confirm before restoring.
// Lowers VP_FSHL/VP_FSHR into VP shift/or sequences. When the shift amount is
// provably non-zero mod BW, a two-shift form is used; otherwise the amount is
// reduced mod BW and an extra 1-bit pre-shift avoids an out-of-range shift
// when the effective amount is zero.
7979 EVT VT = Node->getValueType(0);
7980 SDValue ShX, ShY;
7981 SDValue ShAmt, InvShAmt;
7982 SDValue X = Node->getOperand(0);
7983 SDValue Y = Node->getOperand(1);
7984 SDValue Z = Node->getOperand(2);
7985 SDValue Mask = Node->getOperand(3);
7986 SDValue VL = Node->getOperand(4);
7987
7988 unsigned BW = VT.getScalarSizeInBits();
7989 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7990 SDLoc DL(SDValue(Node, 0));
7991
7992 EVT ShVT = Z.getValueType();
7993 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7994 // fshl: X << C | Y >> (BW - C)
7995 // fshr: X << (BW - C) | Y >> C
7996 // where C = Z % BW is not zero
7997 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7998 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7999 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8000 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8001 VL);
8002 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8003 VL);
8004 } else {
8005 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8006 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8007 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8008 if (isPowerOf2_32(BW)) {
8009 // Z % BW -> Z & (BW - 1)
8010 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8011 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8012 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8013 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8014 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8015 } else {
8016 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8017 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8018 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8019 }
8020
8021 SDValue One = DAG.getConstant(1, DL, ShVT);
8022 if (IsFSHL) {
8023 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8024 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8025 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8026 } else {
8027 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8028 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8029 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8030 }
8031 }
8032 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8033}
8034
// Expand ISD::FSHL/ISD::FSHR into plain shifts plus an OR, or into the
// opposite-direction funnel shift when that opcode is better supported.
// VP (vector-predicated) nodes are delegated to expandVPFunnelShift.
// Returns a null SDValue when a vector type lacks the required legal ops.
// NOTE(review): the first signature line (original line 8035) and original
// lines 8043-8045 (the remainder of the vector-legality condition started
// at line 8042, presumably SRL/SUB/OR legality checks) are missing from
// this copy of the file -- confirm against upstream before editing.
8036 SelectionDAG &DAG) const {
8037 if (Node->isVPOpcode())
8038 return expandVPFunnelShift(Node, DAG);
8039
8040 EVT VT = Node->getValueType(0);
8041
8042 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8046 return SDValue();
8047
8048 SDValue X = Node->getOperand(0);
8049 SDValue Y = Node->getOperand(1);
8050 SDValue Z = Node->getOperand(2);
8051
8052 unsigned BW = VT.getScalarSizeInBits();
8053 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8054 SDLoc DL(SDValue(Node, 0));
8055
8056 EVT ShVT = Z.getValueType();
8057
8058 // If a funnel shift in the other direction is more supported, use it.
8059 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8060 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8061 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8062 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8063 // fshl X, Y, Z -> fshr X, Y, -Z
8064 // fshr X, Y, Z -> fshl X, Y, -Z
8065 SDValue Zero = DAG.getConstant(0, DL, ShVT);
 // NOTE(review): the negation below is built with VT while Zero was
 // created with ShVT -- verify the two types agree on this path.
8066 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8067 } else {
8068 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8069 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8070 SDValue One = DAG.getConstant(1, DL, ShVT);
8071 if (IsFSHL) {
8072 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8073 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8074 } else {
8075 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8076 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8077 }
8078 Z = DAG.getNOT(DL, Z, ShVT);
8079 }
8080 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8081 }
8082
8083 SDValue ShX, ShY;
8084 SDValue ShAmt, InvShAmt;
 // Same two-path structure as expandVPFunnelShift: a direct expansion when
 // the amount is provably nonzero mod BW, otherwise the zero-safe form.
8085 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8086 // fshl: X << C | Y >> (BW - C)
8087 // fshr: X << (BW - C) | Y >> C
8088 // where C = Z % BW is not zero
8089 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8090 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8091 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8092 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8093 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8094 } else {
8095 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8096 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8097 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8098 if (isPowerOf2_32(BW)) {
8099 // Z % BW -> Z & (BW - 1)
8100 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8101 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8102 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8103 } else {
8104 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8105 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8106 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8107 }
8108
8109 SDValue One = DAG.getConstant(1, DL, ShVT);
8110 if (IsFSHL) {
8111 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8112 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8113 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8114 } else {
8115 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8116 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8117 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8118 }
8119 }
8120 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8121}
8122
8123// TODO: Merge with expandFunnelShift.
// Expand ISD::ROTL/ISD::ROTR into shift/and/or arithmetic, preferring the
// reverse-direction rotate when that opcode is better supported.
// AllowVectorOps gates whether the generic expansion may be used for
// vector types. Returns a null SDValue when it may not.
// NOTE(review): original lines 8145-8149 (the remainder of the condition
// started at line 8144, presumably per-opcode vector legality checks) are
// missing from this copy of the file -- confirm against upstream.
8124SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8125 SelectionDAG &DAG) const {
8126 EVT VT = Node->getValueType(0);
8127 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8128 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8129 SDValue Op0 = Node->getOperand(0);
8130 SDValue Op1 = Node->getOperand(1);
8131 SDLoc DL(SDValue(Node, 0));
8132
8133 EVT ShVT = Op1.getValueType();
8134 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8135
8136 // If a rotate in the other direction is more supported, use it.
8137 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8138 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8139 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
 // rot(l|r) x, c -> rot(r|l) x, -c
8140 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8141 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8142 }
8143
8144 if (!AllowVectorOps && VT.isVector() &&
8150 return SDValue();
8151
8152 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8153 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8154 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8155 SDValue ShVal;
8156 SDValue HsVal;
8157 if (isPowerOf2_32(EltSizeInBits)) {
8158 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8159 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8160 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8161 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8162 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8163 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8164 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8165 } else {
 // Non-power-of-two width: use urem for the modulo, and split the
 // opposing shift as "1 then (w - 1 - amt)" so c == 0 never yields a
 // full-width (poison) shift.
8166 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8167 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8168 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8169 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8170 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8171 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8172 SDValue One = DAG.getConstant(1, DL, ShVT);
8173 HsVal =
8174 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8175 }
8176 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8177}
8178
// Expand ISD::SHL_PARTS/SRL_PARTS/SRA_PARTS -- a double-wide shift whose
// input and output are split across Lo/Hi part values -- using FSHL/FSHR
// for the cross-part bits plus selects that fix up shift amounts greater
// than or equal to the part width. Results are written to the Lo/Hi
// reference out-parameters.
// NOTE(review): the first line of the signature (original line 8179) is
// absent from this copy of the file -- presumably
// "void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,";
// confirm against upstream.
8180 SelectionDAG &DAG) const {
8181 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8182 EVT VT = Node->getValueType(0);
8183 unsigned VTBits = VT.getScalarSizeInBits();
8184 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8185
8186 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8187 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8188 SDValue ShOpLo = Node->getOperand(0);
8189 SDValue ShOpHi = Node->getOperand(1);
8190 SDValue ShAmt = Node->getOperand(2);
8191 EVT ShAmtVT = ShAmt.getValueType();
8192 EVT ShAmtCCVT =
8193 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8194 SDLoc dl(Node);
8195
8196 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8197 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8198 // away during isel.
8199 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8200 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
 // Tmp1 is the "overflow" part value: sign-extension of Hi for SRA_PARTS,
 // zero otherwise.
8201 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8202 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8203 : DAG.getConstant(0, dl, VT);
8204
8205 SDValue Tmp2, Tmp3;
8206 if (IsSHL) {
8207 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8208 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8209 } else {
8210 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8211 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8212 }
8213
8214 // If the shift amount is larger or equal than the width of a part we don't
8215 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8216 // values for large shift amounts.
8217 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8218 DAG.getConstant(VTBits, dl, ShAmtVT));
8219 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8220 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8221
8222 if (IsSHL) {
8223 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8224 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8225 } else {
8226 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8227 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8228 }
8229}
8230
// Software expansion of FP_TO_SINT, mirroring compiler-rt's fixsfdi:
// bitcast the float to an integer, extract sign/exponent/mantissa with
// integer ops, and shift the mantissa into place. Only f32 -> i64 is
// supported (per the FIXME below); strict nodes are rejected so that the
// NaN-conversion trap permitted by IEEE 754-2008 sec 5.8 is preserved.
// On success the converted value is written to Result and true is returned.
// NOTE(review): the first line of the signature (original line 8231) is
// absent from this copy of the file -- presumably
// "bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,";
// confirm against upstream.
8232 SelectionDAG &DAG) const {
8233 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8234 SDValue Src = Node->getOperand(OpNo);
8235 EVT SrcVT = Src.getValueType();
8236 EVT DstVT = Node->getValueType(0);
8237 SDLoc dl(SDValue(Node, 0));
8238
8239 // FIXME: Only f32 to i64 conversions are supported.
8240 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8241 return false;
8242
8243 if (Node->isStrictFPOpcode())
8244 // When a NaN is converted to an integer a trap is allowed. We can't
8245 // use this expansion here because it would eliminate that trap. Other
8246 // traps are also allowed and cannot be eliminated. See
8247 // IEEE 754-2008 sec 5.8.
8248 return false;
8249
8250 // Expand f32 -> i64 conversion
8251 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8252 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8253 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8254 EVT IntVT = SrcVT.changeTypeToInteger();
8255 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8256
 // IEEE-754 single-precision field constants: 8 exponent bits at bit 23,
 // bias 127, 23 mantissa bits.
8257 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8258 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8259 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8260 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8261 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8262 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8263
8264 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8265
8266 SDValue ExponentBits = DAG.getNode(
8267 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8268 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8269 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8270
 // Arithmetic shift of the isolated sign bit produces all-ones (negative)
 // or all-zeros, i.e. a 0/-1 mask used for the final negation.
8271 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8272 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8273 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8274 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8275
 // Re-attach the implicit leading 1 to the mantissa.
8276 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8277 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8278 DAG.getConstant(0x00800000, dl, IntVT));
8279
8280 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8281
 // Shift the mantissa left or right depending on whether the (unbiased)
 // exponent exceeds the mantissa width.
8282 R = DAG.getSelectCC(
8283 dl, Exponent, ExponentLoBit,
8284 DAG.getNode(ISD::SHL, dl, DstVT, R,
8285 DAG.getZExtOrTrunc(
8286 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8287 dl, IntShVT)),
8288 DAG.getNode(ISD::SRL, dl, DstVT, R,
8289 DAG.getZExtOrTrunc(
8290 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8291 dl, IntShVT)),
8292 ISD::SETGT);
8293
 // Conditional two's-complement negation: (R ^ Sign) - Sign.
8294 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8295 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8296
 // Values with a negative unbiased exponent have magnitude < 1 -> 0.
8297 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8298 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8299 return true;
8300}
8301
// Expand FP_TO_UINT in terms of FP_TO_SINT: when the source can represent
// the destination signmask, either convert directly (small values) or
// subtract the signmask before converting and XOR it back in afterwards.
// Handles both strict and non-strict nodes; on success writes the value to
// Result (and the chain to Chain for strict nodes) and returns true.
// NOTE(review): several lines are missing from this copy of the file --
// original lines 8302 (first signature line), 8318, 8320, 8329 and 8341
// (parts of multi-line expressions/conditions); confirm against upstream
// before editing.
8303 SDValue &Chain,
8304 SelectionDAG &DAG) const {
8305 SDLoc dl(SDValue(Node, 0));
8306 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8307 SDValue Src = Node->getOperand(OpNo);
8308
8309 EVT SrcVT = Src.getValueType();
8310 EVT DstVT = Node->getValueType(0);
8311 EVT SetCCVT =
8312 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8313 EVT DstSetCCVT =
8314 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8315
8316 // Only expand vector types if we have the appropriate vector bit operations.
8317 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8319 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8321 return false;
8322
8323 // If the maximum float value is smaller then the signed integer range,
8324 // the destination signmask can't be represented by the float, so we can
8325 // just use FP_TO_SINT directly.
8326 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8327 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8328 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8330 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8331 if (Node->isStrictFPOpcode()) {
8332 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8333 { Node->getOperand(0), Src });
8334 Chain = Result.getValue(1);
8335 } else
8336 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8337 return true;
8338 }
8339
8340 // Don't expand it if there isn't cheap fsub instruction.
8342 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8343 return false;
8344
 // Cst is the signmask rendered as a floating-point constant; Sel is true
 // when Src is below it (i.e. fits in the signed range directly).
8345 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8346 SDValue Sel;
8347
8348 if (Node->isStrictFPOpcode()) {
8349 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8350 Node->getOperand(0), /*IsSignaling*/ true);
8351 Chain = Sel.getValue(1);
8352 } else {
8353 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8354 }
8355
8356 bool Strict = Node->isStrictFPOpcode() ||
8357 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8358
8359 if (Strict) {
8360 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8361 // signmask then offset (the result of which should be fully representable).
8362 // Sel = Src < 0x8000000000000000
8363 // FltOfs = select Sel, 0, 0x8000000000000000
8364 // IntOfs = select Sel, 0, 0x8000000000000000
8365 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8366
8367 // TODO: Should any fast-math-flags be set for the FSUB?
8368 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8369 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8370 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8371 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8372 DAG.getConstant(0, dl, DstVT),
8373 DAG.getConstant(SignMask, dl, DstVT));
8374 SDValue SInt;
8375 if (Node->isStrictFPOpcode()) {
8376 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8377 { Chain, Src, FltOfs });
8378 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8379 { Val.getValue(1), Val });
8380 Chain = SInt.getValue(1);
8381 } else {
8382 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8383 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8384 }
8385 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8386 } else {
8387 // Expand based on maximum range of FP_TO_SINT:
8388 // True = fp_to_sint(Src)
8389 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8390 // Result = select (Src < 0x8000000000000000), True, False
8391
8392 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8393 // TODO: Should any fast-math-flags be set for the FSUB?
8394 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8395 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8396 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8397 DAG.getConstant(SignMask, dl, DstVT));
8398 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8399 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8400 }
8401 return true;
8402}
8403
// Expand UINT_TO_FP of i64 -> f64 following compiler-rt's __floatundidf:
// split the integer into 32-bit halves, OR each half into the mantissa of
// a magic double (2^52 and 2^84), bitcast, and recombine with FSUB/FADD.
// Rejects strict nodes (the trick produces -0.0 for input 0 when rounding
// toward negative infinity) and unsupported types; writes Result and
// returns true on success.
// NOTE(review): original lines 8404 (first signature line), 8419 and
// 8431-8434 (parts of two multi-line conditions) are missing from this copy
// of the file -- confirm against upstream before editing.
8405 SDValue &Chain, SelectionDAG &DAG) const {
8406 // This transform is not correct for converting 0 when rounding mode is set
8407 // to round toward negative infinity which will produce -0.0. So disable
8408 // under strictfp.
8409 if (Node->isStrictFPOpcode())
8410 return false;
8411
8412 SDValue Src = Node->getOperand(0);
8413 EVT SrcVT = Src.getValueType();
8414 EVT DstVT = Node->getValueType(0);
8415
8416 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8417 // it.
8418 if (Node->getFlags().hasNonNeg() &&
8420 Result =
8421 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8422 return true;
8423 }
8424
8425 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8426 return false;
8427
8428 // Only expand vector types if we have the appropriate vector bit
8429 // operations.
8430 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8435 return false;
8436
8437 SDLoc dl(SDValue(Node, 0));
8438 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8439
8440 // Implementation of unsigned i64 to f64 following the algorithm in
8441 // __floatundidf in compiler_rt. This implementation performs rounding
8442 // correctly in all rounding modes with the exception of converting 0
8443 // when rounding toward negative infinity. In that case the fsub will
8444 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8445 // incorrect.
 // 0x433...0 is 2^52 and 0x453...0 is 2^84 as double bit patterns; the
 // combined constant is 2^84 + 2^52 (bit pattern 0x4530000000100000).
8446 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8447 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8448 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8449 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8450 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8451 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8452
8453 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8454 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8455 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8456 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8457 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8458 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8459 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8460 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8461 return true;
8462}
8463
// Try to lower FMINNUM/FMAXNUM (and their strict variants) to a single
// select_cc when the no-NaNs flag makes a plain ordered comparison well
// defined. Returns a null SDValue when no-NaNs is absent, the condition
// code is not usable, or the type is a vector.
// NOTE(review): original lines 8465 (second signature line) and 8476 (part
// of the legality condition below) are missing from this copy of the file
// -- confirm against upstream before editing.
8464SDValue
8466 SelectionDAG &DAG) const {
8467 unsigned Opcode = Node->getOpcode();
8468 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8469 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8470 "Wrong opcode");
8471
8472 if (Node->getFlags().hasNoNaNs()) {
8473 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8474 EVT VT = Node->getValueType(0);
8475 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8477 VT.isVector())
8478 return SDValue();
8479 SDValue Op1 = Node->getOperand(0);
8480 SDValue Op2 = Node->getOperand(1);
8481 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8482 // Copy FMF flags, but always set the no-signed-zeros flag
8483 // as this is implied by the FMINNUM/FMAXNUM semantics.
8484 SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8485 return SelCC;
8486 }
8487
8488 return SDValue();
8489}
8490
// Expand FMINNUM/FMAXNUM: first try splitting vector ops, then prefer the
// *_IEEE variants (quieting possible sNaN inputs with FCANONICALIZE), then
// FMINIMUM/FMAXIMUM when NaNs and conflicting signed zeros are excluded,
// and finally a select_cc via createSelectForFMINNUM_FMAXNUM. Returns a
// null SDValue when no expansion applies.
// NOTE(review): original lines 8491 (first signature line) and 8498 (the
// call reporting the scalable-vector error whose message appears at line
// 8499) are missing from this copy of the file -- confirm upstream.
8492 SelectionDAG &DAG) const {
8493 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8494 return Expanded;
8495
8496 EVT VT = Node->getValueType(0);
8497 if (VT.isScalableVector())
8499 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8500
8501 SDLoc dl(Node);
8502 unsigned NewOp =
8503 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8504
8505 if (isOperationLegalOrCustom(NewOp, VT)) {
8506 SDValue Quiet0 = Node->getOperand(0);
8507 SDValue Quiet1 = Node->getOperand(1);
8508
8509 if (!Node->getFlags().hasNoNaNs()) {
8510 // Insert canonicalizes if it's possible we need to quiet to get correct
8511 // sNaN behavior.
8512 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8513 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8514 Node->getFlags());
8515 }
8516 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8517 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8518 Node->getFlags());
8519 }
8520 }
8521
8522 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8523 }
8524
8525 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8526 // instead if there are no NaNs and there can't be an incompatible zero
8527 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8528 if ((Node->getFlags().hasNoNaNs() ||
8529 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8530 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8531 (Node->getFlags().hasNoSignedZeros() ||
8532 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8533 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8534 unsigned IEEE2018Op =
8535 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8536 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8537 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8538 Node->getOperand(1), Node->getFlags());
8539 }
8540
8541 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8542 return SelCC;
8543
8544 return SDValue();
8545}
8546
// Expand FMINIMUM/FMAXIMUM (NaN-propagating min/max with -0.0 < +0.0):
// build a NaN-agnostic min/max from whatever native op or setcc/select is
// legal, then explicitly propagate NaNs and, if needed, fix up the
// signed-zero ordering with IS_FPCLASS-based selects.
// NOTE(review): original lines 8547 (first signature line), 8563 (the
// declaration of MinMax, assigned below), 8577, 8590 and 8597 (parts of
// three multi-line conditions/expressions) are missing from this copy of
// the file -- confirm against upstream before editing.
8548 SelectionDAG &DAG) const {
8549 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8550 return Expanded;
8551
8552 SDLoc DL(N);
8553 SDValue LHS = N->getOperand(0);
8554 SDValue RHS = N->getOperand(1);
8555 unsigned Opc = N->getOpcode();
8556 EVT VT = N->getValueType(0);
8557 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8558 bool IsMax = Opc == ISD::FMAXIMUM;
8559 SDNodeFlags Flags = N->getFlags();
8560
8561 // First, implement comparison not propagating NaN. If no native fmin or fmax
8562 // available, use plain select with setcc instead.
8564 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8565 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8566
8567 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8568 // signed zero behavior.
8569 bool MinMaxMustRespectOrderedZero = false;
8570
8571 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8572 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8573 MinMaxMustRespectOrderedZero = true;
8574 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8575 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8576 } else {
8578 return DAG.UnrollVectorOp(N);
8579
8580 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8581 SDValue Compare =
8582 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8583 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8584 }
8585
8586 // Propagate any NaN of both operands
8587 if (!N->getFlags().hasNoNaNs() &&
8588 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8589 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8591 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8592 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8593 }
8594
8595 // fminimum/fmaximum requires -0.0 less than +0.0
8596 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
 // When the result compares equal to 0.0, prefer the operand that is the
 // required zero class (+0 for max, -0 for min).
8598 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8599 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8600 SDValue TestZero =
8601 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8602 SDValue LCmp = DAG.getSelect(
8603 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8604 MinMax, Flags);
8605 SDValue RCmp = DAG.getSelect(
8606 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8607 LCmp, Flags);
8608 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8609 }
8610
8611 return MinMax;
8612}
8613
// Expand FMINIMUMNUM/FMAXIMUMNUM (NaN-discarding min/max that orders
// signed zeros): try progressively weaker native ops (FMINNUM_IEEE/
// FMAXNUM_IEEE with sNaN quieting, then FMINIMUM/FMAXIMUM, then FMINNUM/
// FMAXNUM) when flags or known-value facts make them equivalent; otherwise
// build the result from select_cc and fix up NaN and signed-zero behavior
// manually.
// NOTE(review): original lines 8614 (first signature line), 8627, 8649,
// 8665 and 8686 (parts of multi-line expressions/conditions) are missing
// from this copy of the file -- confirm against upstream before editing.
8615 SelectionDAG &DAG) const {
8616 SDLoc DL(Node);
8617 SDValue LHS = Node->getOperand(0);
8618 SDValue RHS = Node->getOperand(1);
8619 unsigned Opc = Node->getOpcode();
8620 EVT VT = Node->getValueType(0);
8621 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8622 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8623 const TargetOptions &Options = DAG.getTarget().Options;
8624 SDNodeFlags Flags = Node->getFlags();
8625
8626 unsigned NewOp =
8628
8629 if (isOperationLegalOrCustom(NewOp, VT)) {
8630 if (!Flags.hasNoNaNs()) {
8631 // Insert canonicalizes if it's possible we need to quiet to get correct
8632 // sNaN behavior.
8633 if (!DAG.isKnownNeverSNaN(LHS)) {
8634 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8635 }
8636 if (!DAG.isKnownNeverSNaN(RHS)) {
8637 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8638 }
8639 }
8640
8641 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8642 }
8643
8644 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8645 // same behaviors for all of other cases: +0.0 vs -0.0 included.
8646 if (Flags.hasNoNaNs() ||
8647 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8648 unsigned IEEE2019Op =
8650 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8651 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8652 }
8653
8654 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8655 // either one for +0.0 vs -0.0.
8656 if ((Flags.hasNoNaNs() ||
8657 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8658 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8659 DAG.isKnownNeverZeroFloat(RHS))) {
8660 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8661 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8662 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8663 }
8664
8666 return DAG.UnrollVectorOp(Node);
8667
8668 // If only one operand is NaN, override it with another operand.
8669 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8670 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8671 }
8672 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8673 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8674 }
8675
8676 SDValue MinMax =
8677 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8678 // If MinMax is NaN, let's quiet it.
8679 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
8680 !DAG.isKnownNeverNaN(RHS)) {
8681 MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
8682 }
8683
8684 // Fixup signed zero behavior.
8685 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8687 return MinMax;
8688 }
 // When the result compares equal to 0.0, prefer the operand that is the
 // required zero class (+0 for max, -0 for min).
8689 SDValue TestZero =
8690 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8691 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8692 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8693 SDValue LCmp = DAG.getSelect(
8694 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8695 MinMax, Flags);
8696 SDValue RCmp = DAG.getSelect(
8697 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8698 Flags);
8699 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8700}
8701
8702/// Returns a true value if if this FPClassTest can be performed with an ordered
8703/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8704/// std::nullopt if it cannot be performed as a compare with 0.
8705static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8706 const fltSemantics &Semantics,
8707 const MachineFunction &MF) {
8708 FPClassTest OrderedMask = Test & ~fcNan;
8709 FPClassTest NanTest = Test & fcNan;
8710 bool IsOrdered = NanTest == fcNone;
8711 bool IsUnordered = NanTest == fcNan;
8712
8713 // Skip cases that are testing for only a qnan or snan.
8714 if (!IsOrdered && !IsUnordered)
8715 return std::nullopt;
8716
8717 if (OrderedMask == fcZero &&
8718 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8719 return IsOrdered;
8720 if (OrderedMask == (fcZero | fcSubnormal) &&
8721 MF.getDenormalMode(Semantics).inputsAreZero())
8722 return IsOrdered;
8723 return std::nullopt;
8724}
8725
8727 const FPClassTest OrigTestMask,
8728 SDNodeFlags Flags, const SDLoc &DL,
8729 SelectionDAG &DAG) const {
8730 EVT OperandVT = Op.getValueType();
8731 assert(OperandVT.isFloatingPoint());
8732 FPClassTest Test = OrigTestMask;
8733
8734 // Degenerated cases.
8735 if (Test == fcNone)
8736 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8737 if (Test == fcAllFlags)
8738 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8739
8740 // PPC double double is a pair of doubles, of which the higher part determines
8741 // the value class.
8742 if (OperandVT == MVT::ppcf128) {
8743 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8744 DAG.getConstant(1, DL, MVT::i32));
8745 OperandVT = MVT::f64;
8746 }
8747
8748 // Floating-point type properties.
8749 EVT ScalarFloatVT = OperandVT.getScalarType();
8750 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8751 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8752 bool IsF80 = (ScalarFloatVT == MVT::f80);
8753
8754 // Some checks can be implemented using float comparisons, if floating point
8755 // exceptions are ignored.
8756 if (Flags.hasNoFPExcept() &&
8758 FPClassTest FPTestMask = Test;
8759 bool IsInvertedFP = false;
8760
8761 if (FPClassTest InvertedFPCheck =
8762 invertFPClassTestIfSimpler(FPTestMask, true)) {
8763 FPTestMask = InvertedFPCheck;
8764 IsInvertedFP = true;
8765 }
8766
8767 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8768 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8769
8770 // See if we can fold an | fcNan into an unordered compare.
8771 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8772
8773 // Can't fold the ordered check if we're only testing for snan or qnan
8774 // individually.
8775 if ((FPTestMask & fcNan) != fcNan)
8776 OrderedFPTestMask = FPTestMask;
8777
8778 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8779
8780 if (std::optional<bool> IsCmp0 =
8781 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8782 IsCmp0 && (isCondCodeLegalOrCustom(
8783 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8784 OperandVT.getScalarType().getSimpleVT()))) {
8785
8786 // If denormals could be implicitly treated as 0, this is not equivalent
8787 // to a compare with 0 since it will also be true for denormals.
8788 return DAG.getSetCC(DL, ResultVT, Op,
8789 DAG.getConstantFP(0.0, DL, OperandVT),
8790 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8791 }
8792
8793 if (FPTestMask == fcNan &&
8795 OperandVT.getScalarType().getSimpleVT()))
8796 return DAG.getSetCC(DL, ResultVT, Op, Op,
8797 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8798
8799 bool IsOrderedInf = FPTestMask == fcInf;
8800 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8801 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8802 : UnorderedCmpOpcode,
8803 OperandVT.getScalarType().getSimpleVT()) &&
8806 (OperandVT.isVector() &&
8808 // isinf(x) --> fabs(x) == inf
8809 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8810 SDValue Inf =
8811 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8812 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8813 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8814 }
8815
8816 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8817 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8818 : UnorderedCmpOpcode,
8819 OperandVT.getSimpleVT())) {
8820 // isposinf(x) --> x == inf
8821 // isneginf(x) --> x == -inf
8822 // isposinf(x) || nan --> x u== inf
8823 // isneginf(x) || nan --> x u== -inf
8824
8825 SDValue Inf = DAG.getConstantFP(
8826 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
8827 OperandVT);
8828 return DAG.getSetCC(DL, ResultVT, Op, Inf,
8829 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8830 }
8831
8832 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8833 // TODO: Could handle ordered case, but it produces worse code for
8834 // x86. Maybe handle ordered if fabs is free?
8835
8836 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8837 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8838
8839 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8840 OperandVT.getScalarType().getSimpleVT())) {
8841 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8842
8843 // TODO: Maybe only makes sense if fabs is free. Integer test of
8844 // exponent bits seems better for x86.
8845 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8846 SDValue SmallestNormal = DAG.getConstantFP(
8847 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8848 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8849 IsOrdered ? OrderedOp : UnorderedOp);
8850 }
8851 }
8852
8853 if (FPTestMask == fcNormal) {
8854 // TODO: Handle unordered
8855 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8856 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
8857
8858 if (isCondCodeLegalOrCustom(IsFiniteOp,
8859 OperandVT.getScalarType().getSimpleVT()) &&
8860 isCondCodeLegalOrCustom(IsNormalOp,
8861 OperandVT.getScalarType().getSimpleVT()) &&
8862 isFAbsFree(OperandVT)) {
8863 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
8864 SDValue Inf =
8865 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8866 SDValue SmallestNormal = DAG.getConstantFP(
8867 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8868
8869 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8870 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
8871 SDValue IsNormal =
8872 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
8873 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
8874 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
8875 }
8876 }
8877 }
8878
8879 // Some checks may be represented as inversion of simpler check, for example
8880 // "inf|normal|subnormal|zero" => !"nan".
8881 bool IsInverted = false;
8882
8883 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8884 Test = InvertedCheck;
8885 IsInverted = true;
8886 }
8887
8888 // In the general case use integer operations.
8889 unsigned BitSize = OperandVT.getScalarSizeInBits();
8890 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8891 if (OperandVT.isVector())
8892 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8893 OperandVT.getVectorElementCount());
8894 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8895
8896 // Various masks.
8897 APInt SignBit = APInt::getSignMask(BitSize);
8898 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8899 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8900 const unsigned ExplicitIntBitInF80 = 63;
8901 APInt ExpMask = Inf;
8902 if (IsF80)
8903 ExpMask.clearBit(ExplicitIntBitInF80);
8904 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8905 APInt QNaNBitMask =
8906 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8907 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8908
8909 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8910 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8911 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8912 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8913 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8914 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8915
8916 SDValue Res;
8917 const auto appendResult = [&](SDValue PartialRes) {
8918 if (PartialRes) {
8919 if (Res)
8920 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8921 else
8922 Res = PartialRes;
8923 }
8924 };
8925
8926 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8927 const auto getIntBitIsSet = [&]() -> SDValue {
8928 if (!IntBitIsSetV) {
8929 APInt IntBitMask(BitSize, 0);
8930 IntBitMask.setBit(ExplicitIntBitInF80);
8931 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8932 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8933 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8934 }
8935 return IntBitIsSetV;
8936 };
8937
8938 // Split the value into sign bit and absolute value.
8939 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8940 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8941 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8942
8943 // Tests that involve more than one class should be processed first.
8944 SDValue PartialRes;
8945
8946 if (IsF80)
8947 ; // Detect finite numbers of f80 by checking individual classes because
8948 // they have different settings of the explicit integer bit.
8949 else if ((Test & fcFinite) == fcFinite) {
8950 // finite(V) ==> abs(V) < exp_mask
8951 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8952 Test &= ~fcFinite;
8953 } else if ((Test & fcFinite) == fcPosFinite) {
8954 // finite(V) && V > 0 ==> V < exp_mask
8955 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8956 Test &= ~fcPosFinite;
8957 } else if ((Test & fcFinite) == fcNegFinite) {
8958 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8959 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8960 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8961 Test &= ~fcNegFinite;
8962 }
8963 appendResult(PartialRes);
8964
8965 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8966 // fcZero | fcSubnormal => test all exponent bits are 0
8967 // TODO: Handle sign bit specific cases
8968 if (PartialCheck == (fcZero | fcSubnormal)) {
8969 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8970 SDValue ExpIsZero =
8971 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8972 appendResult(ExpIsZero);
8973 Test &= ~PartialCheck & fcAllFlags;
8974 }
8975 }
8976
8977 // Check for individual classes.
8978
8979 if (unsigned PartialCheck = Test & fcZero) {
8980 if (PartialCheck == fcPosZero)
8981 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8982 else if (PartialCheck == fcZero)
8983 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8984 else // ISD::fcNegZero
8985 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8986 appendResult(PartialRes);
8987 }
8988
8989 if (unsigned PartialCheck = Test & fcSubnormal) {
8990 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8991 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8992 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8993 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8994 SDValue VMinusOneV =
8995 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8996 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8997 if (PartialCheck == fcNegSubnormal)
8998 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8999 appendResult(PartialRes);
9000 }
9001
9002 if (unsigned PartialCheck = Test & fcInf) {
9003 if (PartialCheck == fcPosInf)
9004 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9005 else if (PartialCheck == fcInf)
9006 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9007 else { // ISD::fcNegInf
9008 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9009 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9010 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9011 }
9012 appendResult(PartialRes);
9013 }
9014
9015 if (unsigned PartialCheck = Test & fcNan) {
9016 APInt InfWithQnanBit = Inf | QNaNBitMask;
9017 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9018 if (PartialCheck == fcNan) {
9019 // isnan(V) ==> abs(V) > int(inf)
9020 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9021 if (IsF80) {
9022 // Recognize unsupported values as NaNs for compatibility with glibc.
9023 // In them (exp(V)==0) == int_bit.
9024 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9025 SDValue ExpIsZero =
9026 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9027 SDValue IsPseudo =
9028 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9029 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9030 }
9031 } else if (PartialCheck == fcQNan) {
9032 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9033 PartialRes =
9034 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9035 } else { // ISD::fcSNan
9036 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9037 // abs(V) < (unsigned(Inf) | quiet_bit)
9038 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9039 SDValue IsNotQnan =
9040 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9041 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9042 }
9043 appendResult(PartialRes);
9044 }
9045
9046 if (unsigned PartialCheck = Test & fcNormal) {
9047 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9048 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9049 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9050 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9051 APInt ExpLimit = ExpMask - ExpLSB;
9052 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9053 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9054 if (PartialCheck == fcNegNormal)
9055 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9056 else if (PartialCheck == fcPosNormal) {
9057 SDValue PosSignV =
9058 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
9059 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9060 }
9061 if (IsF80)
9062 PartialRes =
9063 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9064 appendResult(PartialRes);
9065 }
9066
9067 if (!Res)
9068 return DAG.getConstant(IsInverted, DL, ResultVT);
9069 if (IsInverted)
9070 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
9071 return Res;
9072}
9073
9074// Only expand vector types if we have the appropriate vector bit operations.
9075static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9076 assert(VT.isVector() && "Expected vector type");
9077 unsigned Len = VT.getScalarSizeInBits();
9078 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9081 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9083}
9084
9086 SDLoc dl(Node);
9087 EVT VT = Node->getValueType(0);
9088 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9089 SDValue Op = Node->getOperand(0);
9090 unsigned Len = VT.getScalarSizeInBits();
9091 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9092
9093 // TODO: Add support for irregular type lengths.
9094 if (!(Len <= 128 && Len % 8 == 0))
9095 return SDValue();
9096
9097 // Only expand vector types if we have the appropriate vector bit operations.
9098 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9099 return SDValue();
9100
9101 // This is the "best" algorithm from
9102 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9103 SDValue Mask55 =
9104 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9105 SDValue Mask33 =
9106 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9107 SDValue Mask0F =
9108 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9109
9110 // v = v - ((v >> 1) & 0x55555555...)
9111 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9112 DAG.getNode(ISD::AND, dl, VT,
9113 DAG.getNode(ISD::SRL, dl, VT, Op,
9114 DAG.getConstant(1, dl, ShVT)),
9115 Mask55));
9116 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9117 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9118 DAG.getNode(ISD::AND, dl, VT,
9119 DAG.getNode(ISD::SRL, dl, VT, Op,
9120 DAG.getConstant(2, dl, ShVT)),
9121 Mask33));
9122 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9123 Op = DAG.getNode(ISD::AND, dl, VT,
9124 DAG.getNode(ISD::ADD, dl, VT, Op,
9125 DAG.getNode(ISD::SRL, dl, VT, Op,
9126 DAG.getConstant(4, dl, ShVT))),
9127 Mask0F);
9128
9129 if (Len <= 8)
9130 return Op;
9131
9132 // Avoid the multiply if we only have 2 bytes to add.
9133 // TODO: Only doing this for scalars because vectors weren't as obviously
9134 // improved.
9135 if (Len == 16 && !VT.isVector()) {
9136 // v = (v + (v >> 8)) & 0x00FF;
9137 return DAG.getNode(ISD::AND, dl, VT,
9138 DAG.getNode(ISD::ADD, dl, VT, Op,
9139 DAG.getNode(ISD::SRL, dl, VT, Op,
9140 DAG.getConstant(8, dl, ShVT))),
9141 DAG.getConstant(0xFF, dl, VT));
9142 }
9143
9144 // v = (v * 0x01010101...) >> (Len - 8)
9145 SDValue V;
9148 SDValue Mask01 =
9149 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9150 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9151 } else {
9152 V = Op;
9153 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9154 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9155 V = DAG.getNode(ISD::ADD, dl, VT, V,
9156 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9157 }
9158 }
9159 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9160}
9161
9163 SDLoc dl(Node);
9164 EVT VT = Node->getValueType(0);
9165 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9166 SDValue Op = Node->getOperand(0);
9167 SDValue Mask = Node->getOperand(1);
9168 SDValue VL = Node->getOperand(2);
9169 unsigned Len = VT.getScalarSizeInBits();
9170 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9171
9172 // TODO: Add support for irregular type lengths.
9173 if (!(Len <= 128 && Len % 8 == 0))
9174 return SDValue();
9175
9176 // This is same algorithm of expandCTPOP from
9177 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9178 SDValue Mask55 =
9179 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9180 SDValue Mask33 =
9181 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9182 SDValue Mask0F =
9183 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9184
9185 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9186
9187 // v = v - ((v >> 1) & 0x55555555...)
9188 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9189 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9190 DAG.getConstant(1, dl, ShVT), Mask, VL),
9191 Mask55, Mask, VL);
9192 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9193
9194 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9195 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9196 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9197 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9198 DAG.getConstant(2, dl, ShVT), Mask, VL),
9199 Mask33, Mask, VL);
9200 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9201
9202 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9203 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9204 Mask, VL),
9205 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9206 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9207
9208 if (Len <= 8)
9209 return Op;
9210
9211 // v = (v * 0x01010101...) >> (Len - 8)
9212 SDValue V;
9214 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9215 SDValue Mask01 =
9216 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9217 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9218 } else {
9219 V = Op;
9220 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9221 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9222 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9223 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9224 Mask, VL);
9225 }
9226 }
9227 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9228 Mask, VL);
9229}
9230
9232 SDLoc dl(Node);
9233 EVT VT = Node->getValueType(0);
9234 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9235 SDValue Op = Node->getOperand(0);
9236 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9237
9238 // If the non-ZERO_UNDEF version is supported we can use that instead.
9239 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9241 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9242
9243 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9245 EVT SetCCVT =
9246 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9247 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9248 SDValue Zero = DAG.getConstant(0, dl, VT);
9249 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9250 return DAG.getSelect(dl, VT, SrcIsZero,
9251 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9252 }
9253
9254 // Only expand vector types if we have the appropriate vector bit operations.
9255 // This includes the operations needed to expand CTPOP if it isn't supported.
9256 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9258 !canExpandVectorCTPOP(*this, VT)) ||
9261 return SDValue();
9262
9263 // for now, we do this:
9264 // x = x | (x >> 1);
9265 // x = x | (x >> 2);
9266 // ...
9267 // x = x | (x >>16);
9268 // x = x | (x >>32); // for 64-bit input
9269 // return popcount(~x);
9270 //
9271 // Ref: "Hacker's Delight" by Henry Warren
9272 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9273 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9274 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9275 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9276 }
9277 Op = DAG.getNOT(dl, Op, VT);
9278 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9279}
9280
9282 SDLoc dl(Node);
9283 EVT VT = Node->getValueType(0);
9284 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9285 SDValue Op = Node->getOperand(0);
9286 SDValue Mask = Node->getOperand(1);
9287 SDValue VL = Node->getOperand(2);
9288 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9289
9290 // do this:
9291 // x = x | (x >> 1);
9292 // x = x | (x >> 2);
9293 // ...
9294 // x = x | (x >>16);
9295 // x = x | (x >>32); // for 64-bit input
9296 // return popcount(~x);
9297 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9298 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9299 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9300 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9301 VL);
9302 }
9303 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9304 Mask, VL);
9305 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9306}
9307
9309 const SDLoc &DL, EVT VT, SDValue Op,
9310 unsigned BitWidth) const {
9311 if (BitWidth != 32 && BitWidth != 64)
9312 return SDValue();
9313 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9314 : APInt(64, 0x0218A392CD3D5DBFULL);
9315 const DataLayout &TD = DAG.getDataLayout();
9316 MachinePointerInfo PtrInfo =
9318 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9319 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9320 SDValue Lookup = DAG.getNode(
9321 ISD::SRL, DL, VT,
9322 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9323 DAG.getConstant(DeBruijn, DL, VT)),
9324 DAG.getConstant(ShiftAmt, DL, VT));
9326
9328 for (unsigned i = 0; i < BitWidth; i++) {
9329 APInt Shl = DeBruijn.shl(i);
9330 APInt Lshr = Shl.lshr(ShiftAmt);
9331 Table[Lshr.getZExtValue()] = i;
9332 }
9333
9334 // Create a ConstantArray in Constant Pool
9335 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9336 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9337 TD.getPrefTypeAlign(CA->getType()));
9338 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9339 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9340 PtrInfo, MVT::i8);
9341 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9342 return ExtLoad;
9343
9344 EVT SetCCVT =
9345 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9346 SDValue Zero = DAG.getConstant(0, DL, VT);
9347 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9348 return DAG.getSelect(DL, VT, SrcIsZero,
9349 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9350}
9351
9353 SDLoc dl(Node);
9354 EVT VT = Node->getValueType(0);
9355 SDValue Op = Node->getOperand(0);
9356 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9357
9358 // If the non-ZERO_UNDEF version is supported we can use that instead.
9359 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9361 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9362
9363 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9365 EVT SetCCVT =
9366 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9367 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9368 SDValue Zero = DAG.getConstant(0, dl, VT);
9369 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9370 return DAG.getSelect(dl, VT, SrcIsZero,
9371 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9372 }
9373
9374 // Only expand vector types if we have the appropriate vector bit operations.
9375 // This includes the operations needed to expand CTPOP if it isn't supported.
9376 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9379 !canExpandVectorCTPOP(*this, VT)) ||
9383 return SDValue();
9384
9385 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9386 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9388 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9389 return V;
9390
9391 // for now, we use: { return popcount(~x & (x - 1)); }
9392 // unless the target has ctlz but not ctpop, in which case we use:
9393 // { return 32 - nlz(~x & (x-1)); }
9394 // Ref: "Hacker's Delight" by Henry Warren
9395 SDValue Tmp = DAG.getNode(
9396 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9397 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9398
9399 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9401 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9402 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9403 }
9404
9405 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9406}
9407
9409 SDValue Op = Node->getOperand(0);
9410 SDValue Mask = Node->getOperand(1);
9411 SDValue VL = Node->getOperand(2);
9412 SDLoc dl(Node);
9413 EVT VT = Node->getValueType(0);
9414
9415 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9416 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9417 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9418 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9419 DAG.getConstant(1, dl, VT), Mask, VL);
9420 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9421 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9422}
9423
9425 SelectionDAG &DAG) const {
9426 // %cond = to_bool_vec %source
9427 // %splat = splat /*val=*/VL
9428 // %tz = step_vector
9429 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9430 // %r = vp.reduce.umin %v
9431 SDLoc DL(N);
9432 SDValue Source = N->getOperand(0);
9433 SDValue Mask = N->getOperand(1);
9434 SDValue EVL = N->getOperand(2);
9435 EVT SrcVT = Source.getValueType();
9436 EVT ResVT = N->getValueType(0);
9437 EVT ResVecVT =
9438 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9439
9440 // Convert to boolean vector.
9441 if (SrcVT.getScalarType() != MVT::i1) {
9442 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9443 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9444 SrcVT.getVectorElementCount());
9445 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9446 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9447 }
9448
9449 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9450 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9451 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9452 SDValue Select =
9453 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9454 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9455}
9456
9458 bool IsNegative) const {
9459 SDLoc dl(N);
9460 EVT VT = N->getValueType(0);
9461 SDValue Op = N->getOperand(0);
9462
9463 // abs(x) -> smax(x,sub(0,x))
9464 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9466 SDValue Zero = DAG.getConstant(0, dl, VT);
9467 Op = DAG.getFreeze(Op);
9468 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9469 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9470 }
9471
9472 // abs(x) -> umin(x,sub(0,x))
9473 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9475 SDValue Zero = DAG.getConstant(0, dl, VT);
9476 Op = DAG.getFreeze(Op);
9477 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9478 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9479 }
9480
9481 // 0 - abs(x) -> smin(x, sub(0,x))
9482 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9484 SDValue Zero = DAG.getConstant(0, dl, VT);
9485 Op = DAG.getFreeze(Op);
9486 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9487 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9488 }
9489
9490 // Only expand vector types if we have the appropriate vector operations.
9491 if (VT.isVector() &&
9493 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9494 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9496 return SDValue();
9497
9498 Op = DAG.getFreeze(Op);
9499 SDValue Shift = DAG.getNode(
9500 ISD::SRA, dl, VT, Op,
9501 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9502 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9503
9504 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9505 if (!IsNegative)
9506 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9507
9508 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9509 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9510}
9511
9513 SDLoc dl(N);
9514 EVT VT = N->getValueType(0);
9515 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9516 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9517 bool IsSigned = N->getOpcode() == ISD::ABDS;
9518
9519 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9520 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9521 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9522 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9523 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9524 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9525 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9526 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9527 }
9528
9529 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9530 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9531 return DAG.getNode(ISD::OR, dl, VT,
9532 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9533 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9534
9535 // If the subtract doesn't overflow then just use abs(sub())
9536 // NOTE: don't use frozen operands for value tracking.
9537 bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9538 DAG.SignBitIsZero(N->getOperand(0));
9539
9540 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
9541 N->getOperand(1)))
9542 return DAG.getNode(ISD::ABS, dl, VT,
9543 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9544
9545 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
9546 N->getOperand(0)))
9547 return DAG.getNode(ISD::ABS, dl, VT,
9548 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9549
9550 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9552 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9553
9554 // Branchless expansion iff cmp result is allbits:
9555 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9556 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9557 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9558 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9559 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9560 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9561 }
9562
9563 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9564 // flag if the (scalar) type is illegal as this is more likely to legalize
9565 // cleanly:
9566 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9567 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9568 SDValue USubO =
9569 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9570 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9571 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9572 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9573 }
9574
9575 // FIXME: Should really try to split the vector in case it's legal on a
9576 // subvector.
9578 return DAG.UnrollVectorOp(N);
9579
9580 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9581 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9582 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9583 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9584}
9585
9587 SDLoc dl(N);
9588 EVT VT = N->getValueType(0);
9589 SDValue LHS = N->getOperand(0);
9590 SDValue RHS = N->getOperand(1);
9591
9592 unsigned Opc = N->getOpcode();
9593 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9594 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9595 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9596 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9597 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9598 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9599 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9600 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9601 "Unknown AVG node");
9602
9603 // If the operands are already extended, we can add+shift.
9604 bool IsExt =
9605 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9606 DAG.ComputeNumSignBits(RHS) >= 2) ||
9607 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9609 if (IsExt) {
9610 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9611 if (!IsFloor)
9612 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9613 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9614 DAG.getShiftAmountConstant(1, VT, dl));
9615 }
9616
9617 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9618 if (VT.isScalarInteger()) {
9619 unsigned BW = VT.getScalarSizeInBits();
9620 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9621 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9622 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9623 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9624 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9625 if (!IsFloor)
9626 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9627 DAG.getConstant(1, dl, ExtVT));
9628 // Just use SRL as we will be truncating away the extended sign bits.
9629 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9630 DAG.getShiftAmountConstant(1, ExtVT, dl));
9631 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9632 }
9633 }
9634
9635 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9636 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9637 SDValue UAddWithOverflow =
9638 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9639
9640 SDValue Sum = UAddWithOverflow.getValue(0);
9641 SDValue Overflow = UAddWithOverflow.getValue(1);
9642
9643 // Right shift the sum by 1
9644 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9645 DAG.getShiftAmountConstant(1, VT, dl));
9646
9647 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9648 SDValue OverflowShl = DAG.getNode(
9649 ISD::SHL, dl, VT, ZeroExtOverflow,
9650 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9651
9652 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9653 }
9654
9655 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9656 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9657 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9658 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9659 LHS = DAG.getFreeze(LHS);
9660 RHS = DAG.getFreeze(RHS);
9661 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9662 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9663 SDValue Shift =
9664 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9665 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9666}
9667
// Body of TargetLowering::expandBSWAP: lower ISD::BSWAP into shifts, masks
// and ORs (or a single rotate for i16) for targets without a native
// byte-swap. Returns SDValue() when VT is not simple or the scalar width is
// not one of the handled cases (i16/i32/i64).
// NOTE(review): the function's opening signature line is not visible in this
// extract — presumably
//   SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
// confirm against the original source.
 9669 SDLoc dl(N);
 9670 EVT VT = N->getValueType(0);
 9671 SDValue Op = N->getOperand(0);
 9672
 9673 if (!VT.isSimple())
 9674 return SDValue();
 9675
 9676 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9677 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9678 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9679 default:
 9680 return SDValue();
 9681 case MVT::i16:
 9682 // Use a rotate by 8. This can be further expanded if necessary.
 9683 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9684 case MVT::i32:
 // i32: isolate each of the four bytes (shift and/or mask), then OR them
 // back together in reversed byte order. TmpN holds byte N moved to its
 // swapped position.
 9685 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9686 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
 9687 DAG.getConstant(0xFF00, dl, VT));
 9688 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
 9689 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9690 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
 9691 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9692 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9693 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9694 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9695 case MVT::i64:
 // i64: same approach with eight bytes; the ORs are combined as a
 // balanced tree (pairs, then pairs of pairs) rather than a linear chain.
 9696 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 9697 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
 9698 DAG.getConstant(255ULL<<8, dl, VT));
 9699 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
 9700 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
 9701 DAG.getConstant(255ULL<<16, dl, VT));
 9702 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
 9703 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
 9704 DAG.getConstant(255ULL<<24, dl, VT));
 9705 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
 9706 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9707 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
 9708 DAG.getConstant(255ULL<<24, dl, VT));
 9709 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9710 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
 9711 DAG.getConstant(255ULL<<16, dl, VT));
 9712 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
 9713 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
 9714 DAG.getConstant(255ULL<<8, dl, VT));
 9715 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 9716 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
 9717 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
 9718 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9719 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9720 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
 9721 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9722 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
 9723 }
 9724}
9725
// Body of the vector-predicated BSWAP expansion (VP_BSWAP): same shift/mask/
// OR decomposition as the non-VP expansion above, but every node is built
// with the VP_* opcodes and carries the (Mask, EVL) predication operands
// taken from the input node. Returns SDValue() for unhandled widths.
// NOTE(review): the opening signature line is not visible in this extract —
// presumably
//   SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
// confirm against the original source. Also note the i16 case uses explicit
// VP_SHL/VP_SRL/VP_OR rather than a rotate, unlike the non-VP version.
 9727 SDLoc dl(N);
 9728 EVT VT = N->getValueType(0);
 9729 SDValue Op = N->getOperand(0);
 9730 SDValue Mask = N->getOperand(1);
 9731 SDValue EVL = N->getOperand(2);
 9732
 9733 if (!VT.isSimple())
 9734 return SDValue();
 9735
 9736 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9737 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9738 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9739 default:
 9740 return SDValue();
 9741 case MVT::i16:
 // i16: swap the two bytes with a shift-left-8 / shift-right-8 / OR.
 9742 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9743 Mask, EVL);
 9744 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9745 Mask, EVL);
 9746 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
 9747 case MVT::i32:
 // i32: isolate each byte, move it to its mirrored position, OR together.
 9748 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9749 Mask, EVL);
 9750 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
 9751 Mask, EVL);
 9752 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
 9753 Mask, EVL);
 9754 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9755 Mask, EVL);
 9756 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9757 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
 9758 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9759 Mask, EVL);
 9760 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
 9761 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
 9762 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
 9763 case MVT::i64:
 // i64: eight bytes, ORed back together as a balanced tree.
 9764 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
 9765 Mask, EVL);
 9766 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9767 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
 9768 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
 9769 Mask, EVL);
 9770 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9771 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
 9772 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
 9773 Mask, EVL);
 9774 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9775 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
 9776 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
 9777 Mask, EVL);
 9778 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9779 Mask, EVL);
 9780 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
 9781 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
 9782 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9783 Mask, EVL);
 9784 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
 9785 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
 9786 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
 9787 Mask, EVL);
 9788 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9789 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
 9790 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
 9791 Mask, EVL);
 9792 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
 9793 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
 9794 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
 9795 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
 9796 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
 9797 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
 9798 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
 9799 }
 9800}
9801
// Body of TargetLowering::expandBITREVERSE: reverse the bit order of a
// scalar/vector integer. For power-of-two widths >= 8 bits it byte-swaps
// first (when wider than one byte), then swaps nibbles, bit-pairs and
// adjacent bits with mask/shift/OR sequences. Otherwise it falls back to
// moving every bit individually.
// NOTE(review): the opening signature line is not visible in this extract —
// presumably
//   SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// confirm against the original source.
 9803 SDLoc dl(N);
 9804 EVT VT = N->getValueType(0);
 9805 SDValue Op = N->getOperand(0);
 9806 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9807 unsigned Sz = VT.getScalarSizeInBits();
 9808
 9809 SDValue Tmp, Tmp2, Tmp3;
 9810
 9811 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
 9812 // and finally the i1 pairs.
 9813 // TODO: We can easily support i4/i2 legal types if any target ever does.
 9814 if (Sz >= 8 && isPowerOf2_32(Sz)) {
 9815 // Create the masks - repeating the pattern every byte.
 9816 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
 9817 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
 9818 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
 9819
 9820 // BSWAP if the type is wider than a single byte.
 9821 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
 9822
 9823 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
 9824 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
 9825 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
 9826 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
 9827 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
 9828 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9829
 9830 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
 9831 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
 9832 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
 9833 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
 9834 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
 9835 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9836
 9837 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
 9838 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
 9839 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
 9840 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
 9841 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
 9842 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9843 return Tmp;
 9844 }
 9845
 // Fallback for non-power-of-2 or sub-byte widths: for each bit position,
 // shift the original bit I into mirrored position J = Sz-1-I, isolate it
 // with a one-bit mask, and OR it into the accumulated result.
 9846 Tmp = DAG.getConstant(0, dl, VT);
 9847 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
 9848 if (I < J)
 9849 Tmp2 =
 9850 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
 9851 else
 9852 Tmp2 =
 9853 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
 9854
 9855 APInt Shift = APInt::getOneBitSet(Sz, J);
 9856 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
 9857 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
 9858 }
 9859
 9860 return Tmp;
 9861}
9862
// Body of the vector-predicated BITREVERSE expansion (VP_BITREVERSE): the
// same bswap + nibble/pair/bit swap sequence as the non-VP version, with
// VP_* opcodes carrying (Mask, EVL). Unlike the non-VP version there is no
// bit-by-bit fallback: unsupported widths return SDValue().
// NOTE(review): the opening signature line is not visible in this extract —
// presumably
//   SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// confirm against the original source.
 9864 assert(N->getOpcode() == ISD::VP_BITREVERSE);
 9865
 9866 SDLoc dl(N);
 9867 EVT VT = N->getValueType(0);
 9868 SDValue Op = N->getOperand(0);
 9869 SDValue Mask = N->getOperand(1);
 9870 SDValue EVL = N->getOperand(2);
 9871 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9872 unsigned Sz = VT.getScalarSizeInBits();
 9873
 9874 SDValue Tmp, Tmp2, Tmp3;
 9875
 9876 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
 9877 // and finally the i1 pairs.
 9878 // TODO: We can easily support i4/i2 legal types if any target ever does.
 9879 if (Sz >= 8 && isPowerOf2_32(Sz)) {
 9880 // Create the masks - repeating the pattern every byte.
 9881 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
 9882 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
 9883 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
 9884
 9885 // BSWAP if the type is wider than a single byte.
 9886 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
 9887
 9888 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
 9889 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
 9890 Mask, EVL);
 9891 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9892 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
 9893 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
 9894 Mask, EVL);
 9895 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
 9896 Mask, EVL);
 9897 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9898
 9899 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
 9900 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
 9901 Mask, EVL);
 9902 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9903 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
 9904 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
 9905 Mask, EVL);
 9906 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
 9907 Mask, EVL);
 9908 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9909
 9910 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
 9911 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
 9912 Mask, EVL);
 9913 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9914 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
 9915 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
 9916 Mask, EVL);
 9917 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
 9918 Mask, EVL);
 9919 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9920 return Tmp;
 9921 }
 9922 return SDValue();
 9923}
9924
// TargetLowering::scalarizeVectorLoad: break a vector load into scalar
// pieces. Returns the loaded value and the output chain as a pair.
// Two paths:
//  - Non-byte-sized elements: the vector must be bit-packed in memory, so
//    load the whole thing as one wide integer and extract each element with
//    SRL + AND + TRUNCATE (endian-aware shift amounts).
//  - Byte-sized elements: emit one (possibly extending) scalar load per
//    element and join the chains with a TokenFactor.
// NOTE(review): the first half of the signature (the line naming the
// function and declaring the LoadSDNode *LD parameter) is not visible in
// this extract — confirm against the original source.
9925std::pair<SDValue, SDValue>
 9927 SelectionDAG &DAG) const {
 9928 SDLoc SL(LD);
 9929 SDValue Chain = LD->getChain();
 9930 SDValue BasePTR = LD->getBasePtr();
 9931 EVT SrcVT = LD->getMemoryVT();
 9932 EVT DstVT = LD->getValueType(0);
 9933 ISD::LoadExtType ExtType = LD->getExtensionType();
 9934
 9935 if (SrcVT.isScalableVector())
 9936 report_fatal_error("Cannot scalarize scalable vector loads");
 9937
 9938 unsigned NumElem = SrcVT.getVectorNumElements();
 9939
 9940 EVT SrcEltVT = SrcVT.getScalarType();
 9941 EVT DstEltVT = DstVT.getScalarType();
 9942
 9943 // A vector must always be stored in memory as-is, i.e. without any padding
 9944 // between the elements, since various code depend on it, e.g. in the
 9945 // handling of a bitcast of a vector type to int, which may be done with a
 9946 // vector store followed by an integer load. A vector that does not have
 9947 // elements that are byte-sized must therefore be stored as an integer
 9948 // built out of the extracted vector elements.
 9949 if (!SrcEltVT.isByteSized()) {
 9950 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
 9951 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
 9952
 9953 unsigned NumSrcBits = SrcVT.getSizeInBits();
 9954 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
 9955
 9956 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
 9957 SDValue SrcEltBitMask = DAG.getConstant(
 9958 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
 9959
 9960 // Load the whole vector and avoid masking off the top bits as it makes
 9961 // the codegen worse.
 9962 SDValue Load =
 9963 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
 9964 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
 9965 LD->getMemOperand()->getFlags(), LD->getAAInfo());
 9966
 // NOTE(review): a declaration line appears to be missing from this
 // extract here — presumably `SmallVector<SDValue, 8> Vals;` (Vals is
 // used below but never declared in the visible text). Confirm.
 9968 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 // Big-endian targets store element 0 in the most-significant bits, so
 // mirror the shift index there.
 9969 unsigned ShiftIntoIdx =
 9970 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
 9971 SDValue ShiftAmount = DAG.getShiftAmountConstant(
 9972 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
 9973 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
 9974 SDValue Elt =
 9975 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
 9976 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
 9977
 9978 if (ExtType != ISD::NON_EXTLOAD) {
 9979 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
 9980 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
 9981 }
 9982
 9983 Vals.push_back(Scalar);
 9984 }
 9985
 9986 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
 9987 return std::make_pair(Value, Load.getValue(1));
 9988 }
 9989
 // Byte-sized element path: one scalar (ext)load per element at
 // consecutive byte offsets.
 9990 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
 9991 assert(SrcEltVT.isByteSized());
 9992
 // NOTE(review): another declaration line appears to be missing here —
 // presumably `SmallVector<SDValue, 8> Vals;` (paired with LoadChains
 // below). Confirm against the original source.
 9994 SmallVector<SDValue, 8> LoadChains;
 9995
 9996 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 9997 SDValue ScalarLoad =
 9998 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
 9999 LD->getPointerInfo().getWithOffset(Idx * Stride),
 10000 SrcEltVT, LD->getOriginalAlign(),
 10001 LD->getMemOperand()->getFlags(), LD->getAAInfo());
 10002
 10003 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
 10004
 10005 Vals.push_back(ScalarLoad.getValue(0));
 10006 LoadChains.push_back(ScalarLoad.getValue(1));
 10007 }
 10008
 10009 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
 10010 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
 10011
 10012 return std::make_pair(Value, NewChain);
 10013}
10014
// TargetLowering::scalarizeVectorStore: break a vector store into scalar
// pieces, returning the final chain.
// Two paths:
//  - Non-byte-sized elements: pack all elements into one wide integer
//    (truncate, zero-extend, shift into place, OR) and emit a single store;
//    shift positions are mirrored on big-endian targets.
//  - Byte-sized elements: extract each element and emit one truncating
//    store per element, joined with a TokenFactor.
// NOTE(review): the first half of the signature (the line naming the
// function and declaring the StoreSDNode *ST parameter) is not visible in
// this extract — confirm against the original source.
 10016 SelectionDAG &DAG) const {
 10017 SDLoc SL(ST);
 10018
 10019 SDValue Chain = ST->getChain();
 10020 SDValue BasePtr = ST->getBasePtr();
 10021 SDValue Value = ST->getValue();
 10022 EVT StVT = ST->getMemoryVT();
 10023
 10024 if (StVT.isScalableVector())
 10025 report_fatal_error("Cannot scalarize scalable vector stores");
 10026
 10027 // The type of the data we want to save
 10028 EVT RegVT = Value.getValueType();
 10029 EVT RegSclVT = RegVT.getScalarType();
 10030
 10031 // The type of data as saved in memory.
 10032 EVT MemSclVT = StVT.getScalarType();
 10033
 10034 unsigned NumElem = StVT.getVectorNumElements();
 10035
 10036 // A vector must always be stored in memory as-is, i.e. without any padding
 10037 // between the elements, since various code depend on it, e.g. in the
 10038 // handling of a bitcast of a vector type to int, which may be done with a
 10039 // vector store followed by an integer load. A vector that does not have
 10040 // elements that are byte-sized must therefore be stored as an integer
 10041 // built out of the extracted vector elements.
 10042 if (!MemSclVT.isByteSized()) {
 10043 unsigned NumBits = StVT.getSizeInBits();
 10044 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
 10045
 10046 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
 10047
 10048 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 10049 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
 10050 DAG.getVectorIdxConstant(Idx, SL));
 // Truncate to the in-memory element width, then zero-extend so the OR
 // below only sets this element's bit range.
 10051 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
 10052 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
 10053 unsigned ShiftIntoIdx =
 10054 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
 10055 SDValue ShiftAmount =
 10056 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
 10057 SDValue ShiftedElt =
 10058 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
 10059 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
 10060 }
 10061
 10062 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
 10063 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
 10064 ST->getAAInfo());
 10065 }
 10066
 10067 // Store Stride in bytes
 10068 unsigned Stride = MemSclVT.getSizeInBits() / 8;
 10069 assert(Stride && "Zero stride!");
 10070 // Extract each of the elements from the original vector and save them into
 10071 // memory individually.
 // NOTE(review): a declaration line appears to be missing from this
 // extract here — presumably `SmallVector<SDValue, 8> Stores;` (Stores is
 // used below but never declared in the visible text). Confirm.
 10073 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 10074 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
 10075 DAG.getVectorIdxConstant(Idx, SL));
 10076
 10077 SDValue Ptr =
 10078 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
 10079
 10080 // This scalar TruncStore may be illegal, but we legalize it later.
 10081 SDValue Store = DAG.getTruncStore(
 10082 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
 10083 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
 10084 ST->getAAInfo());
 10085
 10086 Stores.push_back(Store);
 10087 }
 10088
 10089 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
 10090}
10091
// TargetLowering::expandUnalignedLoad: lower a misaligned load; returns the
// loaded value and the output chain as a pair. Strategy by type:
//  - FP/vector with a legal same-size integer type: do a misaligned integer
//    load and BITCAST (scalarizing first if the integer LOAD itself isn't
//    legal/custom for a vector source).
//  - FP/vector otherwise: copy the value register-width-at-a-time through
//    an aligned stack slot, then reload from the slot.
//  - Plain integer: split into two half-width loads (lo zext, hi keeps the
//    original extension kind) and recombine with SHL + OR.
// NOTE(review): the second line of the signature (naming the function and
// the LoadSDNode *LD / SelectionDAG &DAG parameters) is not visible in this
// extract — confirm against the original source.
10092std::pair<SDValue, SDValue>
 10094 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
 10095 "unaligned indexed loads not implemented!");
 10096 SDValue Chain = LD->getChain();
 10097 SDValue Ptr = LD->getBasePtr();
 10098 EVT VT = LD->getValueType(0);
 10099 EVT LoadedVT = LD->getMemoryVT();
 10100 SDLoc dl(LD);
 10101 auto &MF = DAG.getMachineFunction();
 10102
 10103 if (VT.isFloatingPoint() || VT.isVector()) {
 10104 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
 10105 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
 10106 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
 10107 LoadedVT.isVector()) {
 10108 // Scalarize the load and let the individual components be handled.
 10109 return scalarizeVectorLoad(LD, DAG);
 10110 }
 10111
 10112 // Expand to a (misaligned) integer load of the same size,
 10113 // then bitconvert to floating point or vector.
 10114 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
 10115 LD->getMemOperand());
 10116 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
 10117 if (LoadedVT != VT)
 10118 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
 10119 ISD::ANY_EXTEND, dl, VT, Result);
 10120
 10121 return std::make_pair(Result, newLoad.getValue(1));
 10122 }
 10123
 10124 // Copy the value to a (aligned) stack slot using (unaligned) integer
 10125 // loads and stores, then do a (aligned) load from the stack slot.
 10126 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
 10127 unsigned LoadedBytes = LoadedVT.getStoreSize();
 10128 unsigned RegBytes = RegVT.getSizeInBits() / 8;
 10129 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
 10130
 10131 // Make sure the stack slot is also aligned for the register type.
 10132 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
 10133 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
 // NOTE(review): a declaration line appears to be missing from this
 // extract here — presumably `SmallVector<SDValue, 8> Stores;` (Stores is
 // used below but never declared in the visible text). Confirm.
 10135 SDValue StackPtr = StackBase;
 10136 unsigned Offset = 0;
 10137
 10138 EVT PtrVT = Ptr.getValueType();
 10139 EVT StackPtrVT = StackPtr.getValueType();
 10140
 10141 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
 10142 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
 10143
 10144 // Do all but one copies using the full register width.
 10145 for (unsigned i = 1; i < NumRegs; i++) {
 10146 // Load one integer register's worth from the original location.
 10147 SDValue Load = DAG.getLoad(
 10148 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
 10149 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
 10150 LD->getAAInfo());
 10151 // Follow the load with a store to the stack slot. Remember the store.
 10152 Stores.push_back(DAG.getStore(
 10153 Load.getValue(1), dl, Load, StackPtr,
 10154 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
 10155 // Increment the pointers.
 10156 Offset += RegBytes;
 10157
 10158 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
 10159 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
 10160 }
 10161
 10162 // The last copy may be partial. Do an extending load.
 10163 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
 10164 8 * (LoadedBytes - Offset));
 10165 SDValue Load =
 10166 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
 10167 LD->getPointerInfo().getWithOffset(Offset), MemVT,
 10168 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
 10169 LD->getAAInfo());
 10170 // Follow the load with a store to the stack slot. Remember the store.
 10171 // On big-endian machines this requires a truncating store to ensure
 10172 // that the bits end up in the right place.
 10173 Stores.push_back(DAG.getTruncStore(
 10174 Load.getValue(1), dl, Load, StackPtr,
 10175 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
 10176
 10177 // The order of the stores doesn't matter - say it with a TokenFactor.
 10178 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
 10179
 10180 // Finally, perform the original load only redirected to the stack slot.
 10181 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
 10182 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
 10183 LoadedVT);
 10184
 10185 // Callers expect a MERGE_VALUES node.
 10186 return std::make_pair(Load, TF);
 10187 }
 10188
 10189 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
 10190 "Unaligned load of unsupported type.");
 10191
 10192 // Compute the new VT that is half the size of the old one. This is an
 10193 // integer MVT.
 10194 unsigned NumBits = LoadedVT.getSizeInBits();
 10195 EVT NewLoadedVT;
 10196 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
 10197 NumBits >>= 1;
 10198
 10199 Align Alignment = LD->getOriginalAlign();
 10200 unsigned IncrementSize = NumBits / 8;
 10201 ISD::LoadExtType HiExtType = LD->getExtensionType();
 10202
 10203 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
 10204 if (HiExtType == ISD::NON_EXTLOAD)
 10205 HiExtType = ISD::ZEXTLOAD;
 10206
 10207 // Load the value in two parts
 // Which half lives at the lower address depends on endianness; the low
 // half is always zero-extended so the OR below cannot see stray high bits.
 10208 SDValue Lo, Hi;
 10209 if (DAG.getDataLayout().isLittleEndian()) {
 10210 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
 10211 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
 10212 LD->getAAInfo());
 10213
 10214 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
 10215 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
 10216 LD->getPointerInfo().getWithOffset(IncrementSize),
 10217 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
 10218 LD->getAAInfo());
 10219 } else {
 10220 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
 10221 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
 10222 LD->getAAInfo());
 10223
 10224 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
 10225 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
 10226 LD->getPointerInfo().getWithOffset(IncrementSize),
 10227 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
 10228 LD->getAAInfo());
 10229 }
 10230
 10231 // aggregate the two parts
 10232 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
 10233 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
 10234 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
 10235
 10236 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
 10237 Hi.getValue(1));
 10238
 10239 return std::make_pair(Result, TF);
 10240}
10241
// TargetLowering::expandUnalignedStore: lower a misaligned store, returning
// the final chain. Strategy by type, mirroring expandUnalignedLoad:
//  - FP/vector with a legal same-size integer type: BITCAST to integer and
//    do a misaligned integer store (scalarizing first if integer STORE
//    isn't legal/custom for a vector).
//  - FP/vector otherwise: store to an aligned stack slot, then copy out
//    register-width-at-a-time with unaligned integer loads/stores.
//  - Plain integer: split the value into two half-width truncating stores
//    (SRL for the high half), ordered by endianness.
// NOTE(review): the first half of the signature (the line naming the
// function and declaring the StoreSDNode *ST parameter) is not visible in
// this extract — confirm against the original source.
 10243 SelectionDAG &DAG) const {
 10244 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
 10245 "unaligned indexed stores not implemented!");
 10246 SDValue Chain = ST->getChain();
 10247 SDValue Ptr = ST->getBasePtr();
 10248 SDValue Val = ST->getValue();
 10249 EVT VT = Val.getValueType();
 10250 Align Alignment = ST->getOriginalAlign();
 10251 auto &MF = DAG.getMachineFunction();
 10252 EVT StoreMemVT = ST->getMemoryVT();
 10253
 10254 SDLoc dl(ST);
 10255 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
 10256 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
 10257 if (isTypeLegal(intVT)) {
 10258 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
 10259 StoreMemVT.isVector()) {
 10260 // Scalarize the store and let the individual components be handled.
 10261 SDValue Result = scalarizeVectorStore(ST, DAG);
 10262 return Result;
 10263 }
 10264 // Expand to a bitconvert of the value to the integer type of the
 10265 // same size, then a (misaligned) int store.
 10266 // FIXME: Does not handle truncating floating point stores!
 10267 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
 10268 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
 10269 Alignment, ST->getMemOperand()->getFlags());
 10270 return Result;
 10271 }
 10272 // Do a (aligned) store to a stack slot, then copy from the stack slot
 10273 // to the final destination using (unaligned) integer loads and stores.
 10274 MVT RegVT = getRegisterType(
 10275 *DAG.getContext(),
 10276 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
 10277 EVT PtrVT = Ptr.getValueType();
 10278 unsigned StoredBytes = StoreMemVT.getStoreSize();
 10279 unsigned RegBytes = RegVT.getSizeInBits() / 8;
 10280 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 10281
 10282 // Make sure the stack slot is also aligned for the register type.
 10283 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
 10284 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
 10285
 10286 // Perform the original store, only redirected to the stack slot.
 10287 SDValue Store = DAG.getTruncStore(
 10288 Chain, dl, Val, StackPtr,
 10289 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
 10290
 10291 EVT StackPtrVT = StackPtr.getValueType();
 10292
 10293 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
 10294 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
 // NOTE(review): a declaration line appears to be missing from this
 // extract here — presumably `SmallVector<SDValue, 8> Stores;` (Stores is
 // used below but never declared in the visible text). Confirm.
 10296 unsigned Offset = 0;
 10297
 10298 // Do all but one copies using the full register width.
 10299 for (unsigned i = 1; i < NumRegs; i++) {
 10300 // Load one integer register's worth from the stack slot.
 10301 SDValue Load = DAG.getLoad(
 10302 RegVT, dl, Store, StackPtr,
 10303 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
 10304 // Store it to the final location. Remember the store.
 10305 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
 10306 ST->getPointerInfo().getWithOffset(Offset),
 10307 ST->getOriginalAlign(),
 10308 ST->getMemOperand()->getFlags()));
 10309 // Increment the pointers.
 10310 Offset += RegBytes;
 10311 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
 10312 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
 10313 }
 10314
 10315 // The last store may be partial. Do a truncating store. On big-endian
 10316 // machines this requires an extending load from the stack slot to ensure
 10317 // that the bits are in the right place.
 10318 EVT LoadMemVT =
 10319 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
 10320
 10321 // Load from the stack slot.
 10322 SDValue Load = DAG.getExtLoad(
 10323 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
 10324 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
 10325
 10326 Stores.push_back(
 10327 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
 10328 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
 10329 ST->getOriginalAlign(),
 10330 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
 10331 // The order of the stores doesn't matter - say it with a TokenFactor.
 10332 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
 10333 return Result;
 10334 }
 10335
 10336 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
 10337 "Unaligned store of unknown type.");
 10338 // Get the half-size VT
 10339 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
 10340 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
 10341 unsigned IncrementSize = NumBits / 8;
 10342
 10343 // Divide the stored value in two parts.
 10344 SDValue ShiftAmount =
 10345 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
 10346 SDValue Lo = Val;
 10347 // If Val is a constant, replace the upper bits with 0. The SRL will constant
 10348 // fold and not use the upper bits. A smaller constant may be easier to
 10349 // materialize.
 10350 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
 10351 Lo = DAG.getNode(
 10352 ISD::AND, dl, VT, Lo,
 10353 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
 10354 VT));
 10355 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 10356
 10357 // Store the two parts
 // Little-endian stores Lo at the lower address, big-endian stores Hi.
 10358 SDValue Store1, Store2;
 10359 Store1 = DAG.getTruncStore(Chain, dl,
 10360 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
 10361 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
 10362 ST->getMemOperand()->getFlags());
 10363
 10364 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
 10365 Store2 = DAG.getTruncStore(
 10366 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
 10367 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
 10368 ST->getMemOperand()->getFlags(), ST->getAAInfo());
 10369
 10370 SDValue Result =
 10371 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
 10372 return Result;
 10373}
10374
// TargetLowering::IncrementMemoryAddress: compute Addr advanced past one
// vector's worth of data for masked/compressed memory accesses.
//  - Compressed memory: increment by (popcount of the mask) * element size,
//    i.e. only the stored elements (scalable vectors unsupported here).
//  - Scalable vectors: increment by vscale * known-min store size.
//  - Otherwise: increment by the full fixed store size.
// NOTE(review): the second line of the signature (naming the function and
// the Addr/Mask parameters) is not visible in this extract, and the
// `report_fatal_error(` call line inside the scalable-vector check also
// appears to be missing (only its string argument is visible). Confirm
// against the original source.
10375SDValue
 10377 const SDLoc &DL, EVT DataVT,
 10378 SelectionDAG &DAG,
 10379 bool IsCompressedMemory) const {
 10380 SDValue Increment;
 10381 EVT AddrVT = Addr.getValueType();
 10382 EVT MaskVT = Mask.getValueType();
 10383 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
 10384 "Incompatible types of Data and Mask");
 10385 if (IsCompressedMemory) {
 10386 if (DataVT.isScalableVector())
 10388 "Cannot currently handle compressed memory with scalable vectors");
 10389 // Incrementing the pointer according to number of '1's in the mask.
 10390 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
 10391 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
 // Widen sub-32-bit masks before CTPOP.
 10392 if (MaskIntVT.getSizeInBits() < 32) {
 10393 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
 10394 MaskIntVT = MVT::i32;
 10395 }
 10396
 10397 // Count '1's with POPCNT.
 10398 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
 10399 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
 10400 // Scale is an element size in bytes.
 10401 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
 10402 AddrVT);
 10403 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
 10404 } else if (DataVT.isScalableVector()) {
 10405 Increment = DAG.getVScale(DL, AddrVT,
 10406 APInt(AddrVT.getFixedSizeInBits(),
 10407 DataVT.getStoreSize().getKnownMinValue()));
 10408 } else
 10409 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
 10410
 10411 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
 10412}
10413
10415 EVT VecVT, const SDLoc &dl,
10416 ElementCount SubEC) {
10417 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10418 "Cannot index a scalable vector within a fixed-width vector");
10419
10420 unsigned NElts = VecVT.getVectorMinNumElements();
10421 unsigned NumSubElts = SubEC.getKnownMinValue();
10422 EVT IdxVT = Idx.getValueType();
10423
10424 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10425 // If this is a constant index and we know the value plus the number of the
10426 // elements in the subvector minus one is less than the minimum number of
10427 // elements then it's safe to return Idx.
10428 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10429 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10430 return Idx;
10431 SDValue VS =
10432 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10433 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10434 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10435 DAG.getConstant(NumSubElts, dl, IdxVT));
10436 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10437 }
10438 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10439 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10440 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10441 DAG.getConstant(Imm, dl, IdxVT));
10442 }
10443 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10444 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10445 DAG.getConstant(MaxIndex, dl, IdxVT));
10446}
10447
10449 SDValue VecPtr, EVT VecVT,
10450 SDValue Index) const {
10451 return getVectorSubVecPointer(
10452 DAG, VecPtr, VecVT,
10454 Index);
10455}
10456
10458 SDValue VecPtr, EVT VecVT,
10459 EVT SubVecVT,
10460 SDValue Index) const {
10461 SDLoc dl(Index);
10462 // Make sure the index type is big enough to compute in.
10463 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10464
10465 EVT EltVT = VecVT.getVectorElementType();
10466
10467 // Calculate the element offset and add it to the pointer.
10468 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10469 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10470 "Converting bits to bytes lost precision");
10471 assert(SubVecVT.getVectorElementType() == EltVT &&
10472 "Sub-vector must be a vector with matching element type");
10473 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10474 SubVecVT.getVectorElementCount());
10475
10476 EVT IdxVT = Index.getValueType();
10477 if (SubVecVT.isScalableVector())
10478 Index =
10479 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10480 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10481
10482 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10483 DAG.getConstant(EltSize, dl, IdxVT));
10484 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10485}
10486
10487//===----------------------------------------------------------------------===//
10488// Implementation of Emulated TLS Model
10489//===----------------------------------------------------------------------===//
10490
10492 SelectionDAG &DAG) const {
10493 // Access to address of TLS varialbe xyz is lowered to a function call:
10494 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10495 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10496 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10497 SDLoc dl(GA);
10498
10499 ArgListTy Args;
10500 ArgListEntry Entry;
10501 const GlobalValue *GV =
10502 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10503 SmallString<32> NameString("__emutls_v.");
10504 NameString += GV->getName();
10505 StringRef EmuTlsVarName(NameString);
10506 const GlobalVariable *EmuTlsVar =
10507 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10508 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10509 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10510 Entry.Ty = VoidPtrType;
10511 Args.push_back(Entry);
10512
10513 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10514
10516 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10517 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10518 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10519
10520 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10521 // At last for X86 targets, maybe good for other targets too?
10523 MFI.setAdjustsStack(true); // Is this only for X86 target?
10524 MFI.setHasCalls(true);
10525
10526 assert((GA->getOffset() == 0) &&
10527 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10528 return CallResult.first;
10529}
10530
10532 SelectionDAG &DAG) const {
10533 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10534 if (!isCtlzFast())
10535 return SDValue();
10536 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10537 SDLoc dl(Op);
10538 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10539 EVT VT = Op.getOperand(0).getValueType();
10540 SDValue Zext = Op.getOperand(0);
10541 if (VT.bitsLT(MVT::i32)) {
10542 VT = MVT::i32;
10543 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10544 }
10545 unsigned Log2b = Log2_32(VT.getSizeInBits());
10546 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10547 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10548 DAG.getConstant(Log2b, dl, MVT::i32));
10549 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10550 }
10551 return SDValue();
10552}
10553
10555 SDValue Op0 = Node->getOperand(0);
10556 SDValue Op1 = Node->getOperand(1);
10557 EVT VT = Op0.getValueType();
10558 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10559 unsigned Opcode = Node->getOpcode();
10560 SDLoc DL(Node);
10561
10562 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10563 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10565 Op0 = DAG.getFreeze(Op0);
10566 SDValue Zero = DAG.getConstant(0, DL, VT);
10567 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10568 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10569 }
10570
10571 // umin(x,y) -> sub(x,usubsat(x,y))
10572 // TODO: Missing freeze(Op0)?
10573 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10575 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10576 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10577 }
10578
10579 // umax(x,y) -> add(x,usubsat(y,x))
10580 // TODO: Missing freeze(Op0)?
10581 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10583 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10584 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10585 }
10586
10587 // FIXME: Should really try to split the vector in case it's legal on a
10588 // subvector.
10590 return DAG.UnrollVectorOp(Node);
10591
10592 // Attempt to find an existing SETCC node that we can reuse.
10593 // TODO: Do we need a generic doesSETCCNodeExist?
10594 // TODO: Missing freeze(Op0)/freeze(Op1)?
10595 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10596 ISD::CondCode PrefCommuteCC,
10597 ISD::CondCode AltCommuteCC) {
10598 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10599 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10600 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10601 {Op0, Op1, DAG.getCondCode(CC)})) {
10602 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10603 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10604 }
10605 }
10606 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10607 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10608 {Op0, Op1, DAG.getCondCode(CC)})) {
10609 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10610 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10611 }
10612 }
10613 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10614 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10615 };
10616
10617 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10618 // -> Y = (A < B) ? B : A
10619 // -> Y = (A >= B) ? A : B
10620 // -> Y = (A <= B) ? B : A
10621 switch (Opcode) {
10622 case ISD::SMAX:
10623 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10624 case ISD::SMIN:
10625 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10626 case ISD::UMAX:
10627 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10628 case ISD::UMIN:
10629 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10630 }
10631
10632 llvm_unreachable("How did we get here?");
10633}
10634
10636 unsigned Opcode = Node->getOpcode();
10637 SDValue LHS = Node->getOperand(0);
10638 SDValue RHS = Node->getOperand(1);
10639 EVT VT = LHS.getValueType();
10640 SDLoc dl(Node);
10641
10642 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10643 assert(VT.isInteger() && "Expected operands to be integers");
10644
10645 // usub.sat(a, b) -> umax(a, b) - b
10646 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10647 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10648 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10649 }
10650
10651 // uadd.sat(a, b) -> umin(a, ~b) + b
10652 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10653 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10654 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10655 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10656 }
10657
10658 unsigned OverflowOp;
10659 switch (Opcode) {
10660 case ISD::SADDSAT:
10661 OverflowOp = ISD::SADDO;
10662 break;
10663 case ISD::UADDSAT:
10664 OverflowOp = ISD::UADDO;
10665 break;
10666 case ISD::SSUBSAT:
10667 OverflowOp = ISD::SSUBO;
10668 break;
10669 case ISD::USUBSAT:
10670 OverflowOp = ISD::USUBO;
10671 break;
10672 default:
10673 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10674 "addition or subtraction node.");
10675 }
10676
10677 // FIXME: Should really try to split the vector in case it's legal on a
10678 // subvector.
10680 return DAG.UnrollVectorOp(Node);
10681
10682 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10683 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10684 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10685 SDValue SumDiff = Result.getValue(0);
10686 SDValue Overflow = Result.getValue(1);
10687 SDValue Zero = DAG.getConstant(0, dl, VT);
10688 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10689
10690 if (Opcode == ISD::UADDSAT) {
10692 // (LHS + RHS) | OverflowMask
10693 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10694 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10695 }
10696 // Overflow ? 0xffff.... : (LHS + RHS)
10697 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10698 }
10699
10700 if (Opcode == ISD::USUBSAT) {
10702 // (LHS - RHS) & ~OverflowMask
10703 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10704 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10705 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10706 }
10707 // Overflow ? 0 : (LHS - RHS)
10708 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10709 }
10710
10711 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10714
10715 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10716 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10717
10718 // If either of the operand signs are known, then they are guaranteed to
10719 // only saturate in one direction. If non-negative they will saturate
10720 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10721 //
10722 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10723 // sign of 'y' has to be flipped.
10724
10725 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10726 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10727 : KnownRHS.isNegative();
10728 if (LHSIsNonNegative || RHSIsNonNegative) {
10729 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10730 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10731 }
10732
10733 bool LHSIsNegative = KnownLHS.isNegative();
10734 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10735 : KnownRHS.isNonNegative();
10736 if (LHSIsNegative || RHSIsNegative) {
10737 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10738 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10739 }
10740 }
10741
10742 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10744 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10745 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10746 DAG.getConstant(BitWidth - 1, dl, VT));
10747 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10748 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10749}
10750
10752 unsigned Opcode = Node->getOpcode();
10753 SDValue LHS = Node->getOperand(0);
10754 SDValue RHS = Node->getOperand(1);
10755 EVT VT = LHS.getValueType();
10756 EVT ResVT = Node->getValueType(0);
10757 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10758 SDLoc dl(Node);
10759
10760 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10761 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10762 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10763 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10764
10765 // We can't perform arithmetic on i1 values. Extending them would
10766 // probably result in worse codegen, so let's just use two selects instead.
10767 // Some targets are also just better off using selects rather than subtraction
10768 // because one of the conditions can be merged with one of the selects.
10769 // And finally, if we don't know the contents of high bits of a boolean value
10770 // we can't perform any arithmetic either.
10771 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10773 SDValue SelectZeroOrOne =
10774 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10775 DAG.getConstant(0, dl, ResVT));
10776 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10777 SelectZeroOrOne);
10778 }
10779
10781 std::swap(IsGT, IsLT);
10782 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10783 ResVT);
10784}
10785
10787 unsigned Opcode = Node->getOpcode();
10788 bool IsSigned = Opcode == ISD::SSHLSAT;
10789 SDValue LHS = Node->getOperand(0);
10790 SDValue RHS = Node->getOperand(1);
10791 EVT VT = LHS.getValueType();
10792 SDLoc dl(Node);
10793
10794 assert((Node->getOpcode() == ISD::SSHLSAT ||
10795 Node->getOpcode() == ISD::USHLSAT) &&
10796 "Expected a SHLSAT opcode");
10797 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10798 assert(VT.isInteger() && "Expected operands to be integers");
10799
10801 return DAG.UnrollVectorOp(Node);
10802
10803 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10804
10805 unsigned BW = VT.getScalarSizeInBits();
10806 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10807 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10808 SDValue Orig =
10809 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10810
10811 SDValue SatVal;
10812 if (IsSigned) {
10813 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10814 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10815 SDValue Cond =
10816 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10817 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10818 } else {
10819 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10820 }
10821 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10822 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10823}
10824
10826 bool Signed, EVT WideVT,
10827 const SDValue LL, const SDValue LH,
10828 const SDValue RL, const SDValue RH,
10829 SDValue &Lo, SDValue &Hi) const {
10830 // We can fall back to a libcall with an illegal type for the MUL if we
10831 // have a libcall big enough.
10832 // Also, we can fall back to a division in some cases, but that's a big
10833 // performance hit in the general case.
10834 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10835 if (WideVT == MVT::i16)
10836 LC = RTLIB::MUL_I16;
10837 else if (WideVT == MVT::i32)
10838 LC = RTLIB::MUL_I32;
10839 else if (WideVT == MVT::i64)
10840 LC = RTLIB::MUL_I64;
10841 else if (WideVT == MVT::i128)
10842 LC = RTLIB::MUL_I128;
10843
10844 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10845 // We'll expand the multiplication by brute force because we have no other
10846 // options. This is a trivially-generalized version of the code from
10847 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10848 // 4.3.1).
10849 EVT VT = LL.getValueType();
10850 unsigned Bits = VT.getSizeInBits();
10851 unsigned HalfBits = Bits >> 1;
10852 SDValue Mask =
10853 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10854 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10855 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10856
10857 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10858 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10859
10860 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10861 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10862 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10863 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10864
10865 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10866 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10867 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10868 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10869
10870 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10871 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10872 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10873
10874 SDValue W =
10875 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10876 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10877 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10878 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10879
10880 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10881 DAG.getNode(ISD::ADD, dl, VT,
10882 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10883 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10884 } else {
10885 // Attempt a libcall.
10886 SDValue Ret;
10888 CallOptions.setIsSigned(Signed);
10889 CallOptions.setIsPostTypeLegalization(true);
10890 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10891 // Halves of WideVT are packed into registers in different order
10892 // depending on platform endianness. This is usually handled by
10893 // the C calling convention, but we can't defer to it in
10894 // the legalizer.
10895 SDValue Args[] = {LL, LH, RL, RH};
10896 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10897 } else {
10898 SDValue Args[] = {LH, LL, RH, RL};
10899 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10900 }
10901 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10902 "Ret value is a collection of constituent nodes holding result.");
10903 if (DAG.getDataLayout().isLittleEndian()) {
10904 // Same as above.
10905 Lo = Ret.getOperand(0);
10906 Hi = Ret.getOperand(1);
10907 } else {
10908 Lo = Ret.getOperand(1);
10909 Hi = Ret.getOperand(0);
10910 }
10911 }
10912}
10913
10915 bool Signed, const SDValue LHS,
10916 const SDValue RHS, SDValue &Lo,
10917 SDValue &Hi) const {
10918 EVT VT = LHS.getValueType();
10919 assert(RHS.getValueType() == VT && "Mismatching operand types");
10920
10921 SDValue HiLHS;
10922 SDValue HiRHS;
10923 if (Signed) {
10924 // The high part is obtained by SRA'ing all but one of the bits of low
10925 // part.
10926 unsigned LoSize = VT.getFixedSizeInBits();
10927 HiLHS = DAG.getNode(
10928 ISD::SRA, dl, VT, LHS,
10929 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10930 HiRHS = DAG.getNode(
10931 ISD::SRA, dl, VT, RHS,
10932 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10933 } else {
10934 HiLHS = DAG.getConstant(0, dl, VT);
10935 HiRHS = DAG.getConstant(0, dl, VT);
10936 }
10937 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10938 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10939}
10940
10941SDValue
10943 assert((Node->getOpcode() == ISD::SMULFIX ||
10944 Node->getOpcode() == ISD::UMULFIX ||
10945 Node->getOpcode() == ISD::SMULFIXSAT ||
10946 Node->getOpcode() == ISD::UMULFIXSAT) &&
10947 "Expected a fixed point multiplication opcode");
10948
10949 SDLoc dl(Node);
10950 SDValue LHS = Node->getOperand(0);
10951 SDValue RHS = Node->getOperand(1);
10952 EVT VT = LHS.getValueType();
10953 unsigned Scale = Node->getConstantOperandVal(2);
10954 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10955 Node->getOpcode() == ISD::UMULFIXSAT);
10956 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10957 Node->getOpcode() == ISD::SMULFIXSAT);
10958 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10959 unsigned VTSize = VT.getScalarSizeInBits();
10960
10961 if (!Scale) {
10962 // [us]mul.fix(a, b, 0) -> mul(a, b)
10963 if (!Saturating) {
10965 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10966 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10967 SDValue Result =
10968 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10969 SDValue Product = Result.getValue(0);
10970 SDValue Overflow = Result.getValue(1);
10971 SDValue Zero = DAG.getConstant(0, dl, VT);
10972
10973 APInt MinVal = APInt::getSignedMinValue(VTSize);
10974 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10975 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10976 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10977 // Xor the inputs, if resulting sign bit is 0 the product will be
10978 // positive, else negative.
10979 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10980 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10981 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10982 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10983 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
10984 SDValue Result =
10985 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10986 SDValue Product = Result.getValue(0);
10987 SDValue Overflow = Result.getValue(1);
10988
10989 APInt MaxVal = APInt::getMaxValue(VTSize);
10990 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10991 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10992 }
10993 }
10994
10995 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10996 "Expected scale to be less than the number of bits if signed or at "
10997 "most the number of bits if unsigned.");
10998 assert(LHS.getValueType() == RHS.getValueType() &&
10999 "Expected both operands to be the same type");
11000
11001 // Get the upper and lower bits of the result.
11002 SDValue Lo, Hi;
11003 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11004 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11005 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11006 if (VT.isVector())
11007 WideVT =
11009 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11010 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11011 Lo = Result.getValue(0);
11012 Hi = Result.getValue(1);
11013 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11014 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11015 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11016 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11017 // Try for a multiplication using a wider type.
11018 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11019 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11020 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11021 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11022 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11023 SDValue Shifted =
11024 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11025 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11026 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11027 } else if (VT.isVector()) {
11028 return SDValue();
11029 } else {
11030 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11031 }
11032
11033 if (Scale == VTSize)
11034 // Result is just the top half since we'd be shifting by the width of the
11035 // operand. Overflow impossible so this works for both UMULFIX and
11036 // UMULFIXSAT.
11037 return Hi;
11038
11039 // The result will need to be shifted right by the scale since both operands
11040 // are scaled. The result is given to us in 2 halves, so we only want part of
11041 // both in the result.
11042 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11043 DAG.getShiftAmountConstant(Scale, VT, dl));
11044 if (!Saturating)
11045 return Result;
11046
11047 if (!Signed) {
11048 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11049 // widened multiplication) aren't all zeroes.
11050
11051 // Saturate to max if ((Hi >> Scale) != 0),
11052 // which is the same as if (Hi > ((1 << Scale) - 1))
11053 APInt MaxVal = APInt::getMaxValue(VTSize);
11054 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11055 dl, VT);
11056 Result = DAG.getSelectCC(dl, Hi, LowMask,
11057 DAG.getConstant(MaxVal, dl, VT), Result,
11058 ISD::SETUGT);
11059
11060 return Result;
11061 }
11062
11063 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11064 // widened multiplication) aren't all ones or all zeroes.
11065
11066 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11067 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11068
11069 if (Scale == 0) {
11070 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11071 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11072 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11073 // Saturated to SatMin if wide product is negative, and SatMax if wide
11074 // product is positive ...
11075 SDValue Zero = DAG.getConstant(0, dl, VT);
11076 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11077 ISD::SETLT);
11078 // ... but only if we overflowed.
11079 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11080 }
11081
11082 // We handled Scale==0 above so all the bits to examine is in Hi.
11083
11084 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11085 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11086 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11087 dl, VT);
11088 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11089 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11090 // which is the same as if (HI < (-1 << (Scale - 1))
11091 SDValue HighMask =
11092 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11093 dl, VT);
11094 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11095 return Result;
11096}
11097
11098SDValue
11100 SDValue LHS, SDValue RHS,
11101 unsigned Scale, SelectionDAG &DAG) const {
11102 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11103 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11104 "Expected a fixed point division opcode");
11105
11106 EVT VT = LHS.getValueType();
11107 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11108 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11109 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11110
11111 // If there is enough room in the type to upscale the LHS or downscale the
11112 // RHS before the division, we can perform it in this type without having to
11113 // resize. For signed operations, the LHS headroom is the number of
11114 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11115 // The headroom for the RHS is the number of trailing zeroes.
11116 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11118 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11119
11120 // For signed saturating operations, we need to be able to detect true integer
11121 // division overflow; that is, when you have MIN / -EPS. However, this
11122 // is undefined behavior and if we emit divisions that could take such
11123 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11124 // example).
11125 // Avoid this by requiring an extra bit so that we never get this case.
11126 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11127 // signed saturating division, we need to emit a whopping 32-bit division.
11128 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11129 return SDValue();
11130
11131 unsigned LHSShift = std::min(LHSLead, Scale);
11132 unsigned RHSShift = Scale - LHSShift;
11133
11134 // At this point, we know that if we shift the LHS up by LHSShift and the
11135 // RHS down by RHSShift, we can emit a regular division with a final scaling
11136 // factor of Scale.
11137
11138 if (LHSShift)
11139 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11140 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11141 if (RHSShift)
11142 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11143 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11144
11145 SDValue Quot;
11146 if (Signed) {
11147 // For signed operations, if the resulting quotient is negative and the
11148 // remainder is nonzero, subtract 1 from the quotient to round towards
11149 // negative infinity.
11150 SDValue Rem;
11151 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11152 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11153 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11154 if (isTypeLegal(VT) &&
11156 Quot = DAG.getNode(ISD::SDIVREM, dl,
11157 DAG.getVTList(VT, VT),
11158 LHS, RHS);
11159 Rem = Quot.getValue(1);
11160 Quot = Quot.getValue(0);
11161 } else {
11162 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11163 LHS, RHS);
11164 Rem = DAG.getNode(ISD::SREM, dl, VT,
11165 LHS, RHS);
11166 }
11167 SDValue Zero = DAG.getConstant(0, dl, VT);
11168 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11169 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11170 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11171 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11172 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11173 DAG.getConstant(1, dl, VT));
11174 Quot = DAG.getSelect(dl, VT,
11175 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11176 Sub1, Quot);
11177 } else
11178 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11179 LHS, RHS);
11180
11181 return Quot;
11182}
11183
// Expand ISD::UADDO / ISD::USUBO (unsigned add/sub that also produces an
// overflow flag) into either a UADDO_CARRY/USUBO_CARRY with a zero carry-in
// (when that is legal/custom for the type) or a plain ADD/SUB followed by a
// SETCC that detects the carry/borrow.
// Results are returned through the &Result / &Overflow out-parameters.
// NOTE(review): the signature line (11184, `void TargetLowering::expandUADDSUBO(`)
// and line 11224 (the generic-case CondCode selection) are missing from this
// extraction.
11185 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11186 SDLoc dl(Node);
11187 SDValue LHS = Node->getOperand(0);
11188 SDValue RHS = Node->getOperand(1);
11189 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11190
11191 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11192 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11193 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
// A constant-zero carry-in makes the carrying op equivalent to plain U{ADD,SUB}O.
11194 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11195 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11196 { LHS, RHS, CarryIn });
11197 Result = SDValue(NodeCarry.getNode(), 0);
11198 Overflow = SDValue(NodeCarry.getNode(), 1);
11199 return;
11200 }
11201
11202 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11203 LHS.getValueType(), LHS, RHS);
11204
11205 EVT ResultType = Node->getValueType(1);
11206 EVT SetCCType = getSetCCResultType(
11207 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11208 SDValue SetCC;
11209 if (IsAdd && isOneConstant(RHS)) {
11210 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially
11211 // reduces the live range of X. We assume comparing with 0 is cheap.
11212 // The general case (X + C) < C is not necessarily beneficial. Although we
11213 // reduce the live range of X, we may introduce the materialization of
11214 // constant C.
11215 SetCC =
11216 DAG.getSetCC(dl, SetCCType, Result,
11217 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11218 } else if (IsAdd && isAllOnesConstant(RHS)) {
11219 // Special case: uaddo X, -1 overflows if X != 0.
11220 SetCC =
11221 DAG.getSetCC(dl, SetCCType, LHS,
11222 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11223 } else {
// Generic case: the CondCode chosen on the (missing) line 11224 is compared
// against below — presumably unsigned-compare of Result vs LHS; TODO confirm
// against upstream LLVM source.
11225 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11226 }
// Normalize the i1-ish SETCC result to the node's declared overflow type.
11227 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11228}
11229
// Expand ISD::SADDO / ISD::SSUBO (signed add/sub with overflow flag). The
// arithmetic result is always a plain ADD/SUB; the overflow bit is computed
// either by comparing against the saturating form (when S{ADD,SUB}SAT is
// legal) or via the classic sign-based XOR-of-conditions trick.
// NOTE(review): the signature line (11230,
// `void TargetLowering::expandSADDSUBO(`) is missing from this extraction.
11231 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11232 SDLoc dl(Node);
11233 SDValue LHS = Node->getOperand(0);
11234 SDValue RHS = Node->getOperand(1);
11235 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11236
11237 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11238 LHS.getValueType(), LHS, RHS);
11239
11240 EVT ResultType = Node->getValueType(1);
11241 EVT OType = getSetCCResultType(
11242 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11243
11244 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
// (Overflow occurred exactly when the wrapping and saturating results differ.)
11245 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11246 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11247 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11248 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11249 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11250 return;
11251 }
11252
11253 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11254
11255 // For an addition, the result should be less than one of the operands (LHS)
11256 // if and only if the other operand (RHS) is negative, otherwise there will
11257 // be overflow.
11258 // For a subtraction, the result should be less than one of the operands
11259 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11260 // otherwise there will be overflow.
11261 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11262 SDValue ConditionRHS =
11263 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11264
// Overflow iff exactly one of the two conditions holds (XOR).
11265 Overflow = DAG.getBoolExtOrTrunc(
11266 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11267 ResultType, ResultType);
11268}
11269
// Expand ISD::SMULO / ISD::UMULO (multiply with overflow flag). Strategy, in
// order of preference:
//   1. power-of-two RHS -> shift, checking overflow by shifting back;
//   2. MULH[SU] + MUL to obtain top/bottom halves;
//   3. [SU]MUL_LOHI to obtain both halves at once;
//   4. widen to 2x the bit width and split the product;
//   5. scalar-only fallback via forceExpandWideMUL.
// Returns false only when a vector type hits the final fallback (which
// cannot handle vectors); otherwise fills Result/Overflow and returns true.
// NOTE(review): the signature line (11270) and lines 11299 (vector widening
// of WideVT) and 11304-11305 (the Ops[2][3] initializer listing the
// MULH/MUL_LOHI/extend opcodes) are missing from this extraction.
11271 SDValue &Overflow, SelectionDAG &DAG) const {
11272 SDLoc dl(Node);
11273 EVT VT = Node->getValueType(0);
11274 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11275 SDValue LHS = Node->getOperand(0);
11276 SDValue RHS = Node->getOperand(1);
11277 bool isSigned = Node->getOpcode() == ISD::SMULO;
11278
11279 // For power-of-two multiplications we can use a simpler shift expansion.
11280 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11281 const APInt &C = RHSC->getAPIntValue();
11282 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11283 if (C.isPowerOf2()) {
11284 // smulo(x, signed_min) is same as umulo(x, signed_min).
11285 bool UseArithShift = isSigned && !C.isMinSignedValue();
11286 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11287 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff un-shifting does not round-trip back to the original LHS.
11288 Overflow = DAG.getSetCC(dl, SetCCVT,
11289 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11290 dl, VT, Result, ShiftAmt),
11291 LHS, ISD::SETNE);
11292 return true;
11293 }
11294 }
11295
// Double-width type used by strategy 4 (widen, multiply, split).
11296 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11297 if (VT.isVector())
11298 WideVT =
11300
11301 SDValue BottomHalf;
11302 SDValue TopHalf;
// Ops[isSigned] = { high-half multiply, lo/hi multiply, extend opcode }
// (initializer on the missing lines 11304-11305).
11303 static const unsigned Ops[2][3] =
11306 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11307 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11308 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11309 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11310 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11311 RHS);
11312 TopHalf = BottomHalf.getValue(1);
11313 } else if (isTypeLegal(WideVT)) {
11314 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11315 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11316 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11317 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11318 SDValue ShiftAmt =
11319 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11320 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11321 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11322 } else {
11323 if (VT.isVector())
11324 return false;
11325
11326 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11327 }
11328
11329 Result = BottomHalf;
11330 if (isSigned) {
// Signed: no overflow iff TopHalf is the sign-extension of BottomHalf.
11331 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11332 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11333 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11334 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11335 } else {
// Unsigned: no overflow iff TopHalf is zero.
11336 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11337 DAG.getConstant(0, dl, VT), ISD::SETNE);
11338 }
11339
11340 // Truncate the result if SetCC returns a larger type than needed.
11341 EVT RType = Node->getValueType(1);
11342 if (RType.bitsLT(Overflow.getValueType()))
11343 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11344
11345 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11346 "Unexpected result type for S/UMULO legalization");
11347 return true;
11348}
11349
// Expand a VECREDUCE_* node into scalar code. Power-of-two vectors are first
// halved repeatedly with the base (binary) opcode while that op stays
// legal/custom on the half-width type; whatever remains is fully unrolled
// into a chain of scalar ops on the extracted elements.
// NOTE(review): the signature line (11350), line 11357
// (`report_fatal_error(` for the scalable-vector case) and line 11377 (the
// SmallVector declaration for Ops) are missing from this extraction.
11351 SDLoc dl(Node);
11352 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11353 SDValue Op = Node->getOperand(0);
11354 EVT VT = Op.getValueType();
11355
// Scalable vectors cannot be unrolled element-by-element; abort.
11356 if (VT.isScalableVector())
11358 "Expanding reductions for scalable vectors is undefined.");
11359
11360 // Try to use a shuffle reduction for power of two vectors.
11361 if (VT.isPow2VectorType()) {
11362 while (VT.getVectorNumElements() > 1) {
11363 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11364 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11365 break;
11366
11367 SDValue Lo, Hi;
11368 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11369 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11370 VT = HalfVT;
11371 }
11372 }
11373
11374 EVT EltVT = VT.getVectorElementType();
11375 unsigned NumElts = VT.getVectorNumElements();
11376
11378 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11379
// Left-to-right fold of the remaining elements with the scalar base op.
11380 SDValue Res = Ops[0];
11381 for (unsigned i = 1; i < NumElts; i++)
11382 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11383
11384 // Result type may be wider than element type.
11385 if (EltVT != Node->getValueType(0))
11386 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11387 return Res;
11388}
11389
// Expand a sequential (ordered) VECREDUCE_SEQ_* node: starting from the
// scalar accumulator operand, fold every vector element in order with the
// base binary opcode. Unlike expandVecReduce, no tree/halving shortcut is
// taken, preserving the strict left-to-right evaluation order.
// NOTE(review): the signature line (11390), line 11400 (`report_fatal_error(`)
// and line 11405 (the SmallVector declaration for Ops) are missing from this
// extraction.
11391 SDLoc dl(Node);
11392 SDValue AccOp = Node->getOperand(0);
11393 SDValue VecOp = Node->getOperand(1);
11394 SDNodeFlags Flags = Node->getFlags();
11395
11396 EVT VT = VecOp.getValueType();
11397 EVT EltVT = VT.getVectorElementType();
11398
// Scalable vectors cannot be unrolled element-by-element; abort.
11399 if (VT.isScalableVector())
11401 "Expanding reductions for scalable vectors is undefined.");
11402
11403 unsigned NumElts = VT.getVectorNumElements();
11404
11406 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11407
11408 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11409
11410 SDValue Res = AccOp;
11411 for (unsigned i = 0; i < NumElts; i++)
11412 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11413
11414 return Res;
11415}
11416
// Expand ISD::SREM / ISD::UREM. Prefers a combined [SU]DIVREM node (taking
// its remainder result); otherwise, if plain division is available, computes
// the remainder as X - (X/Y)*Y. Returns false when neither form is
// legal/custom, leaving Result untouched.
// NOTE(review): the signature line (11417,
// `bool TargetLowering::expandREM(`) is missing from this extraction.
11418 SelectionDAG &DAG) const {
11419 EVT VT = Node->getValueType(0);
11420 SDLoc dl(Node);
11421 bool isSigned = Node->getOpcode() == ISD::SREM;
11422 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11423 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11424 SDValue Dividend = Node->getOperand(0);
11425 SDValue Divisor = Node->getOperand(1);
11426 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11427 SDVTList VTs = DAG.getVTList(VT, VT);
// DIVREM result 1 is the remainder.
11428 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11429 return true;
11430 }
11431 if (isOperationLegalOrCustom(DivOpc, VT)) {
11432 // X % Y -> X-X/Y*Y
11433 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11434 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11435 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11436 return true;
11437 }
11438 return false;
11439}
11440
// Expand ISD::FP_TO_SINT_SAT / FP_TO_UINT_SAT: convert a floating-point
// value to an integer, saturating to the SatVT range (operand 1) and mapping
// NaN to zero. When the integer bounds are exactly representable as floats
// and FMINNUM/FMAXNUM are legal, clamps in FP then converts; otherwise
// converts first and patches out-of-range/NaN lanes with selects.
// NOTE(review): the signature line (11441) and line 11493 (the FMAXNUM half
// of the MinMaxLegal check) are missing from this extraction.
11442 SelectionDAG &DAG) const {
11443 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11444 SDLoc dl(SDValue(Node, 0));
11445 SDValue Src = Node->getOperand(0);
11446
11447 // DstVT is the result type, while SatVT is the size to which we saturate
11448 EVT SrcVT = Src.getValueType();
11449 EVT DstVT = Node->getValueType(0);
11450
11451 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11452 unsigned SatWidth = SatVT.getScalarSizeInBits();
11453 unsigned DstWidth = DstVT.getScalarSizeInBits();
11454 assert(SatWidth <= DstWidth &&
11455 "Expected saturation width smaller than result width");
11456
11457 // Determine minimum and maximum integer values and their corresponding
11458 // floating-point values.
11459 APInt MinInt, MaxInt;
11460 if (IsSigned) {
11461 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11462 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11463 } else {
11464 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11465 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11466 }
11467
11468 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11469 // libcall emission cannot handle this. Large result types will fail.
11470 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11471 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11472 SrcVT = Src.getValueType();
11473 }
11474
11475 const fltSemantics &Sem = SrcVT.getFltSemantics();
11476 APFloat MinFloat(Sem);
11477 APFloat MaxFloat(Sem);
11478
// Convert the integer bounds to FP, noting whether the conversion was exact;
// an inexact bound cannot be used for the clamp-in-FP fast path below.
11479 APFloat::opStatus MinStatus =
11480 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11481 APFloat::opStatus MaxStatus =
11482 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11483 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11484 !(MaxStatus & APFloat::opStatus::opInexact);
11485
11486 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11487 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11488
11489 // If the integer bounds are exactly representable as floats and min/max are
11490 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11491 // of comparisons and selects.
11492 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11494 if (AreExactFloatBounds && MinMaxLegal) {
11495 SDValue Clamped = Src;
11496
11497 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11498 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11499 // Clamp by MaxFloat from above. NaN cannot occur.
11500 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11501 // Convert clamped value to integer.
11502 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11503 dl, DstVT, Clamped);
11504
11505 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11506 // which will cast to zero.
11507 if (!IsSigned)
11508 return FpToInt;
11509
11510 // Otherwise, select 0 if Src is NaN.
11511 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11512 EVT SetCCVT =
11513 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// SETUO (unordered) is true exactly when Src is NaN.
11514 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11515 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11516 }
11517
11518 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11519 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11520
11521 // Result of direct conversion. The assumption here is that the operation is
11522 // non-trapping and it's fine to apply it to an out-of-range value if we
11523 // select it away later.
11524 SDValue FpToInt =
11525 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11526
11527 SDValue Select = FpToInt;
11528
11529 EVT SetCCVT =
11530 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11531
11532 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11533 // MinInt if Src is NaN.
11534 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11535 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11536 // If Src OGT MaxFloat, select MaxInt.
11537 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11538 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11539
11540 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11541 // is already zero.
11542 if (!IsSigned)
11543 return Select;
11544
11545 // Otherwise, select 0 if Src is NaN.
11546 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11547 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11548 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11549}
11550
// Round Op from OperandVT down to ResultVT using round-to-odd, which avoids
// double-rounding errors when a second rounding step follows (see the cited
// Boldo & Melquiond paper). Works on |Op| via integer bit manipulation and
// re-attaches the sign bit at the end; NaNs and exact narrowings are kept
// as-is.
// NOTE(review): the signature line (11551,
// `SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,`)
// is missing from this extraction.
11552 const SDLoc &dl,
11553 SelectionDAG &DAG) const {
11554 EVT OperandVT = Op.getValueType();
// Already at the target precision: nothing to do.
11555 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11556 return Op;
11557 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11558 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11559 // can induce double-rounding which may alter the results. We can
11560 // correct for this using a trick explained in: Boldo, Sylvie, and
11561 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11562 // World Congress. 2005.
11563 unsigned BitSize = OperandVT.getScalarSizeInBits();
11564 EVT WideIntVT = OperandVT.changeTypeToInteger();
11565 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
// Extract the sign bit so the rest of the computation can work on |Op|.
11566 SDValue SignBit =
11567 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11568 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11569 SDValue AbsWide;
11570 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11571 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11572 } else {
// No FABS: clear the sign bit with an integer mask instead.
11573 SDValue ClearedSign = DAG.getNode(
11574 ISD::AND, dl, WideIntVT, OpAsInt,
11575 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11576 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11577 }
11578 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11579 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11580
11581 // We can keep the narrow value as-is if narrowing was exact (no
11582 // rounding error), the wide value was NaN (the narrow value is also
11583 // NaN and should be preserved) or if we rounded to the odd value.
11584 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11585 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11586 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11587 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11588 EVT ResultIntVTCCVT = getSetCCResultType(
11589 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11590 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11591 // The result is already odd so we don't need to do anything.
11592 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11593
11594 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11595 AbsWide.getValueType());
11596 // We keep results which are exact, odd or NaN.
// SETUEQ is true when equal OR unordered, covering both "exact" and "NaN".
11597 SDValue KeepNarrow =
11598 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11599 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11600 // We morally performed a round-down if AbsNarrow is smaller than
11601 // AbsWide.
11602 SDValue NarrowIsRd =
11603 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11604 // If the narrow value is odd or exact, pick it.
11605 // Otherwise, narrow is even and corresponds to either the rounded-up
11606 // or rounded-down value. If narrow is the rounded-down value, we want
11607 // the rounded-up value as it will be odd.
11608 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11609 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11610 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Re-attach the original sign: shift it down into the narrow type's sign
// position, then OR it into the (sign-free) magnitude bits.
11611 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11612 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11613 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11614 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11615 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11616 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11617}
11618
// Expand ISD::FP_ROUND. Only the bf16 destination is handled here: a
// truncating FP_ROUND (operand 1 == 1) becomes FP_TO_BF16, otherwise the
// value is narrowed to f32 with round-to-odd and then rounded to bf16 via
// integer bias arithmetic (round-to-nearest-even on the top 16 bits), with
// NaN quieting. Returns an empty SDValue for non-bf16 destinations.
// NOTE(review): the signature line (11619,
// `SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {`)
// is missing from this extraction.
11620 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11621 SDValue Op = Node->getOperand(0);
11622 EVT VT = Node->getValueType(0);
11623 SDLoc dl(Node);
11624 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 == 1 means the input is known exact for the narrower type.
11625 if (Node->getConstantOperandVal(1) == 1) {
11626 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11627 }
11628 EVT OperandVT = Op.getValueType();
// Capture NaN-ness of the original input before any narrowing.
11629 SDValue IsNaN = DAG.getSetCC(
11630 dl,
11631 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11632 Op, Op, ISD::SETUO);
11633
11634 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11635 // can induce double-rounding which may alter the results. We can
11636 // correct for this using a trick explained in: Boldo, Sylvie, and
11637 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11638 // World Congress. 2005.
11639 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11640 EVT I32 = F32.changeTypeToInteger();
11641 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11642 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11643
11644 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11645 // turning into infinities.
11646 SDValue NaN =
11647 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11648
11649 // Factor in the contribution of the low 16 bits.
// Standard round-to-nearest-even bias: 0x7fff plus the lsb of the kept part.
11650 SDValue One = DAG.getConstant(1, dl, I32);
11651 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11652 DAG.getShiftAmountConstant(16, I32, dl));
11653 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11654 SDValue RoundingBias =
11655 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11656 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11657
11658 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11659 // 0x80000000.
11660 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11661
11662 // Now that we have rounded, shift the bits into position.
11663 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11664 DAG.getShiftAmountConstant(16, I32, dl));
11665 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11666 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11667 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11668 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11669 }
// Non-bf16 destinations are not expanded here.
11670 return SDValue();
11671}
11672
// Expand ISD::VECTOR_SPLICE for scalable vectors by going through a stack
// slot: store V1 and V2 contiguously, compute the start address of the
// spliced window (forward offset for Imm >= 0, backward from the end of V1
// for Imm < 0, clamped so the load stays inside V1:V2), then load the
// result. Fixed-length splices are expected to use SHUFFLE_VECTOR instead.
// NOTE(review): the signature line (11673), and lines 11697-11698 (MemVT
// declaration), 11711 (OffsetToV2 APInt), 11721/11744 (MachinePointerInfo
// args of the loads) and 11735 (trailing-bytes APInt term) are missing from
// this extraction.
11674 SelectionDAG &DAG) const {
11675 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11676 assert(Node->getValueType(0).isScalableVector() &&
11677 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11678
11679 EVT VT = Node->getValueType(0);
11680 SDValue V1 = Node->getOperand(0);
11681 SDValue V2 = Node->getOperand(1);
11682 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11683 SDLoc DL(Node);
11684
11685 // Expand through memory thusly:
11686 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11687 // Store V1, Ptr
11688 // Store V2, Ptr + sizeof(V1)
11689 // If (Imm < 0)
11690 // TrailingElts = -Imm
11691 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11692 // else
11693 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11694 // Res = Load Ptr
11695
11696 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11697
11699 VT.getVectorElementCount() * 2);
11700 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11701 EVT PtrVT = StackPtr.getValueType();
11702 auto &MF = DAG.getMachineFunction();
11703 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11704 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11705
11706 // Store the lo part of CONCAT_VECTORS(V1, V2)
11707 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11708 // Store the hi part of CONCAT_VECTORS(V1, V2)
// sizeof(V1) for a scalable type is vscale-dependent, hence getVScale.
11709 SDValue OffsetToV2 = DAG.getVScale(
11710 DL, PtrVT,
11712 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11713 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11714
11715 if (Imm >= 0) {
11716 // Load back the required element. getVectorElementPointer takes care of
11717 // clamping the index if it's out-of-bounds.
11718 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11719 // Load the spliced result
11720 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11722 }
11723
11724 uint64_t TrailingElts = -Imm;
11725
11726 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11727 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11728 SDValue TrailingBytes =
11729 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11730
// Only clamp when the request can exceed V1's minimum (known) size; the
// UMIN against the runtime vector length handles vscale > 1.
11731 if (TrailingElts > VT.getVectorMinNumElements()) {
11732 SDValue VLBytes =
11733 DAG.getVScale(DL, PtrVT,
11734 APInt(PtrVT.getFixedSizeInBits(),
11736 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11737 }
11738
11739 // Calculate the start address of the spliced result.
11740 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11741
11742 // Load the spliced result
11743 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11745}
11746
11748 SelectionDAG &DAG) const {
11749 SDLoc DL(Node);
11750 SDValue Vec = Node->getOperand(0);
11751 SDValue Mask = Node->getOperand(1);
11752 SDValue Passthru = Node->getOperand(2);
11753
11754 EVT VecVT = Vec.getValueType();
11755 EVT ScalarVT = VecVT.getScalarType();
11756 EVT MaskVT = Mask.getValueType();
11757 EVT MaskScalarVT = MaskVT.getScalarType();
11758
11759 // Needs to be handled by targets that have scalable vector types.
11760 if (VecVT.isScalableVector())
11761 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11762
11763 SDValue StackPtr = DAG.CreateStackTemporary(
11764 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11765 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11766 MachinePointerInfo PtrInfo =
11768
11769 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11770 SDValue Chain = DAG.getEntryNode();
11771 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11772
11773 bool HasPassthru = !Passthru.isUndef();
11774
11775 // If we have a passthru vector, store it on the stack, overwrite the matching
11776 // positions and then re-write the last element that was potentially
11777 // overwritten even though mask[i] = false.
11778 if (HasPassthru)
11779 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11780
11781 SDValue LastWriteVal;
11782 APInt PassthruSplatVal;
11783 bool IsSplatPassthru =
11784 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11785
11786 if (IsSplatPassthru) {
11787 // As we do not know which position we wrote to last, we cannot simply
11788 // access that index from the passthru vector. So we first check if passthru
11789 // is a splat vector, to use any element ...
11790 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11791 } else if (HasPassthru) {
11792 // ... if it is not a splat vector, we need to get the passthru value at
11793 // position = popcount(mask) and re-load it from the stack before it is
11794 // overwritten in the loop below.
11795 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11796 SDValue Popcount = DAG.getNode(
11797 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11798 Popcount =
11800 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11801 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11802 SDValue LastElmtPtr =
11803 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11804 LastWriteVal = DAG.getLoad(
11805 ScalarVT, DL, Chain, LastElmtPtr,
11807 Chain = LastWriteVal.getValue(1);
11808 }
11809
11810 unsigned NumElms = VecVT.getVectorNumElements();
11811 for (unsigned I = 0; I < NumElms; I++) {
11813
11814 SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11815 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11816 Chain = DAG.getStore(
11817 Chain, DL, ValI, OutPtr,
11819
11820 // Get the mask value and add it to the current output position. This
11821 // either increments by 1 if MaskI is true or adds 0 otherwise.
11822 // Freeze in case we have poison/undef mask entries.
11823 SDValue MaskI = DAG.getFreeze(
11824 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11825 MaskI = DAG.getFreeze(MaskI);
11826 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11827 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11828 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11829
11830 if (HasPassthru && I == NumElms - 1) {
11831 SDValue EndOfVector =
11832 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11833 SDValue AllLanesSelected =
11834 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11835 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11836 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11837
11838 // Re-write the last ValI if all lanes were selected. Otherwise,
11839 // overwrite the last write it with the passthru value.
11840 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11841 LastWriteVal, SDNodeFlags::Unpredictable);
11842 Chain = DAG.getStore(
11843 Chain, DL, LastWriteVal, OutPtr,
11845 }
11846 }
11847
11848 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11849}
11850
// Legalize a SETCC (or VP_SETCC, when Mask/EVL are set) whose condition code
// is not natively supported. Tries, in order: swapping operands, inverting
// the condition (possibly also swapping), a logical-ops expansion for i1
// operands, and finally splitting one FP comparison into two comparisons
// joined with AND/OR. On success LHS/RHS/CC are rewritten in place (RHS/CC
// may become empty when LHS already holds the full result) and NeedInvert
// reports whether the caller must invert the outcome. Returns false when the
// condition code was already legal and nothing changed.
// NOTE(review): the signature line (11851) and lines 11865 (`case` label for
// the Legal action), 11868-11869 (Expand case label plus initial swapped
// CondCode), 11933 (CC1/CC2 declarations), 11945 and 11950 (assert heads)
// are missing from this extraction.
11852 SDValue &LHS, SDValue &RHS,
11853 SDValue &CC, SDValue Mask,
11854 SDValue EVL, bool &NeedInvert,
11855 const SDLoc &dl, SDValue &Chain,
11856 bool IsSignaling) const {
11857 MVT OpVT = LHS.getSimpleValueType();
11858 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11859 NeedInvert = false;
11860 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11861 bool IsNonVP = !EVL;
11862 switch (getCondCodeAction(CCCode, OpVT)) {
11863 default:
11864 llvm_unreachable("Unknown condition code action!");
11866 // Nothing to do.
11867 break;
// First attempt: swap the operands if the swapped condition code is legal.
11870 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11871 std::swap(LHS, RHS);
11872 CC = DAG.getCondCode(InvCC);
11873 return true;
11874 }
11875 // Swapping operands didn't work. Try inverting the condition.
11876 bool NeedSwap = false;
11877 InvCC = getSetCCInverse(CCCode, OpVT);
11878 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11879 // If inverting the condition is not enough, try swapping operands
11880 // on top of it.
11881 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11882 NeedSwap = true;
11883 }
11884 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11885 CC = DAG.getCondCode(InvCC);
11886 NeedInvert = true;
11887 if (NeedSwap)
11888 std::swap(LHS, RHS);
11889 return true;
11890 }
11891
11892 // Special case: expand i1 comparisons using logical operations.
11893 if (OpVT == MVT::i1) {
11894 SDValue Ret;
11895 switch (CCCode) {
11896 default:
11897 llvm_unreachable("Unknown integer setcc!");
11898 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
11899 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
11900 MVT::i1);
11901 break;
11902 case ISD::SETNE: // X != Y --> (X ^ Y)
11903 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
11904 break;
11905 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11906 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11907 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
11908 DAG.getNOT(dl, LHS, MVT::i1));
11909 break;
11910 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11911 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11912 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
11913 DAG.getNOT(dl, RHS, MVT::i1));
11914 break;
11915 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11916 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11917 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
11918 DAG.getNOT(dl, LHS, MVT::i1));
11919 break;
11920 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11921 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11922 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
11923 DAG.getNOT(dl, RHS, MVT::i1));
11924 break;
11925 }
11926
11927 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
11928 RHS = SDValue();
11929 CC = SDValue();
11930 return true;
11931 }
11932
// Last resort: split the comparison into two simpler comparisons
// (CC1 and CC2, declared on the missing line 11933) joined by Opc.
11934 unsigned Opc = 0;
11935 switch (CCCode) {
11936 default:
11937 llvm_unreachable("Don't know how to expand this condition!");
11938 case ISD::SETUO:
11939 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
11940 CC1 = ISD::SETUNE;
11941 CC2 = ISD::SETUNE;
11942 Opc = ISD::OR;
11943 break;
11944 }
11946 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11947 NeedInvert = true;
11948 [[fallthrough]];
11949 case ISD::SETO:
11951 "If SETO is expanded, SETOEQ must be legal!");
11952 CC1 = ISD::SETOEQ;
11953 CC2 = ISD::SETOEQ;
11954 Opc = ISD::AND;
11955 break;
11956 case ISD::SETONE:
11957 case ISD::SETUEQ:
11958 // If the SETUO or SETO CC isn't legal, we might be able to use
11959 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11960 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11961 // the operands.
11962 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11963 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11964 isCondCodeLegal(ISD::SETOLT, OpVT))) {
11965 CC1 = ISD::SETOGT;
11966 CC2 = ISD::SETOLT;
11967 Opc = ISD::OR;
11968 NeedInvert = ((unsigned)CCCode & 0x8U);
11969 break;
11970 }
11971 [[fallthrough]];
11972 case ISD::SETOEQ:
11973 case ISD::SETOGT:
11974 case ISD::SETOGE:
11975 case ISD::SETOLT:
11976 case ISD::SETOLE:
11977 case ISD::SETUNE:
11978 case ISD::SETUGT:
11979 case ISD::SETUGE:
11980 case ISD::SETULT:
11981 case ISD::SETULE:
11982 // If we are floating point, assign and break, otherwise fall through.
11983 if (!OpVT.isInteger()) {
11984 // We can use the 4th bit to tell if we are the unordered
11985 // or ordered version of the opcode.
11986 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11987 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11988 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11989 break;
11990 }
11991 // Fallthrough if we are unsigned integer.
11992 [[fallthrough]];
11993 case ISD::SETLE:
11994 case ISD::SETGT:
11995 case ISD::SETGE:
11996 case ISD::SETLT:
11997 case ISD::SETNE:
11998 case ISD::SETEQ:
11999 // If all combinations of inverting the condition and swapping operands
12000 // didn't work then we have no means to expand the condition.
12001 llvm_unreachable("Don't know how to expand this condition!");
12002 }
12003
12004 SDValue SetCC1, SetCC2;
12005 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12006 // If we aren't the ordered or unorder operation,
12007 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12008 if (IsNonVP) {
12009 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12010 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12011 } else {
12012 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12013 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12014 }
12015 } else {
12016 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12017 if (IsNonVP) {
12018 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12019 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12020 } else {
12021 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12022 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12023 }
12024 }
// Strict-FP comparisons carry chains; merge them so ordering is preserved.
12025 if (Chain)
12026 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12027 SetCC2.getValue(1));
12028 if (IsNonVP)
12029 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12030 else {
12031 // Transform the binary opcode to the VP equivalent.
12032 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12033 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12034 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12035 }
12036 RHS = SDValue();
12037 CC = SDValue();
12038 return true;
12039 }
12040 }
12041 return false;
12042}
12043
// Expand an n-ary vector operation by splitting every operand in half,
// applying the opcode to the low and high halves independently, and
// concatenating the results. Bails out (returns an empty SDValue) when the
// type cannot be split into two equal legal halves or when the op is not at
// least legal/custom/promoted at the half width (in which case it would be
// unrolled anyway).
// NOTE(review): the signature line (12044,
// `SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,`)
// is missing from this extraction.
12045 SelectionDAG &DAG) const {
12046 EVT VT = Node->getValueType(0);
12047 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12048 // split into two equal parts.
12049 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12050 return SDValue();
12051
12052 // Restrict expansion to cases where both parts can be concatenated.
12053 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12054 if (LoVT != HiVT || !isTypeLegal(LoVT))
12055 return SDValue();
12056
12057 SDLoc DL(Node);
12058 unsigned Opcode = Node->getOpcode();
12059
12060 // Don't expand if the result is likely to be unrolled anyway.
12061 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12062 return SDValue();
12063
// Split every operand into (lo, hi) so the halves can be processed per-side.
12064 SmallVector<SDValue, 4> LoOps, HiOps;
12065 for (const SDValue &V : Node->op_values()) {
12066 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12067 LoOps.push_back(Lo);
12068 HiOps.push_back(Hi);
12069 }
12070
12071 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12072 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12073 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12074}
unsigned const MachineRegisterInfo * MRI
static const LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:533
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1329
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1155
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1135
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1095
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1106
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1547
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1392
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
APInt reverseBits() const
Definition: APInt.cpp:741
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:834
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1319
APInt multiplicativeInverse() const
Definition: APInt.cpp:1248
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
APInt byteSwap() const
Definition: APInt.cpp:719
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1424
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1119
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:462
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
Class to represent pointers.
Definition: DerivedTypes.h:670
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:980
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:456
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:888
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:904
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:571
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
iterator end() const
Definition: StringRef.h:118
Class to represent struct types.
Definition: DerivedTypes.h:218
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:735
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:295
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:698
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
Definition: APInt.cpp:2982
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1123
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1660
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1665
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1635
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1602
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:555
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1771
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1610
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1535
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:313
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:247
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:211
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:79
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:281
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:187
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:313
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:60
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:97
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:159
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:278
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:205
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setIsSigned(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...