//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
#include <deque>

using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
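
// Illustrative example (not part of the upstream source): given IR such as
//
//   define i32 @caller(i32 %x) "disable-tail-calls"="true" {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// isInTailCallPosition returns false because of the function attribute, even
// though the call's only use is the return. With the attribute removed (and
// matching return attributes), it returns true and sets Chain to the call's
// input chain.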

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
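
// Illustrative example (not upstream code): on a target where some register
// is both an argument register and callee-saved, a tail call is only safe if
// the outgoing value in that register is the caller's own incoming value,
// i.e. OutVals[I] is a CopyFromReg of the virtual register created for that
// register's live-in; anything else would clobber a value the caller must
// preserve.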

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
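
// Illustrative example (not upstream code): for a call such as
//   call void @f(ptr byval(%struct.S) %p)
// this sets IsByVal, records IndirectType = %struct.S, and, absent an
// explicit stack alignment, falls back to the parameter's own alignment.
// The assert above encodes that byval, preallocated, inalloca and sret are
// mutually exclusive ABI attributes on a single parameter.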

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            EVT RetVT, ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError("unsupported library call operation");

  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
                    Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
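
// Usage sketch (illustrative only; Impl, the types and the operands are
// placeholders, not taken from this file): a lowering that must emit a
// runtime-library call would invoke this roughly as
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   CallOptions.setIsSigned(true);
//   std::pair<SDValue, SDValue> Res =
//       makeLibCall(DAG, Impl, MVT::i64, Ops, CallOptions, dl);
// where Res.first is the call's result value and Res.second the output chain.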

bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
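
// Worked example (illustrative): for a 15-byte memcpy on a target whose
// widest safe, legal type is i64, the loop first emits an i64. If overlapping
// accesses are allowed and misaligned i64 accesses are fast, the remaining
// 7 bytes are covered by a second, overlapping i64 (MemOps = {i64, i64});
// otherwise the tail is split into non-overlapping pieces
// (MemOps = {i64, i32, i16, i8}).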

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {OldLHS.getValueType(), OldRHS.getValueType()};
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    DAG.getContext()->emitError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      DAG.getContext()->emitError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                         DAG.getValueType(MVT::i1));

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
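
// Example of the two-libcall path (illustrative, f32 shown): softening
//   setcc f32 %a, %b, setueq
// emits __unordsf2 (LC1 = UO_F32) and __eqsf2 (LC2 = OEQ_F32) and combines
// them as
//   (or (setcc (call __unordsf2), 0, setne),
//       (setcc (call __eqsf2),    0, seteq))
// with the per-libcall predicates taken from
// getSoftFloatCmpLibcallPredicate. For SETONE the two results are instead
// combined with AND of the inverted predicates.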

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
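
// Illustrative example (not upstream code): for a dso_local global @g in a
// non-PIC module, a node such as (add (GlobalAddress @g), 16) can be folded
// into a single GlobalAddress with offset 16. Under PIC, or when @g may be
// preempted, the fold is rejected because the address must first be
// materialized via a base register or a GOT load.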

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
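
// Example (illustrative): with only the low byte demanded,
//   (and X, 0xFFF0F) --> (and X, 0x0F)
// since DemandedBits & C clears the undemanded constant bits. A 'not'
// pattern such as (xor X, -1) is deliberately left alone so the canonical
// form survives.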

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
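
// Example (illustrative): if only the low 8 bits of a 32-bit add are demanded
// and the target reports free i32->i8 truncation and free i8->i32
// zero-extension, then
//   (i32 add X, Y) --> (i32 any_extend (i8 add (trunc X), (trunc Y)))
// which later combines can exploit to shrink the whole expression tree.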

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}
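
// Usage sketch (illustrative): a target's PerformDAGCombine typically calls
// this entry point as
//   APInt Demanded = APInt::getLowBitsSet(BitWidth, 16);
//   if (TLI.SimplifyDemandedBits(N->getOperand(0), Demanded, DCI))
//     return SDValue(N, 0);
// On success the replacement has already been committed to the DAG via
// CommitTargetLoweringOpt and the node is requeued on the combine worklist.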

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
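
// Example (illustrative): if only the sign bit of (and X, C) is demanded and
// C is known to have its sign bit set, the 'and' contributes nothing and X is
// returned directly. Because the node itself is not rewritten, callers can
// look through operands that have other users without creating new nodes.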

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
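
// Example (illustrative): for i8 values A and B zero-extended to i16,
//   (srl (add (zext A), (zext B)), 1)          --> (zext (avgflooru A, B))
//   (srl (add (add (zext A), (zext B)), 1), 1) --> (zext (avgceilu A, B))
// provided AVGFLOORU/AVGCEILU are legal at the narrower type (or the adds
// provably cannot overflow at the original width).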

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.setAllConflict();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.setAllConflict();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.setAllConflict();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
1614 case ISD::XOR: {
1615 SDValue Op0 = Op.getOperand(0);
1616 SDValue Op1 = Op.getOperand(1);
1617
1618 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1619 Depth + 1))
1620 return true;
1621 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1622 Depth + 1))
1623 return true;
1624
1625 // If all of the demanded bits are known zero on one side, return the other.
1626 // These bits cannot contribute to the result of the 'xor'.
1627 if (DemandedBits.isSubsetOf(Known.Zero))
1628 return TLO.CombineTo(Op, Op0);
1629 if (DemandedBits.isSubsetOf(Known2.Zero))
1630 return TLO.CombineTo(Op, Op1);
1631 // If the operation can be done in a smaller type, do so.
1633 return true;
1634
1635 // If all of the unknown bits are known to be zero on one side or the other
1636 // turn this into an *inclusive* or.
1637 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1638 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1639 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1640
1641 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1642 if (C) {
1643 // If one side is a constant, and all of the set bits in the constant are
1644 // also known set on the other side, turn this into an AND, as we know
1645 // the bits will be cleared.
1646 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1647 // NB: it is okay if more bits are known than are requested
1648 if (C->getAPIntValue() == Known2.One) {
1649 SDValue ANDC =
1650 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1651 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1652 }
1653
1654 // If the RHS is a constant, see if we can change it. Don't alter a -1
1655 // constant because that's a 'not' op, and that is better for combining
1656 // and codegen.
1657 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1658 // We're flipping all demanded bits. Flip the undemanded bits too.
1659 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1660 return TLO.CombineTo(Op, New);
1661 }
1662
1663 unsigned Op0Opcode = Op0.getOpcode();
1664 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1665 if (ConstantSDNode *ShiftC =
1666 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1667 // Don't crash on an oversized shift. We can not guarantee that a
1668 // bogus shift has been simplified to undef.
1669 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1670 uint64_t ShiftAmt = ShiftC->getZExtValue();
1672 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1673 : Ones.lshr(ShiftAmt);
1674 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1675 isDesirableToCommuteXorWithShift(Op.getNode())) {
1676 // If the xor constant is a demanded mask, do a 'not' before the
1677 // shift:
1678 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1679 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1680 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1681 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1682 Op0.getOperand(1)));
1683 }
1684 }
1685 }
1686 }
1687 }
1688
1689 // If we can't turn this into a 'not', try to shrink the constant.
1690 if (!C || !C->isAllOnes())
1691 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1692 return true;
1693
1694 // Attempt to avoid multi-use ops if we don't need anything from them.
1695 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1696 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1697 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1698 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1699 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1700 if (DemandedOp0 || DemandedOp1) {
1701 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1702 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1703 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1704 return TLO.CombineTo(Op, NewOp);
1705 }
1706 }
1707
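// KnownBits for XOR: a bit is known only when it is known in both inputs:
// Zero = (Zero0 & Zero1) | (One0 & One1), One = (Zero0 & One1) | (One0 & Zero1).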
1708 Known ^= Known2;
1709 break;
1710 }
1711 case ISD::SELECT:
1712 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1713 Known, TLO, Depth + 1))
1714 return true;
1715 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1716 Known2, TLO, Depth + 1))
1717 return true;
1718
1719 // If the operands are constants, see if we can simplify them.
1720 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1721 return true;
1722
1723 // Only known if known in both the LHS and RHS.
1724 Known = Known.intersectWith(Known2);
1725 break;
1726 case ISD::VSELECT:
1727 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1728 Known, TLO, Depth + 1))
1729 return true;
1730 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1731 Known2, TLO, Depth + 1))
1732 return true;
1733
1734 // Only known if known in both the LHS and RHS.
1735 Known = Known.intersectWith(Known2);
1736 break;
1737 case ISD::SELECT_CC:
1738 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1739 Known, TLO, Depth + 1))
1740 return true;
1741 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1742 Known2, TLO, Depth + 1))
1743 return true;
1744
1745 // If the operands are constants, see if we can simplify them.
1746 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1747 return true;
1748
1749 // Only known if known in both the LHS and RHS.
1750 Known = Known.intersectWith(Known2);
1751 break;
1752 case ISD::SETCC: {
1753 SDValue Op0 = Op.getOperand(0);
1754 SDValue Op1 = Op.getOperand(1);
1755 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1756 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1757 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1758 // -1, we may be able to bypass the setcc.
1759 if (DemandedBits.isSignMask() &&
1760 Op0.getScalarValueSizeInBits() == BitWidth &&
1761 getBooleanContents(Op0.getValueType()) ==
1762 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1763 // If we're testing X < 0, then this compare isn't needed - just use X!
1764 // FIXME: We're limiting to integer types here, but this should also work
1765 // if we don't care about FP signed-zero. The use of SETLT with FP means
1766 // that we don't care about NaNs.
1767 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1768 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1769 return TLO.CombineTo(Op, Op0);
1770
1771 // TODO: Should we check for other forms of sign-bit comparisons?
1772 // Examples: X <= -1, X >= 0
1773 }
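// e.g. with 0/-1 booleans, (setlt X, 0) yields the splat of X's sign bit,
// so when only the sign bit is demanded, X itself already holds the answer.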
1774 if (getBooleanContents(Op0.getValueType()) ==
1775 TargetLowering::ZeroOrOneBooleanContent &&
1776 BitWidth > 1)
1777 Known.Zero.setBitsFrom(1);
1778 break;
1779 }
1780 case ISD::SHL: {
1781 SDValue Op0 = Op.getOperand(0);
1782 SDValue Op1 = Op.getOperand(1);
1783 EVT ShiftVT = Op1.getValueType();
1784
1785 if (std::optional<unsigned> KnownSA =
1786 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1787 unsigned ShAmt = *KnownSA;
1788 if (ShAmt == 0)
1789 return TLO.CombineTo(Op, Op0);
1790
1791 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1792 // single shift. We can do this if the bottom bits (which are shifted
1793 // out) are never demanded.
1794 // TODO - support non-uniform vector amounts.
1795 if (Op0.getOpcode() == ISD::SRL) {
1796 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1797 if (std::optional<unsigned> InnerSA =
1798 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1799 unsigned C1 = *InnerSA;
1800 unsigned Opc = ISD::SHL;
1801 int Diff = ShAmt - C1;
1802 if (Diff < 0) {
1803 Diff = -Diff;
1804 Opc = ISD::SRL;
1805 }
1806 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1807 return TLO.CombineTo(
1808 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1809 }
1810 }
1811 }
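// e.g. ((X >>u 4) << 6) equals (X << 2) with the low 6 bits cleared; if
// those low 6 bits are not demanded, the single shift (X << 2) suffices.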
1812
1813 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1814 // are not demanded. This will likely allow the anyext to be folded away.
1815 // TODO - support non-uniform vector amounts.
1816 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1817 SDValue InnerOp = Op0.getOperand(0);
1818 EVT InnerVT = InnerOp.getValueType();
1819 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1820 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1821 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1822 SDValue NarrowShl = TLO.DAG.getNode(
1823 ISD::SHL, dl, InnerVT, InnerOp,
1824 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1825 return TLO.CombineTo(
1826 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1827 }
1828
1829 // Repeat the SHL optimization above in cases where an extension
1830 // intervenes: (shl (anyext (shr x, c1)), c2) to
1831 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1832 // aren't demanded (as above) and that the shifted upper c1 bits of
1833 // x aren't demanded.
1834 // TODO - support non-uniform vector amounts.
1835 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1836 InnerOp.hasOneUse()) {
1837 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1838 InnerOp, DemandedElts, Depth + 2)) {
1839 unsigned InnerShAmt = *SA2;
1840 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1841 DemandedBits.getActiveBits() <=
1842 (InnerBits - InnerShAmt + ShAmt) &&
1843 DemandedBits.countr_zero() >= ShAmt) {
1844 SDValue NewSA =
1845 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1846 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1847 InnerOp.getOperand(0));
1848 return TLO.CombineTo(
1849 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1850 }
1851 }
1852 }
1853 }
1854
1855 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1856 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1857 Depth + 1)) {
1858 // Disable the nsw and nuw flags. We can no longer guarantee that we
1859 // won't wrap after simplification.
1860 Op->dropFlags(SDNodeFlags::NoWrap);
1861 return true;
1862 }
1863 Known <<= ShAmt;
1864 // Low bits known zero.
1865 Known.Zero.setLowBits(ShAmt);
1866
1867 // Attempt to avoid multi-use ops if we don't need anything from them.
1868 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1869 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1870 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1871 if (DemandedOp0) {
1872 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1873 return TLO.CombineTo(Op, NewOp);
1874 }
1875 }
1876
1877 // TODO: Can we merge this fold with the one below?
1878 // Try shrinking the operation as long as the shift amount will still be
1879 // in range.
1880 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1881 Op.getNode()->hasOneUse()) {
1882 // Search for the smallest integer type with free casts to and from
1883 // Op's type. For expedience, just check power-of-2 integer types.
1884 unsigned DemandedSize = DemandedBits.getActiveBits();
1885 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1886 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1887 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1888 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1889 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1890 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1891 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1892 assert(DemandedSize <= SmallVTBits &&
1893 "Narrowed below demanded bits?");
1894 // We found a type with free casts.
1895 SDValue NarrowShl = TLO.DAG.getNode(
1896 ISD::SHL, dl, SmallVT,
1897 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1898 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1899 return TLO.CombineTo(
1900 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1901 }
1902 }
1903 }
1904
1905 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1906 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1907 // Only do this if we demand the upper half so the knownbits are correct.
1908 unsigned HalfWidth = BitWidth / 2;
1909 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1910 DemandedBits.countLeadingOnes() >= HalfWidth) {
1911 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1912 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1913 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1914 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1915 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1916 // If we're demanding the upper bits at all, we must ensure
1917 // that the upper bits of the shift result are known to be zero,
1918 // which is equivalent to the narrow shift being NUW.
1919 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1920 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1921 SDNodeFlags Flags;
1922 Flags.setNoSignedWrap(IsNSW);
1923 Flags.setNoUnsignedWrap(IsNUW);
1924 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1925 SDValue NewShiftAmt =
1926 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1927 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1928 NewShiftAmt, Flags);
1929 SDValue NewExt =
1930 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1931 return TLO.CombineTo(Op, NewExt);
1932 }
1933 }
1934 }
1935 } else {
1936 // This is a variable shift, so we can't shift the demand mask by a known
1937 // amount. But if we are not demanding high bits, then we are not
1938 // demanding those bits from the pre-shifted operand either.
1939 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1940 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1941 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1942 Depth + 1)) {
1943 // Disable the nsw and nuw flags. We can no longer guarantee that we
1944 // won't wrap after simplification.
1945 Op->dropFlags(SDNodeFlags::NoWrap);
1946 return true;
1947 }
1948 Known.resetAll();
1949 }
1950 }
1951
1952 // If we are only demanding sign bits then we can use the shift source
1953 // directly.
1954 if (std::optional<unsigned> MaxSA =
1955 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1956 unsigned ShAmt = *MaxSA;
1957 unsigned NumSignBits =
1958 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1959 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1960 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1961 return TLO.CombineTo(Op, Op0);
1962 }
1963 break;
1964 }
1965 case ISD::SRL: {
1966 SDValue Op0 = Op.getOperand(0);
1967 SDValue Op1 = Op.getOperand(1);
1968 EVT ShiftVT = Op1.getValueType();
1969
1970 if (std::optional<unsigned> KnownSA =
1971 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1972 unsigned ShAmt = *KnownSA;
1973 if (ShAmt == 0)
1974 return TLO.CombineTo(Op, Op0);
1975
1976 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1977 // single shift. We can do this if the top bits (which are shifted out)
1978 // are never demanded.
1979 // TODO - support non-uniform vector amounts.
1980 if (Op0.getOpcode() == ISD::SHL) {
1981 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1982 if (std::optional<unsigned> InnerSA =
1983 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1984 unsigned C1 = *InnerSA;
1985 unsigned Opc = ISD::SRL;
1986 int Diff = ShAmt - C1;
1987 if (Diff < 0) {
1988 Diff = -Diff;
1989 Opc = ISD::SHL;
1990 }
1991 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1992 return TLO.CombineTo(
1993 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1994 }
1995 }
1996 }
1997
1998 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1999 // single sra. We can do this if the top bits are never demanded.
2000 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2001 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 // Clamp the combined shift amount if it exceeds the bit width.
2006 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2007 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2008 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2009 Op0.getOperand(0), NewSA));
2010 }
2011 }
2012 }
2013
2014 APInt InDemandedMask = (DemandedBits << ShAmt);
2015
2016 // If the shift is exact, then it does demand the low bits (and knows that
2017 // they are zero).
2018 if (Op->getFlags().hasExact())
2019 InDemandedMask.setLowBits(ShAmt);
2020
2021 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2022 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2023 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2024 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2025 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2026 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2027 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2028 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2029 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2030 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2031 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2032 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2033 SDValue NewShiftAmt =
2034 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2035 SDValue NewShift =
2036 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2037 return TLO.CombineTo(
2038 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2039 }
2040 }
2041
2042 // Compute the new bits that are at the top now.
2043 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2044 Depth + 1))
2045 return true;
2046 Known >>= ShAmt;
2047 // High bits known zero.
2048 Known.Zero.setHighBits(ShAmt);
2049
2050 // Attempt to avoid multi-use ops if we don't need anything from them.
2051 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2052 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2053 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2054 if (DemandedOp0) {
2055 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2056 return TLO.CombineTo(Op, NewOp);
2057 }
2058 }
2059 } else {
2060 // Use generic knownbits computation as it has support for non-uniform
2061 // shift amounts.
2062 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2063 }
2064
2065 // If we are only demanding sign bits then we can use the shift source
2066 // directly.
2067 if (std::optional<unsigned> MaxSA =
2068 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2069 unsigned ShAmt = *MaxSA;
2070 // The demanded bits must already be sign bits, and we can't demand any
2071 // shifted-in zeroes.
2072 if (DemandedBits.countl_zero() >= ShAmt) {
2073 unsigned NumSignBits =
2074 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2075 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2076 return TLO.CombineTo(Op, Op0);
2077 }
2078 }
2079
2080 // Try to match AVG patterns (after shift simplification).
2081 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2082 DemandedElts, Depth + 1))
2083 return TLO.CombineTo(Op, AVG);
2084
2085 break;
2086 }
2087 case ISD::SRA: {
2088 SDValue Op0 = Op.getOperand(0);
2089 SDValue Op1 = Op.getOperand(1);
2090 EVT ShiftVT = Op1.getValueType();
2091
2092 // If we only want bits that already match the signbit then we don't need
2093 // to shift.
2094 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2095 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2096 NumHiDemandedBits)
2097 return TLO.CombineTo(Op, Op0);
2098
2099 // If this is an arithmetic shift right and only the low-bit is set, we can
2100 // always convert this into a logical shr, even if the shift amount is
2101 // variable. The low bit of the shift cannot be an input sign bit unless
2102 // the shift amount is >= the size of the datatype, which is undefined.
2103 if (DemandedBits.isOne())
2104 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2105
2106 if (std::optional<unsigned> KnownSA =
2107 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2108 unsigned ShAmt = *KnownSA;
2109 if (ShAmt == 0)
2110 return TLO.CombineTo(Op, Op0);
2111
2112 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2113 // supports sext_inreg.
2114 if (Op0.getOpcode() == ISD::SHL) {
2115 if (std::optional<unsigned> InnerSA =
2116 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2117 unsigned LowBits = BitWidth - ShAmt;
2118 EVT ExtVT = VT.changeElementType(
2119 *TLO.DAG.getContext(),
2120 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2121
2122 if (*InnerSA == ShAmt) {
2123 if (!TLO.LegalOperations() ||
2124 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2125 return TLO.CombineTo(
2126 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2127 Op0.getOperand(0),
2128 TLO.DAG.getValueType(ExtVT)));
2129
2130 // Even if we can't convert to sext_inreg, we might be able to
2131 // remove this shift pair if the input is already sign extended.
2132 unsigned NumSignBits =
2133 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2134 if (NumSignBits > ShAmt)
2135 return TLO.CombineTo(Op, Op0.getOperand(0));
2136 }
2137 }
2138 }
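// e.g. for i32, (sra (shl X, 24), 24) reproduces bits [0,8) of X and
// splats bit 7 upward, which is exactly sign_extend_inreg(X, i8).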
2139
2140 APInt InDemandedMask = (DemandedBits << ShAmt);
2141
2142 // If the shift is exact, then it does demand the low bits (and knows that
2143 // they are zero).
2144 if (Op->getFlags().hasExact())
2145 InDemandedMask.setLowBits(ShAmt);
2146
2147 // If any of the demanded bits are produced by the sign extension, we also
2148 // demand the input sign bit.
2149 if (DemandedBits.countl_zero() < ShAmt)
2150 InDemandedMask.setSignBit();
2151
2152 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2153 Depth + 1))
2154 return true;
2155 Known >>= ShAmt;
2156
2157 // If the input sign bit is known to be zero, or if none of the top bits
2158 // are demanded, turn this into an unsigned shift right.
2159 if (Known.Zero[BitWidth - ShAmt - 1] ||
2160 DemandedBits.countl_zero() >= ShAmt) {
2161 SDNodeFlags Flags;
2162 Flags.setExact(Op->getFlags().hasExact());
2163 return TLO.CombineTo(
2164 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2165 }
2166
2167 int Log2 = DemandedBits.exactLogBase2();
2168 if (Log2 >= 0) {
2169 // The bit must come from the sign.
2170 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2171 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2172 }
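// Reaching the Log2 fold implies DemandedBits.countl_zero() < ShAmt (the
// SRL conversion above would have fired otherwise), so the single demanded
// bit lies in the sign-extended region and equals the original sign bit.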
2173
2174 if (Known.One[BitWidth - ShAmt - 1])
2175 // New bits are known one.
2176 Known.One.setHighBits(ShAmt);
2177
2178 // Attempt to avoid multi-use ops if we don't need anything from them.
2179 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2180 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2181 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2182 if (DemandedOp0) {
2183 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2184 return TLO.CombineTo(Op, NewOp);
2185 }
2186 }
2187 }
2188
2189 // Try to match AVG patterns (after shift simplification).
2190 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2191 DemandedElts, Depth + 1))
2192 return TLO.CombineTo(Op, AVG);
2193
2194 break;
2195 }
2196 case ISD::FSHL:
2197 case ISD::FSHR: {
2198 SDValue Op0 = Op.getOperand(0);
2199 SDValue Op1 = Op.getOperand(1);
2200 SDValue Op2 = Op.getOperand(2);
2201 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2202
2203 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2204 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2205
2206 // For fshl, 0-shift returns the 1st arg.
2207 // For fshr, 0-shift returns the 2nd arg.
2208 if (Amt == 0) {
2209 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2210 Known, TLO, Depth + 1))
2211 return true;
2212 break;
2213 }
2214
2215 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2216 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2217 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2218 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
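// e.g. for fshl i8 with Amt == 3, result bit 6 comes from Op0 bit 3 and
// result bit 1 comes from Op1 bit 6, matching Demanded0 == DemandedBits.lshr(3)
// and Demanded1 == DemandedBits << 5.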
2219 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2220 Depth + 1))
2221 return true;
2222 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2223 Depth + 1))
2224 return true;
2225
2226 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2227 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2228 Known = Known.unionWith(Known2);
2229
2230 // Attempt to avoid multi-use ops if we don't need anything from them.
2231 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2232 !DemandedElts.isAllOnes()) {
2233 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2234 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2235 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2236 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2237 if (DemandedOp0 || DemandedOp1) {
2238 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2239 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2240 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2241 DemandedOp1, Op2);
2242 return TLO.CombineTo(Op, NewOp);
2243 }
2244 }
2245 }
2246
2247 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2248 if (isPowerOf2_32(BitWidth)) {
2249 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2250 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2251 Known2, TLO, Depth + 1))
2252 return true;
2253 }
2254 break;
2255 }
2256 case ISD::ROTL:
2257 case ISD::ROTR: {
2258 SDValue Op0 = Op.getOperand(0);
2259 SDValue Op1 = Op.getOperand(1);
2260 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2261
2262 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2263 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2264 return TLO.CombineTo(Op, Op0);
2265
2266 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2267 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2268 unsigned RevAmt = BitWidth - Amt;
2269
2270 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2271 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2272 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2273 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2274 Depth + 1))
2275 return true;
2276
2277 // rot*(x, 0) --> x
2278 if (Amt == 0)
2279 return TLO.CombineTo(Op, Op0);
2280
2281 // See if we don't demand either half of the rotated bits.
2282 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2283 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2284 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2285 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2286 }
2287 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2288 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2289 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2290 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2291 }
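// e.g. rotl(X, 8) on i32 differs from (X << 8) only in the low 8 bits and
// from (X >>u 24) only in the high 24 bits, hence the two demanded-bits
// guards above.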
2292 }
2293
2294 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2295 if (isPowerOf2_32(BitWidth)) {
2296 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2297 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2298 Depth + 1))
2299 return true;
2300 }
2301 break;
2302 }
2303 case ISD::SMIN:
2304 case ISD::SMAX:
2305 case ISD::UMIN:
2306 case ISD::UMAX: {
2307 unsigned Opc = Op.getOpcode();
2308 SDValue Op0 = Op.getOperand(0);
2309 SDValue Op1 = Op.getOperand(1);
2310
2311 // If we're only demanding signbits, then we can simplify to OR/AND node.
2312 unsigned BitOp =
2313 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2314 unsigned NumSignBits =
2315 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2316 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2317 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2318 if (NumSignBits >= NumDemandedUpperBits)
2319 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
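// e.g. for all-sign-bit (0 or -1) operands: smin(0,-1) == -1 == (0 | -1)
// and smax(0,-1) == 0 == (0 & -1), so OR/AND reproduce the min/max.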
2320
2321 // Check if one arg is always less/greater than (or equal to) the other arg.
2322 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2323 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2324 switch (Opc) {
2325 case ISD::SMIN:
2326 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2327 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2328 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2329 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2330 Known = KnownBits::smin(Known0, Known1);
2331 break;
2332 case ISD::SMAX:
2333 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2334 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2335 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2336 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2337 Known = KnownBits::smax(Known0, Known1);
2338 break;
2339 case ISD::UMIN:
2340 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2341 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2342 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2343 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2344 Known = KnownBits::umin(Known0, Known1);
2345 break;
2346 case ISD::UMAX:
2347 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2348 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2349 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2350 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2351 Known = KnownBits::umax(Known0, Known1);
2352 break;
2353 }
2354 break;
2355 }
2356 case ISD::BITREVERSE: {
2357 SDValue Src = Op.getOperand(0);
2358 APInt DemandedSrcBits = DemandedBits.reverseBits();
2359 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2360 Depth + 1))
2361 return true;
2362 Known = Known2.reverseBits();
2363 break;
2364 }
2365 case ISD::BSWAP: {
2366 SDValue Src = Op.getOperand(0);
2367
2368 // If the only bits demanded come from one byte of the bswap result,
2369 // just shift the input byte into position to eliminate the bswap.
2370 unsigned NLZ = DemandedBits.countl_zero();
2371 unsigned NTZ = DemandedBits.countr_zero();
2372
2373 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2374 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2375 // have 14 leading zeros, round to 8.
2376 NLZ = alignDown(NLZ, 8);
2377 NTZ = alignDown(NTZ, 8);
2378 // If we need exactly one byte, we can do this transformation.
2379 if (BitWidth - NLZ - NTZ == 8) {
2380 // Replace this with either a left or right shift to get the byte into
2381 // the right place.
2382 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2383 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2384 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2385 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2386 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2387 return TLO.CombineTo(Op, NewOp);
2388 }
2389 }
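// e.g. for i32 bswap with DemandedBits == 0x0000FF00 (NLZ -> 16, NTZ -> 8),
// the demanded byte comes from source bits [16,24), so a SRL by
// NLZ - NTZ == 8 replaces the bswap.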
2390
2391 APInt DemandedSrcBits = DemandedBits.byteSwap();
2392 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2393 Depth + 1))
2394 return true;
2395 Known = Known2.byteSwap();
2396 break;
2397 }
2398 case ISD::CTPOP: {
2399 // If only 1 bit is demanded, replace with PARITY as long as we're before
2400 // op legalization.
2401 // FIXME: Limit to scalars for now.
2402 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2403 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2404 Op.getOperand(0)));
2405
2406 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2407 break;
2408 }
2409 case ISD::SIGN_EXTEND_INREG: {
2410 SDValue Op0 = Op.getOperand(0);
2411 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2412 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2413
2414 // If we only care about the highest bit, don't bother shifting right.
2415 if (DemandedBits.isSignMask()) {
2416 unsigned MinSignedBits =
2417 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2418 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2419 // However if the input is already sign extended we expect the sign
2420 // extension to be dropped altogether later and do not simplify.
2421 if (!AlreadySignExtended) {
2422 // Compute the correct shift amount type, which must be getShiftAmountTy
2423 // for scalar types after legalization.
2424 SDValue ShiftAmt =
2425 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2426 return TLO.CombineTo(Op,
2427 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2428 }
2429 }
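// e.g. for sext_inreg from i8 in i32, demanding only bit 31 lets
// (X << 24) move the source sign bit (bit 7) to bit 31 without performing
// the extension.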
2430
2431 // If none of the extended bits are demanded, eliminate the sextinreg.
2432 if (DemandedBits.getActiveBits() <= ExVTBits)
2433 return TLO.CombineTo(Op, Op0);
2434
2435 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2436
2437 // Since the sign extended bits are demanded, we know that the sign
2438 // bit is demanded.
2439 InputDemandedBits.setBit(ExVTBits - 1);
2440
2441 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2442 Depth + 1))
2443 return true;
2444
2445 // If the sign bit of the input is known set or clear, then we know the
2446 // top bits of the result.
2447
2448 // If the input sign bit is known zero, convert this into a zero extension.
2449 if (Known.Zero[ExVTBits - 1])
2450 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2451
2452 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2453 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2454 Known.One.setBitsFrom(ExVTBits);
2455 Known.Zero &= Mask;
2456 } else { // Input sign bit unknown
2457 Known.Zero &= Mask;
2458 Known.One &= Mask;
2459 }
2460 break;
2461 }
2462 case ISD::BUILD_PAIR: {
2463 EVT HalfVT = Op.getOperand(0).getValueType();
2464 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2465
2466 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2467 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2468
2469 KnownBits KnownLo, KnownHi;
2470
2471 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2472 return true;
2473
2474 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2475 return true;
2476
2477 Known = KnownHi.concat(KnownLo);
2478 break;
2479 }
2480 case ISD::ZERO_EXTEND_VECTOR_INREG:
2481 if (VT.isScalableVector())
2482 return false;
2483 [[fallthrough]];
2484 case ISD::ZERO_EXTEND: {
2485 SDValue Src = Op.getOperand(0);
2486 EVT SrcVT = Src.getValueType();
2487 unsigned InBits = SrcVT.getScalarSizeInBits();
2488 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2489 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2490
2491 // If none of the top bits are demanded, convert this into an any_extend.
2492 if (DemandedBits.getActiveBits() <= InBits) {
2493 // If we only need the non-extended bits of the bottom element
2494 // then we can just bitcast to the result.
2495 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2496 VT.getSizeInBits() == SrcVT.getSizeInBits())
2497 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2498
2499 unsigned Opc =
2500 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2501 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2502 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2503 }
2504
2505 APInt InDemandedBits = DemandedBits.trunc(InBits);
2506 APInt InDemandedElts = DemandedElts.zext(InElts);
2507 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2508 Depth + 1)) {
2509 Op->dropFlags(SDNodeFlags::NonNeg);
2510 return true;
2511 }
2512 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2513 Known = Known.zext(BitWidth);
2514
2515 // Attempt to avoid multi-use ops if we don't need anything from them.
2516 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2517 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2518 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2519 break;
2520 }
2521 case ISD::SIGN_EXTEND_VECTOR_INREG:
2522 if (VT.isScalableVector())
2523 return false;
2524 [[fallthrough]];
2525 case ISD::SIGN_EXTEND: {
2526 SDValue Src = Op.getOperand(0);
2527 EVT SrcVT = Src.getValueType();
2528 unsigned InBits = SrcVT.getScalarSizeInBits();
2529 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2530 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2531
2532 APInt InDemandedElts = DemandedElts.zext(InElts);
2533 APInt InDemandedBits = DemandedBits.trunc(InBits);
2534
2535 // Since some of the sign extended bits are demanded, we know that the sign
2536 // bit is demanded.
2537 InDemandedBits.setBit(InBits - 1);
2538
2539 // If none of the top bits are demanded, convert this into an any_extend.
2540 if (DemandedBits.getActiveBits() <= InBits) {
2541 // If we only need the non-extended bits of the bottom element
2542 // then we can just bitcast to the result.
2543 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2544 VT.getSizeInBits() == SrcVT.getSizeInBits())
2545 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2546
2547 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2548 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2549 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2550 InBits) {
2551 unsigned Opc =
2552 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2553 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2554 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2555 }
2556 }
2557
2558 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2559 Depth + 1))
2560 return true;
2561 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2562
2563 // If the sign bit is known one, the top bits match.
2564 Known = Known.sext(BitWidth);
2565
2566 // If the sign bit is known zero, convert this to a zero extend.
2567 if (Known.isNonNegative()) {
2568 unsigned Opc =
2569 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2570 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2571 SDNodeFlags Flags;
2572 if (!IsVecInReg)
2573 Flags |= SDNodeFlags::NonNeg;
2574 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2575 }
2576 }
2577
2578 // Attempt to avoid multi-use ops if we don't need anything from them.
2579 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2580 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2581 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2582 break;
2583 }
2584 case ISD::ANY_EXTEND_VECTOR_INREG:
2585 if (VT.isScalableVector())
2586 return false;
2587 [[fallthrough]];
2588 case ISD::ANY_EXTEND: {
2589 SDValue Src = Op.getOperand(0);
2590 EVT SrcVT = Src.getValueType();
2591 unsigned InBits = SrcVT.getScalarSizeInBits();
2592 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2593 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2594
2595 // If we only need the bottom element then we can just bitcast.
2596 // TODO: Handle ANY_EXTEND?
2597 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2598 VT.getSizeInBits() == SrcVT.getSizeInBits())
2599 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2600
2601 APInt InDemandedBits = DemandedBits.trunc(InBits);
2602 APInt InDemandedElts = DemandedElts.zext(InElts);
2603 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2604 Depth + 1))
2605 return true;
2606 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2607 Known = Known.anyext(BitWidth);
2608
2609 // Attempt to avoid multi-use ops if we don't need anything from them.
2610 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2611 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2612 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2613 break;
2614 }
2615 case ISD::TRUNCATE: {
2616 SDValue Src = Op.getOperand(0);
2617
2618 // Simplify the input, using demanded bit information, and compute the known
2619 // zero/one bits live out.
2620 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2621 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2622 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2623 Depth + 1)) {
2624 // Disable the nsw and nuw flags. We can no longer guarantee that we
2625 // won't wrap after simplification.
2626 Op->dropFlags(SDNodeFlags::NoWrap);
2627 return true;
2628 }
2629 Known = Known.trunc(BitWidth);
2630
2631 // Attempt to avoid multi-use ops if we don't need anything from them.
2632 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2633 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2634 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2635
2636 // If the input is only used by this truncate, see if we can shrink it based
2637 // on the known demanded bits.
2638 switch (Src.getOpcode()) {
2639 default:
2640 break;
2641 case ISD::SRL:
2642 // Shrink SRL by a constant if none of the high bits shifted in are
2643 // demanded.
2644 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2645 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2646 // undesirable.
2647 break;
2648
2649 if (Src.getNode()->hasOneUse()) {
2650 if (isTruncateFree(Src, VT) &&
2651 !isTruncateFree(Src.getValueType(), VT)) {
2652 // If the truncate is only free at trunc(srl), do not turn it into
2653 // srl(trunc). The check first verifies that the truncate is free at
2654 // Src's opcode (srl), then that the truncate is not simply a
2655 // sub-register extraction. In testing, if both trunc(srl)'s and
2656 // srl(trunc)'s trunc are free, srl(trunc) performs better; if only
2657 // trunc(srl)'s trunc is free, trunc(srl) is better.
2658 break;
2659 }
2660
2661 std::optional<unsigned> ShAmtC =
2662 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2663 if (!ShAmtC || *ShAmtC >= BitWidth)
2664 break;
2665 unsigned ShVal = *ShAmtC;
2666
2667 APInt HighBits =
2668 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2669 HighBits.lshrInPlace(ShVal);
2670 HighBits = HighBits.trunc(BitWidth);
2671 if (!(HighBits & DemandedBits)) {
2672 // None of the shifted in bits are needed. Add a truncate of the
2673 // shift input, then shift it.
2674 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2675 SDValue NewTrunc =
2676 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2677 return TLO.CombineTo(
2678 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2679 }
2680 }
2681 break;
2682 }
2683
2684 break;
2685 }
2686 case ISD::AssertZext: {
2687 // AssertZext demands all of the high bits, plus any of the low bits
2688 // demanded by its users.
2689 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2690 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2691 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2692 TLO, Depth + 1))
2693 return true;
2694
2695 Known.Zero |= ~InMask;
2696 Known.One &= (~Known.Zero);
2697 break;
2698 }
2699 case ISD::EXTRACT_VECTOR_ELT: {
2700 SDValue Src = Op.getOperand(0);
2701 SDValue Idx = Op.getOperand(1);
2702 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2703 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2704
2705 if (SrcEltCnt.isScalable())
2706 return false;
2707
2708 // Demand the bits from every vector element without a constant index.
2709 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2710 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2711 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2712 if (CIdx->getAPIntValue().ult(NumSrcElts))
2713 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2714
2715 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2716 // anything about the extended bits.
2717 APInt DemandedSrcBits = DemandedBits;
2718 if (BitWidth > EltBitWidth)
2719 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2720
2721 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2722 Depth + 1))
2723 return true;
2724
2725 // Attempt to avoid multi-use ops if we don't need anything from them.
2726 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2727 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2728 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2729 SDValue NewOp =
2730 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2731 return TLO.CombineTo(Op, NewOp);
2732 }
2733 }
2734
2735 Known = Known2;
2736 if (BitWidth > EltBitWidth)
2737 Known = Known.anyext(BitWidth);
2738 break;
2739 }
2740 case ISD::BITCAST: {
2741 if (VT.isScalableVector())
2742 return false;
2743 SDValue Src = Op.getOperand(0);
2744 EVT SrcVT = Src.getValueType();
2745 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2746
2747 // If this is an FP->Int bitcast and if the sign bit is the only
2748 // thing demanded, turn this into a FGETSIGN.
2749 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2750 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2751 SrcVT.isFloatingPoint()) {
2752 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2753 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2754 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2755 SrcVT != MVT::f128) {
2756 // Cannot eliminate/lower SHL for f128 yet.
2757 EVT Ty = OpVTLegal ? VT : MVT::i32;
2758 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2759 // place. We expect the SHL to be eliminated by other optimizations.
2760 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2761 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2762 if (!OpVTLegal && OpVTSizeInBits > 32)
2763 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2764 unsigned ShVal = Op.getValueSizeInBits() - 1;
2765 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2766 return TLO.CombineTo(Op,
2767 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2768 }
2769 }
2770
2771 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2772 // Demand the elt/bit if any of the original elts/bits are demanded.
2773 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2774 unsigned Scale = BitWidth / NumSrcEltBits;
2775 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2776 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2777 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2778 for (unsigned i = 0; i != Scale; ++i) {
2779 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2780 unsigned BitOffset = EltOffset * NumSrcEltBits;
2781 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2782 if (!Sub.isZero()) {
2783 DemandedSrcBits |= Sub;
2784 for (unsigned j = 0; j != NumElts; ++j)
2785 if (DemandedElts[j])
2786 DemandedSrcElts.setBit((j * Scale) + i);
2787 }
2788 }
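// e.g. bitcasting v4i8 to i32 on a little-endian target maps demanded i32
// bits [8,16) to source element 1, which is what the EltOffset/BitOffset
// loop above computes.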
2789
2790 APInt KnownSrcUndef, KnownSrcZero;
2791 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2792 KnownSrcZero, TLO, Depth + 1))
2793 return true;
2794
2795 KnownBits KnownSrcBits;
2796 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2797 KnownSrcBits, TLO, Depth + 1))
2798 return true;
2799 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2800 // TODO - bigendian once we have test coverage.
2801 unsigned Scale = NumSrcEltBits / BitWidth;
2802 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2803 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2804 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2805 for (unsigned i = 0; i != NumElts; ++i)
2806 if (DemandedElts[i]) {
2807 unsigned Offset = (i % Scale) * BitWidth;
2808 DemandedSrcBits.insertBits(DemandedBits, Offset);
2809 DemandedSrcElts.setBit(i / Scale);
2810 }
2811
2812 if (SrcVT.isVector()) {
2813 APInt KnownSrcUndef, KnownSrcZero;
2814 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2815 KnownSrcZero, TLO, Depth + 1))
2816 return true;
2817 }
2818
2819 KnownBits KnownSrcBits;
2820 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2821 KnownSrcBits, TLO, Depth + 1))
2822 return true;
2823
2824 // Attempt to avoid multi-use ops if we don't need anything from them.
2825 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2826 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2827 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2828 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2829 return TLO.CombineTo(Op, NewOp);
2830 }
2831 }
2832 }
2833
2834 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2835 // recursive call where Known may be useful to the caller.
2836 if (Depth > 0) {
2837 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2838 return false;
2839 }
2840 break;
2841 }
2842 case ISD::MUL:
2843 if (DemandedBits.isPowerOf2()) {
2844 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2845 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2846 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2847 unsigned CTZ = DemandedBits.countr_zero();
2848 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2849 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2850 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2851 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2852 return TLO.CombineTo(Op, Shl);
2853 }
2854 }
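// e.g. demanding only bit 2 of (X * 12): 12 == 3 << 2 with 3 odd, so bit 2
// of the product equals bit 0 of X and (X << 2) suffices.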
2855 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2856 // X * X is odd iff X is odd.
2857 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2858 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2859 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2860 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2861 return TLO.CombineTo(Op, And1);
2862 }
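// The bit-1 partial products of X * X pair up as 2 * X[0] * X[1], which is
// even, so bit 1 is always clear; e.g. 3 * 3 == 9 == 0b1001.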
2863 [[fallthrough]];
2864 case ISD::PTRADD:
2865 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2866 break;
2867 // PTRADD behaves like ADD if pointers are represented as integers.
2868 [[fallthrough]];
2869 case ISD::ADD:
2870 case ISD::SUB: {
2871 // Add, Sub, and Mul don't demand any bits in positions beyond that
2872 // of the highest bit demanded of them.
2873 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2874 SDNodeFlags Flags = Op.getNode()->getFlags();
2875 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2876 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
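// Carries only propagate from low bits to high bits, so operand bits above
// the highest demanded bit cannot influence the demanded part of the result.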
2877 KnownBits KnownOp0, KnownOp1;
2878 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2879 const KnownBits &KnownRHS) {
2880 if (Op.getOpcode() == ISD::MUL)
2881 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2882 return Demanded;
2883 };
2884 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2885 Depth + 1) ||
2886 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2887 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2888 // See if the operation should be performed at a smaller bit width.
2889 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2890 // Disable the nsw and nuw flags. We can no longer guarantee that we
2891 // won't wrap after simplification.
2892 Op->dropFlags(SDNodeFlags::NoWrap);
2893 return true;
2894 }
2895
2896 // neg x with only low bit demanded is simply x.
2897 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2898 isNullConstant(Op0))
2899 return TLO.CombineTo(Op, Op1);
2900
2901 // Attempt to avoid multi-use ops if we don't need anything from them.
2902 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2903 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2904 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2905 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2906 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2907 if (DemandedOp0 || DemandedOp1) {
2908 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2909 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2910 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2911 Flags & ~SDNodeFlags::NoWrap);
2912 return TLO.CombineTo(Op, NewOp);
2913 }
2914 }
2915
2916 // If we have a constant operand, we may be able to turn it into -1 if we
2917 // do not demand the high bits. This can make the constant smaller to
2918 // encode, allow more general folding, or match specialized instruction
2919 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2920 // is probably not useful (and could be detrimental).
2921 ConstantSDNode *C = isConstOrConstSplat(Op1);
2922 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2923 if (C && !C->isAllOnes() && !C->isOne() &&
2924 (C->getAPIntValue() | HighMask).isAllOnes()) {
2925 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2926 // Disable the nsw and nuw flags. We can no longer guarantee that we
2927 // won't wrap after simplification.
2928 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2929 Flags & ~SDNodeFlags::NoWrap);
2930 return TLO.CombineTo(Op, NewOp);
2931 }
2932
2933 // Match a multiply with a disguised negated-power-of-2 and convert to
2934 // an equivalent shift-left amount.
2935 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2936 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2937 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2938 return 0;
2939
2940 // Don't touch opaque constants. Also, ignore zero and power-of-2
2941 // multiplies. Those will get folded later.
2942 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2943 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2944 !MulC->getAPIntValue().isPowerOf2()) {
2945 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2946 if (UnmaskedC.isNegatedPowerOf2())
2947 return (-UnmaskedC).logBase2();
2948 }
2949 return 0;
2950 };
2951
2952 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2953 unsigned ShlAmt) {
2954 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2955 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2956 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2957 return TLO.CombineTo(Op, Res);
2958 };
2959
2961 if (Op.getOpcode() == ISD::ADD) {
2962 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2963 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2964 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2965 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2966 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2967 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2968 }
2969 if (Op.getOpcode() == ISD::SUB) {
2970 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2971 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2972 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2973 }
2974 }
2975
2976 if (Op.getOpcode() == ISD::MUL) {
2977 Known = KnownBits::mul(KnownOp0, KnownOp1);
2978 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2979 Known = KnownBits::computeForAddSub(
2980 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2981 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2982 }
2983 break;
2984 }
2985 case ISD::FABS: {
2986 SDValue Op0 = Op.getOperand(0);
2987 APInt SignMask = APInt::getSignMask(BitWidth);
2988
2989 if (!DemandedBits.intersects(SignMask))
2990 return TLO.CombineTo(Op, Op0);
2991
2992 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2993 Depth + 1))
2994 return true;
2995
2996 if (Known.isNonNegative())
2997 return TLO.CombineTo(Op, Op0);
2998 if (Known.isNegative())
2999 return TLO.CombineTo(
3000 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3001
3002 Known.Zero |= SignMask;
3003 Known.One &= ~SignMask;
3004
3005 break;
3006 }
3007 case ISD::FCOPYSIGN: {
3008 SDValue Op0 = Op.getOperand(0);
3009 SDValue Op1 = Op.getOperand(1);
3010
3011 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3012 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3013 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3014 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3015
3016 if (!DemandedBits.intersects(SignMask0))
3017 return TLO.CombineTo(Op, Op0);
3018
3019 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3020 Known, TLO, Depth + 1) ||
3021 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3022 Depth + 1))
3023 return true;
3024
3025 if (Known2.isNonNegative())
3026 return TLO.CombineTo(
3027 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3028
3029 if (Known2.isNegative())
3030 return TLO.CombineTo(
3031 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3032 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3033
3034 Known.Zero &= ~SignMask0;
3035 Known.One &= ~SignMask0;
3036 break;
3037 }
3038 case ISD::FNEG: {
3039 SDValue Op0 = Op.getOperand(0);
3040 APInt SignMask = APInt::getSignMask(BitWidth);
3041
3042 if (!DemandedBits.intersects(SignMask))
3043 return TLO.CombineTo(Op, Op0);
3044
3045 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3046 Depth + 1))
3047 return true;
3048
3049 if (!Known.isSignUnknown()) {
3050 Known.Zero ^= SignMask;
3051 Known.One ^= SignMask;
3052 }
3053
3054 break;
3055 }
3056 default:
3057 // We also ask the target about intrinsics (which could be specific to it).
3058 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3059 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3060 // TODO: Probably okay to remove after audit; here to reduce change size
3061 // in initial enablement patch for scalable vectors
3062 if (Op.getValueType().isScalableVector())
3063 break;
3064 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3065 Known, TLO, Depth))
3066 return true;
3067 break;
3068 }
3069
3070 // Just use computeKnownBits to compute output bits.
3071 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3072 break;
3073 }
3074
3075 // If we know the value of all of the demanded bits, return this as a
3076 // constant.
3077 if (!isTargetCanonicalConstantNode(Op) &&
3078 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3079 // Avoid folding to a constant if any OpaqueConstant is involved.
3080 if (llvm::any_of(Op->ops(), [](SDValue V) {
3081 auto *C = dyn_cast<ConstantSDNode>(V);
3082 return C && C->isOpaque();
3083 }))
3084 return false;
3085 if (VT.isInteger())
3086 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3087 if (VT.isFloatingPoint())
3088 return TLO.CombineTo(
3089 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3090 dl, VT));
3091 }
3092
3093 // A multi-use 'all demanded elts' simplification failed to find any knownbits.
3094 // Try again just for the original demanded elts.
3095 // Ensure we do this AFTER constant folding above.
3096 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3097 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3098
3099 return false;
3100}
3101
3102 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3103 const APInt &DemandedElts,
3104 DAGCombinerInfo &DCI) const {
3105 SelectionDAG &DAG = DCI.DAG;
3106 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3107 !DCI.isBeforeLegalizeOps());
3108
3109 APInt KnownUndef, KnownZero;
3110 bool Simplified =
3111 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3112 if (Simplified) {
3113 DCI.AddToWorklist(Op.getNode());
3114 DCI.CommitTargetLoweringOpt(TLO);
3115 }
3116
3117 return Simplified;
3118}
3119
3120/// Given a vector binary operation and known undefined elements for each input
3121 /// operand, compute whether each element of the output is undefined.
3122 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3123 const APInt &UndefOp0,
3124 const APInt &UndefOp1) {
3125 EVT VT = BO.getValueType();
3126 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3127 "Vector binop only");
3128
3129 EVT EltVT = VT.getVectorElementType();
3130 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3131 assert(UndefOp0.getBitWidth() == NumElts &&
3132 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3133
3134 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3135 const APInt &UndefVals) {
3136 if (UndefVals[Index])
3137 return DAG.getUNDEF(EltVT);
3138
3139 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3140 // Try hard to make sure that the getNode() call is not creating temporary
3141 // nodes. Ignore opaque integers because they do not constant fold.
3142 SDValue Elt = BV->getOperand(Index);
3143 auto *C = dyn_cast<ConstantSDNode>(Elt);
3144 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3145 return Elt;
3146 }
3147
3148 return SDValue();
3149 };
3150
3151 APInt KnownUndef = APInt::getZero(NumElts);
3152 for (unsigned i = 0; i != NumElts; ++i) {
3153 // If both inputs for this element are either constant or undef and match
3154 // the element type, compute the constant/undef result for this element of
3155 // the vector.
3156 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3157 // not handle FP constants. The code within getNode() should be refactored
3158 // to avoid the danger of creating a bogus temporary node here.
3159 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3160 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3161 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3162 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3163 KnownUndef.setBit(i);
3164 }
3165 return KnownUndef;
3166}
3167
3168bool TargetLowering::SimplifyDemandedVectorElts(
3169 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3170 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3171 bool AssumeSingleUse) const {
3172 EVT VT = Op.getValueType();
3173 unsigned Opcode = Op.getOpcode();
3174 APInt DemandedElts = OriginalDemandedElts;
3175 unsigned NumElts = DemandedElts.getBitWidth();
3176 assert(VT.isVector() && "Expected vector op");
3177
3178 KnownUndef = KnownZero = APInt::getZero(NumElts);
3179
3180 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3181 return false;
3182
3183 // TODO: For now we assume we know nothing about scalable vectors.
3184 if (VT.isScalableVector())
3185 return false;
3186
3187 assert(VT.getVectorNumElements() == NumElts &&
3188 "Mask size mismatches value type element count!");
3189
3190 // Undef operand.
3191 if (Op.isUndef()) {
3192 KnownUndef.setAllBits();
3193 return false;
3194 }
3195
3196 // If Op has other users, assume that all elements are needed.
3197 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3198 DemandedElts.setAllBits();
3199
3200 // Not demanding any elements from Op.
3201 if (DemandedElts == 0) {
3202 KnownUndef.setAllBits();
3203 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3204 }
3205
3206 // Limit search depth.
3207 if (Depth >= SelectionDAG::MaxRecursionDepth)
3208 return false;
3209
3210 SDLoc DL(Op);
3211 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3212 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3213
3214 // Helper for demanding the specified elements and all the bits of both binary
3215 // operands.
3216 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3217 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3218 TLO.DAG, Depth + 1);
3219 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3220 TLO.DAG, Depth + 1);
3221 if (NewOp0 || NewOp1) {
3222 SDValue NewOp =
3223 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3224 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3225 return TLO.CombineTo(Op, NewOp);
3226 }
3227 return false;
3228 };
3229
3230 switch (Opcode) {
3231 case ISD::SCALAR_TO_VECTOR: {
3232 if (!DemandedElts[0]) {
3233 KnownUndef.setAllBits();
3234 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3235 }
3236 KnownUndef.setHighBits(NumElts - 1);
3237 break;
3238 }
3239 case ISD::BITCAST: {
3240 SDValue Src = Op.getOperand(0);
3241 EVT SrcVT = Src.getValueType();
3242
3243 if (!SrcVT.isVector()) {
3244 // TODO - bigendian once we have test coverage.
3245 if (IsLE) {
3246 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3247 unsigned EltSize = VT.getScalarSizeInBits();
3248 for (unsigned I = 0; I != NumElts; ++I) {
3249 if (DemandedElts[I]) {
3250 unsigned Offset = I * EltSize;
3251 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3252 }
3253 }
3254 KnownBits Known;
3255 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3256 return true;
3257 }
3258 break;
3259 }
3260
3261 // Fast handling of 'identity' bitcasts.
3262 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3263 if (NumSrcElts == NumElts)
3264 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3265 KnownZero, TLO, Depth + 1);
3266
3267 APInt SrcDemandedElts, SrcZero, SrcUndef;
3268
3269 // Bitcast from 'large element' src vector to 'small element' vector: we
3270 // must demand a source element if any DemandedElt maps to it.
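// e.g. for a v2i64 -> v4i32 bitcast on a little-endian target, demanding only
// result element 2 requires demanding source element 1, since result elements
// 2-3 alias source element 1 (illustrative example).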
3271 if ((NumElts % NumSrcElts) == 0) {
3272 unsigned Scale = NumElts / NumSrcElts;
3273 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3274 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3275 TLO, Depth + 1))
3276 return true;
3277
3278 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3279 // of the large element.
3280 // TODO - bigendian once we have test coverage.
3281 if (IsLE) {
3282 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3283 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3284 for (unsigned i = 0; i != NumElts; ++i)
3285 if (DemandedElts[i]) {
3286 unsigned Ofs = (i % Scale) * EltSizeInBits;
3287 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3288 }
3289
3290 KnownBits Known;
3291 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3292 TLO, Depth + 1))
3293 return true;
3294
3295 // The bitcast has split each wide element into a number of
3296 // narrow subelements. We have just computed the Known bits
3297 // for wide elements. See if element splitting results in
3298 // some subelements being zero. Only for demanded elements!
3299 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3300 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3301 .isAllOnes())
3302 continue;
3303 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3304 unsigned Elt = Scale * SrcElt + SubElt;
3305 if (DemandedElts[Elt])
3306 KnownZero.setBit(Elt);
3307 }
3308 }
3309 }
3310
3311 // If the src element is zero/undef then all the output elements it covers
3312 // will be too - only demanded elements are guaranteed to be correct.
3313 for (unsigned i = 0; i != NumSrcElts; ++i) {
3314 if (SrcDemandedElts[i]) {
3315 if (SrcZero[i])
3316 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3317 if (SrcUndef[i])
3318 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3319 }
3320 }
3321 }
3322
3323 // Bitcast from 'small element' src vector to 'large element' vector: we
3324 // demand all smaller source elements covered by the larger demanded element
3325 // of this vector.
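// e.g. for a v4i32 -> v2i64 bitcast, demanding result element 1 requires
// demanding source elements 2 and 3, which together form that i64
// (illustrative example).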
3326 if ((NumSrcElts % NumElts) == 0) {
3327 unsigned Scale = NumSrcElts / NumElts;
3328 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3329 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3330 TLO, Depth + 1))
3331 return true;
3332
3333 // If all the src elements covering an output element are zero/undef, then
3334 // the output element will be as well, assuming it was demanded.
3335 for (unsigned i = 0; i != NumElts; ++i) {
3336 if (DemandedElts[i]) {
3337 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3338 KnownZero.setBit(i);
3339 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3340 KnownUndef.setBit(i);
3341 }
3342 }
3343 }
3344 break;
3345 }
3346 case ISD::FREEZE: {
3347 SDValue N0 = Op.getOperand(0);
3348 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3349 /*PoisonOnly=*/false,
3350 Depth + 1))
3351 return TLO.CombineTo(Op, N0);
3352
3353 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3354 // freeze(op(x, ...)) -> op(freeze(x), ...).
3355 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3356 return TLO.CombineTo(
3357 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3358 TLO.DAG.getFreeze(N0.getOperand(0))));
3359 break;
3360 }
3361 case ISD::BUILD_VECTOR: {
3362 // Check all elements and simplify any unused elements with UNDEF.
3363 if (!DemandedElts.isAllOnes()) {
3364 // Don't simplify BROADCASTS.
3365 if (llvm::any_of(Op->op_values(),
3366 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3367 SmallVector<SDValue, 32> Ops(Op->ops());
3368 bool Updated = false;
3369 for (unsigned i = 0; i != NumElts; ++i) {
3370 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3371 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3372 KnownUndef.setBit(i);
3373 Updated = true;
3374 }
3375 }
3376 if (Updated)
3377 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3378 }
3379 }
3380 for (unsigned i = 0; i != NumElts; ++i) {
3381 SDValue SrcOp = Op.getOperand(i);
3382 if (SrcOp.isUndef()) {
3383 KnownUndef.setBit(i);
3384 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3385 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3386 KnownZero.setBit(i);
3387 }
3388 }
3389 break;
3390 }
3391 case ISD::CONCAT_VECTORS: {
3392 EVT SubVT = Op.getOperand(0).getValueType();
3393 unsigned NumSubVecs = Op.getNumOperands();
3394 unsigned NumSubElts = SubVT.getVectorNumElements();
3395 for (unsigned i = 0; i != NumSubVecs; ++i) {
3396 SDValue SubOp = Op.getOperand(i);
3397 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3398 APInt SubUndef, SubZero;
3399 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3400 Depth + 1))
3401 return true;
3402 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3403 KnownZero.insertBits(SubZero, i * NumSubElts);
3404 }
3405
3406 // Attempt to avoid multi-use ops if we don't need anything from them.
3407 if (!DemandedElts.isAllOnes()) {
3408 bool FoundNewSub = false;
3409 SmallVector<SDValue, 2> DemandedSubOps;
3410 for (unsigned i = 0; i != NumSubVecs; ++i) {
3411 SDValue SubOp = Op.getOperand(i);
3412 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3413 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3414 SubOp, SubElts, TLO.DAG, Depth + 1);
3415 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3416 FoundNewSub = NewSubOp ? true : FoundNewSub;
3417 }
3418 if (FoundNewSub) {
3419 SDValue NewOp =
3420 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3421 return TLO.CombineTo(Op, NewOp);
3422 }
3423 }
3424 break;
3425 }
3426 case ISD::INSERT_SUBVECTOR: {
3427 // Demand any elements from the subvector and the remainder from the src it
3428 // is inserted into.
3429 SDValue Src = Op.getOperand(0);
3430 SDValue Sub = Op.getOperand(1);
3431 uint64_t Idx = Op.getConstantOperandVal(2);
3432 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3433 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3434 APInt DemandedSrcElts = DemandedElts;
3435 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3436
3437 // If none of the sub operand elements are demanded, bypass the insert.
3438 if (!DemandedSubElts)
3439 return TLO.CombineTo(Op, Src);
3440
3441 APInt SubUndef, SubZero;
3442 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3443 Depth + 1))
3444 return true;
3445
3446 // If none of the src operand elements are demanded, replace it with undef.
3447 if (!DemandedSrcElts && !Src.isUndef())
3448 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3449 TLO.DAG.getUNDEF(VT), Sub,
3450 Op.getOperand(2)));
3451
3452 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3453 TLO, Depth + 1))
3454 return true;
3455 KnownUndef.insertBits(SubUndef, Idx);
3456 KnownZero.insertBits(SubZero, Idx);
3457
3458 // Attempt to avoid multi-use ops if we don't need anything from them.
3459 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3460 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3461 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3462 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3463 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3464 if (NewSrc || NewSub) {
3465 NewSrc = NewSrc ? NewSrc : Src;
3466 NewSub = NewSub ? NewSub : Sub;
3467 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3468 NewSub, Op.getOperand(2));
3469 return TLO.CombineTo(Op, NewOp);
3470 }
3471 }
3472 break;
3473 }
3474 case ISD::EXTRACT_SUBVECTOR: {
3475 // Offset the demanded elts by the subvector index.
3476 SDValue Src = Op.getOperand(0);
3477 if (Src.getValueType().isScalableVector())
3478 break;
3479 uint64_t Idx = Op.getConstantOperandVal(1);
3480 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3481 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3482
3483 APInt SrcUndef, SrcZero;
3484 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3485 Depth + 1))
3486 return true;
3487 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3488 KnownZero = SrcZero.extractBits(NumElts, Idx);
3489
3490 // Attempt to avoid multi-use ops if we don't need anything from them.
3491 if (!DemandedElts.isAllOnes()) {
3492 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3493 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3494 if (NewSrc) {
3495 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3496 Op.getOperand(1));
3497 return TLO.CombineTo(Op, NewOp);
3498 }
3499 }
3500 break;
3501 }
3502 case ISD::INSERT_VECTOR_ELT: {
3503 SDValue Vec = Op.getOperand(0);
3504 SDValue Scl = Op.getOperand(1);
3505 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3506
3507 // For a legal, constant insertion index, if we don't need this insertion
3508 // then strip it, else remove it from the demanded elts.
3509 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3510 unsigned Idx = CIdx->getZExtValue();
3511 if (!DemandedElts[Idx])
3512 return TLO.CombineTo(Op, Vec);
3513
3514 APInt DemandedVecElts(DemandedElts);
3515 DemandedVecElts.clearBit(Idx);
3516 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3517 KnownZero, TLO, Depth + 1))
3518 return true;
3519
3520 KnownUndef.setBitVal(Idx, Scl.isUndef());
3521
3522 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3523 break;
3524 }
3525
3526 APInt VecUndef, VecZero;
3527 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3528 Depth + 1))
3529 return true;
3530 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3531 break;
3532 }
3533 case ISD::VSELECT: {
3534 SDValue Sel = Op.getOperand(0);
3535 SDValue LHS = Op.getOperand(1);
3536 SDValue RHS = Op.getOperand(2);
3537
3538 // Try to transform the select condition based on the current demanded
3539 // elements.
3540 APInt UndefSel, ZeroSel;
3541 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3542 Depth + 1))
3543 return true;
3544
3545 // See if we can simplify either vselect operand.
3546 APInt DemandedLHS(DemandedElts);
3547 APInt DemandedRHS(DemandedElts);
3548 APInt UndefLHS, ZeroLHS;
3549 APInt UndefRHS, ZeroRHS;
3550 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3551 Depth + 1))
3552 return true;
3553 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3554 Depth + 1))
3555 return true;
3556
3557 KnownUndef = UndefLHS & UndefRHS;
3558 KnownZero = ZeroLHS & ZeroRHS;
3559
3560 // If we know that the selected element is always zero, we don't need the
3561 // select value element.
3562 APInt DemandedSel = DemandedElts & ~KnownZero;
3563 if (DemandedSel != DemandedElts)
3564 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3565 Depth + 1))
3566 return true;
3567
3568 break;
3569 }
3570 case ISD::VECTOR_SHUFFLE: {
3571 SDValue LHS = Op.getOperand(0);
3572 SDValue RHS = Op.getOperand(1);
3573 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3574
3575 // Collect demanded elements from shuffle operands.
3576 APInt DemandedLHS(NumElts, 0);
3577 APInt DemandedRHS(NumElts, 0);
3578 for (unsigned i = 0; i != NumElts; ++i) {
3579 int M = ShuffleMask[i];
3580 if (M < 0 || !DemandedElts[i])
3581 continue;
3582 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3583 if (M < (int)NumElts)
3584 DemandedLHS.setBit(M);
3585 else
3586 DemandedRHS.setBit(M - NumElts);
3587 }
3588
3589 // If either side isn't demanded, replace it by UNDEF. We handle this
3590 // explicitly here to also simplify in the case of multiple uses (in
3591 // contrast to the SimplifyDemandedVectorElts calls below).
3592 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3593 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3594 if (FoldLHS || FoldRHS) {
3595 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3596 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3597 SDValue NewOp =
3598 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3599 return TLO.CombineTo(Op, NewOp);
3600 }
3601
3602 // See if we can simplify either shuffle operand.
3603 APInt UndefLHS, ZeroLHS;
3604 APInt UndefRHS, ZeroRHS;
3605 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3606 Depth + 1))
3607 return true;
3608 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3609 Depth + 1))
3610 return true;
3611
3612 // Simplify mask using undef elements from LHS/RHS.
3613 bool Updated = false;
3614 bool IdentityLHS = true, IdentityRHS = true;
3615 SmallVector<int, 32> NewMask(ShuffleMask);
3616 for (unsigned i = 0; i != NumElts; ++i) {
3617 int &M = NewMask[i];
3618 if (M < 0)
3619 continue;
3620 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3621 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3622 Updated = true;
3623 M = -1;
3624 }
3625 IdentityLHS &= (M < 0) || (M == (int)i);
3626 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3627 }
3628
3629 // Update legal shuffle masks based on demanded elements if it won't reduce
3630 // to Identity, which can cause premature removal of the shuffle mask.
3631 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3632 SDValue LegalShuffle =
3633 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3634 if (LegalShuffle)
3635 return TLO.CombineTo(Op, LegalShuffle);
3636 }
3637
3638 // Propagate undef/zero elements from LHS/RHS.
3639 for (unsigned i = 0; i != NumElts; ++i) {
3640 int M = ShuffleMask[i];
3641 if (M < 0) {
3642 KnownUndef.setBit(i);
3643 } else if (M < (int)NumElts) {
3644 if (UndefLHS[M])
3645 KnownUndef.setBit(i);
3646 if (ZeroLHS[M])
3647 KnownZero.setBit(i);
3648 } else {
3649 if (UndefRHS[M - NumElts])
3650 KnownUndef.setBit(i);
3651 if (ZeroRHS[M - NumElts])
3652 KnownZero.setBit(i);
3653 }
3654 }
3655 break;
3656 }
3657 case ISD::ANY_EXTEND_VECTOR_INREG:
3658 case ISD::SIGN_EXTEND_VECTOR_INREG:
3659 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3660 APInt SrcUndef, SrcZero;
3661 SDValue Src = Op.getOperand(0);
3662 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3663 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3664 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3665 Depth + 1))
3666 return true;
3667 KnownZero = SrcZero.zextOrTrunc(NumElts);
3668 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3669
3670 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3671 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3672 DemandedSrcElts == 1) {
3673 // aext - if we just need the bottom element then we can bitcast.
3674 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3675 }
3676
3677 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3678 // zext(undef) upper bits are guaranteed to be zero.
3679 if (DemandedElts.isSubsetOf(KnownUndef))
3680 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3681 KnownUndef.clearAllBits();
3682
3683 // zext - if we just need the bottom element then we can mask:
3684 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
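// Here c' keeps only the bits of c within the low source element; the other
// lanes are masked to zero, which is exactly what the zext would produce, so
// the result can simply be bitcast to the wider type.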
3685 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3686 Op->isOnlyUserOf(Src.getNode()) &&
3687 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3688 SDLoc DL(Op);
3689 EVT SrcVT = Src.getValueType();
3690 EVT SrcSVT = SrcVT.getScalarType();
3691 SmallVector<SDValue> MaskElts;
3692 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3693 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3694 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3695 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3696 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3697 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3698 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3699 }
3700 }
3701 }
3702 break;
3703 }
3704
3705 // TODO: There are more binop opcodes that could be handled here - MIN,
3706 // MAX, saturated math, etc.
3707 case ISD::ADD: {
3708 SDValue Op0 = Op.getOperand(0);
3709 SDValue Op1 = Op.getOperand(1);
3710 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3711 APInt UndefLHS, ZeroLHS;
3712 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3713 Depth + 1, /*AssumeSingleUse*/ true))
3714 return true;
3715 }
3716 [[fallthrough]];
3717 }
3718 case ISD::AVGCEILS:
3719 case ISD::AVGCEILU:
3720 case ISD::AVGFLOORS:
3721 case ISD::AVGFLOORU:
3722 case ISD::OR:
3723 case ISD::XOR:
3724 case ISD::SUB:
3725 case ISD::FADD:
3726 case ISD::FSUB:
3727 case ISD::FMUL:
3728 case ISD::FDIV:
3729 case ISD::FREM: {
3730 SDValue Op0 = Op.getOperand(0);
3731 SDValue Op1 = Op.getOperand(1);
3732
3733 APInt UndefRHS, ZeroRHS;
3734 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3735 Depth + 1))
3736 return true;
3737 APInt UndefLHS, ZeroLHS;
3738 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3739 Depth + 1))
3740 return true;
3741
3742 KnownZero = ZeroLHS & ZeroRHS;
3743 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3744
3745 // Attempt to avoid multi-use ops if we don't need anything from them.
3746 // TODO - use KnownUndef to relax the demandedelts?
3747 if (!DemandedElts.isAllOnes())
3748 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3749 return true;
3750 break;
3751 }
3752 case ISD::SHL:
3753 case ISD::SRL:
3754 case ISD::SRA:
3755 case ISD::ROTL:
3756 case ISD::ROTR: {
3757 SDValue Op0 = Op.getOperand(0);
3758 SDValue Op1 = Op.getOperand(1);
3759
3760 APInt UndefRHS, ZeroRHS;
3761 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3762 Depth + 1))
3763 return true;
3764 APInt UndefLHS, ZeroLHS;
3765 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3766 Depth + 1))
3767 return true;
3768
3769 KnownZero = ZeroLHS;
3770 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3771
3772 // Attempt to avoid multi-use ops if we don't need anything from them.
3773 // TODO - use KnownUndef to relax the demandedelts?
3774 if (!DemandedElts.isAllOnes())
3775 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3776 return true;
3777 break;
3778 }
3779 case ISD::MUL:
3780 case ISD::MULHU:
3781 case ISD::MULHS:
3782 case ISD::AND: {
3783 SDValue Op0 = Op.getOperand(0);
3784 SDValue Op1 = Op.getOperand(1);
3785
3786 APInt SrcUndef, SrcZero;
3787 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3788 Depth + 1))
3789 return true;
3790 // If we know that a demanded element was zero in Op1 we don't need to
3791 // demand it in Op0 - it's guaranteed to be zero.
3792 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3793 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3794 TLO, Depth + 1))
3795 return true;
3796
3797 KnownUndef &= DemandedElts0;
3798 KnownZero &= DemandedElts0;
3799
3800 // If every element pair has a zero/undef then just fold to zero.
3801 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3802 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3803 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3804 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3805
3806 // If either side has a zero element, then the result element is zero, even
3807 // if the other is an UNDEF.
3808 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3809 // and then handle 'and' nodes with the rest of the binop opcodes.
3810 KnownZero |= SrcZero;
3811 KnownUndef &= SrcUndef;
3812 KnownUndef &= ~KnownZero;
3813
3814 // Attempt to avoid multi-use ops if we don't need anything from them.
3815 if (!DemandedElts.isAllOnes())
3816 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3817 return true;
3818 break;
3819 }
3820 case ISD::TRUNCATE:
3821 case ISD::SIGN_EXTEND:
3822 case ISD::ZERO_EXTEND:
3823 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3824 KnownZero, TLO, Depth + 1))
3825 return true;
3826
3827 if (!DemandedElts.isAllOnes())
3828 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3829 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3830 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3831
3832 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3833 // zext(undef) upper bits are guaranteed to be zero.
3834 if (DemandedElts.isSubsetOf(KnownUndef))
3835 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3836 KnownUndef.clearAllBits();
3837 }
3838 break;
3839 case ISD::SINT_TO_FP:
3840 case ISD::UINT_TO_FP:
3841 case ISD::FP_TO_SINT:
3842 case ISD::FP_TO_UINT:
3843 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3844 KnownZero, TLO, Depth + 1))
3845 return true;
3846 // Don't fall through to generic undef -> undef handling.
3847 return false;
3848 default: {
3849 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3850 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3851 KnownZero, TLO, Depth))
3852 return true;
3853 } else {
3854 KnownBits Known;
3855 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3856 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3857 TLO, Depth, AssumeSingleUse))
3858 return true;
3859 }
3860 break;
3861 }
3862 }
3863 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3864
3865 // Constant fold all undef cases.
3866 // TODO: Handle zero cases as well.
3867 if (DemandedElts.isSubsetOf(KnownUndef))
3868 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3869
3870 return false;
3871}
3872
3873/// Determine which of the bits specified in Mask are known to be either zero or
3874/// one and return them in Known.
3875void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3876 KnownBits &Known,
3877 const APInt &DemandedElts,
3878 const SelectionDAG &DAG,
3879 unsigned Depth) const {
3880 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3881 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3882 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3883 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3884 "Should use MaskedValueIsZero if you don't know whether Op"
3885 " is a target node!");
3886 Known.resetAll();
3887}
3888
3889void TargetLowering::computeKnownBitsForTargetInstr(
3890 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3891 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3892 unsigned Depth) const {
3893 Known.resetAll();
3894}
3895
3896void TargetLowering::computeKnownFPClassForTargetInstr(
3897 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3898 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3899 unsigned Depth) const {
3900 Known.resetAll();
3901}
3902
3903void TargetLowering::computeKnownBitsForFrameIndex(
3904 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3905 // The low bits are known zero if the pointer is aligned.
3906 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3907}
3908
3908
3909Align TargetLowering::computeKnownAlignForTargetInstr(
3910 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3911 unsigned Depth) const {
3912 return Align(1);
3913}
3914
3915/// This method can be implemented by targets that want to expose additional
3916/// information about sign bits to the DAG Combiner.
3917unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3918 const APInt &,
3919 const SelectionDAG &,
3920 unsigned Depth) const {
3921 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3922 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3923 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3924 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3925 "Should use ComputeNumSignBits if you don't know whether Op"
3926 " is a target node!");
3927 return 1;
3928}
3929
3930unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3931 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3932 const MachineRegisterInfo &MRI, unsigned Depth) const {
3933 return 1;
3934}
3935
3936bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3937 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3938 TargetLoweringOpt &TLO, unsigned Depth) const {
3939 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3940 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3941 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3942 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3943 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3944 " is a target node!");
3945 return false;
3946}
3947
3948bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3949 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3950 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3951 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3952 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3953 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3954 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3955 "Should use SimplifyDemandedBits if you don't know whether Op"
3956 " is a target node!");
3957 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3958 return false;
3959}
3960
3961SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3962 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3963 SelectionDAG &DAG, unsigned Depth) const {
3964 assert(
3965 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3966 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3967 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3968 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3969 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3970 " is a target node!");
3971 return SDValue();
3972}
3973
3974SDValue
3975TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3976 SDValue N1, MutableArrayRef<int> Mask,
3977 SelectionDAG &DAG) const {
3978 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3979 if (!LegalMask) {
3980 std::swap(N0, N1);
3981 ShuffleVectorSDNode::commuteMask(Mask);
3982 LegalMask = isShuffleMaskLegal(Mask, VT);
3983 }
3984
3985 if (!LegalMask)
3986 return SDValue();
3987
3988 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3989}
3990
3991const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3992 return nullptr;
3993}
3994
3995bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3996 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3997 bool PoisonOnly, unsigned Depth) const {
3998 assert(
3999 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4000 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4001 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4002 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4003 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4004 " is a target node!");
4005
4006 // If Op can't create undef/poison and none of its operands are undef/poison
4007 // then Op is never undef/poison.
4008 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4009 /*ConsiderFlags*/ true, Depth) &&
4010 all_of(Op->ops(), [&](SDValue V) {
4011 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4012 Depth + 1);
4013 });
4014}
4015
4016bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4017 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4018 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4019 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4020 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4021 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4022 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4023 "Should use canCreateUndefOrPoison if you don't know whether Op"
4024 " is a target node!");
4025 // Be conservative and return true.
4026 return true;
4027}
4028
4029bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4030 const APInt &DemandedElts,
4031 const SelectionDAG &DAG,
4032 bool SNaN,
4033 unsigned Depth) const {
4034 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4035 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4036 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4037 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4038 "Should use isKnownNeverNaN if you don't know whether Op"
4039 " is a target node!");
4040 return false;
4041}
4042
4043bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4044 const APInt &DemandedElts,
4045 APInt &UndefElts,
4046 const SelectionDAG &DAG,
4047 unsigned Depth) const {
4048 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4049 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4050 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4051 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4052 "Should use isSplatValue if you don't know whether Op"
4053 " is a target node!");
4054 return false;
4055}
4056
4057// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4058// work with truncating build vectors and vectors with elements of less than
4059// 8 bits.
4060bool TargetLowering::isConstTrueVal(SDValue N) const {
4061 if (!N)
4062 return false;
4063
4064 unsigned EltWidth;
4065 APInt CVal;
4066 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4067 /*AllowTruncation=*/true)) {
4068 CVal = CN->getAPIntValue();
4069 EltWidth = N.getValueType().getScalarSizeInBits();
4070 } else
4071 return false;
4072
4073 // If this is a truncating splat, truncate the splat value.
4074 // Otherwise, we may fail to match the expected values below.
4075 if (EltWidth < CVal.getBitWidth())
4076 CVal = CVal.trunc(EltWidth);
4077
4078 switch (getBooleanContents(N.getValueType())) {
4079 case UndefinedBooleanContent:
4080 return CVal[0];
4081 case ZeroOrOneBooleanContent:
4082 return CVal.isOne();
4083 case ZeroOrNegativeOneBooleanContent:
4084 return CVal.isAllOnes();
4085 }
4086
4087 llvm_unreachable("Invalid boolean contents");
4088}
4089
4090bool TargetLowering::isConstFalseVal(SDValue N) const {
4091 if (!N)
4092 return false;
4093
4094 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4095 if (!CN) {
4096 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4097 if (!BV)
4098 return false;
4099
4100 // Only interested in constant splats; we don't care about undef
4101 // elements when identifying boolean constants, and getConstantSplatNode
4102 // returns null if all ops are undef.
4103 CN = BV->getConstantSplatNode();
4104 if (!CN)
4105 return false;
4106 }
4107
4108 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4109 return !CN->getAPIntValue()[0];
4110
4111 return CN->isZero();
4112}
4113
4114bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4115 bool SExt) const {
4116 if (VT == MVT::i1)
4117 return N->isOne();
4118
4120 switch (Cnt) {
4122 // An extended value of 1 is always true, unless its original type is i1,
4123 // in which case it will be sign extended to -1.
4124 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4127 return N->isAllOnes() && SExt;
4128 }
4129 llvm_unreachable("Unexpected enumeration.");
4130}
4131
4132/// This helper function of SimplifySetCC tries to optimize the comparison when
4133/// either operand of the SetCC node is a bitwise-and instruction.
4134SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4135 ISD::CondCode Cond, const SDLoc &DL,
4136 DAGCombinerInfo &DCI) const {
4137 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4138 std::swap(N0, N1);
4139
4140 SelectionDAG &DAG = DCI.DAG;
4141 EVT OpVT = N0.getValueType();
4142 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4143 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4144 return SDValue();
4145
4146 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4147 // iff everything but LSB is known zero:
4148 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4149 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
4150 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4151 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4152 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4153 if (DAG.MaskedValueIsZero(N0, UpperBits))
4154 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4155 }
4156
4157 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4158 // test in a narrow type that we can truncate to with no cost. Examples:
4159 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4160 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4161 // TODO: This conservatively checks for type legality on the source and
4162 // destination types. That may inhibit optimizations, but it also
4163 // allows setcc->shift transforms that may be more beneficial.
4164 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4165 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4166 isTypeLegal(OpVT) && N0.hasOneUse()) {
4167 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4168 AndC->getAPIntValue().getActiveBits());
4169 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4170 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4171 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4172 return DAG.getSetCC(DL, VT, Trunc, Zero,
4173 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4174 }
4175 }
4176
4177 // Match these patterns in any of their permutations:
4178 // (X & Y) == Y
4179 // (X & Y) != Y
4180 SDValue X, Y;
4181 if (N0.getOperand(0) == N1) {
4182 X = N0.getOperand(1);
4183 Y = N0.getOperand(0);
4184 } else if (N0.getOperand(1) == N1) {
4185 X = N0.getOperand(0);
4186 Y = N0.getOperand(1);
4187 } else {
4188 return SDValue();
4189 }
4190
4191 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4192 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4193 // it's liable to create an infinite loop.
4194 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4195 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4196 valueHasExactlyOneBitSet(Y, DAG)) {
4197 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4198 // Note that where Y is variable and is known to have at most one bit set
4199 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4200 // equivalent when Y == 0.
4201 assert(OpVT.isInteger());
4202 Cond = ISD::getSetCCInverse(Cond, OpVT);
4203 if (DCI.isBeforeLegalizeOps() ||
4204 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4205 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4206 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4207 // If the target supports an 'and-not' or 'and-complement' logic operation,
4208 // try to use that to make a comparison operation more efficient.
4209 // But don't do this transform if the mask is a single bit because there are
4210 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4211 // 'rlwinm' on PPC).
4212
4213 // Bail out if the compare operand that we want to turn into a zero is
4214 // already a zero (otherwise, infinite loop).
4215 if (isNullConstant(Y))
4216 return SDValue();
4217
4218 // Transform this into: ~X & Y == 0.
4219 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4220 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4221 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4222 }
4223
4224 return SDValue();
4225}
4226
4227/// This helper function of SimplifySetCC tries to optimize the comparison when
4228/// either operand of the SetCC node is a bitwise-or instruction.
4229/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4230SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4231 ISD::CondCode Cond, const SDLoc &DL,
4232 DAGCombinerInfo &DCI) const {
4233 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4234 std::swap(N0, N1);
4235
4236 SelectionDAG &DAG = DCI.DAG;
4237 EVT OpVT = N0.getValueType();
4238 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4239 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4240 return SDValue();
4241
4242 // (X | Y) == Y
4243 // (X | Y) != Y
4244 SDValue X;
4245 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4246 // If the target supports an 'and-not' or 'and-complement' logic operation,
4247 // try to use that to make a comparison operation more efficient.
4248
4249 // Bail out if the compare operand that we want to turn into a zero is
4250 // already a zero (otherwise, infinite loop).
4251 if (isNullConstant(N1))
4252 return SDValue();
4253
4254 // Transform this into: X & ~Y ==/!= 0.
4255 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4256 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4257 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4258 }
4259
4260 return SDValue();
4261}
4262
4263/// There are multiple IR patterns that could be checking whether certain
4264/// truncation of a signed number would be lossy or not. The pattern which is
4265/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4266/// We are looking for the following pattern: (KeptBits is a constant)
4267/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4268/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4269/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4270/// We will unfold it into the natural trunc+sext pattern:
4271/// ((%x << C) a>> C) dstcond %x
4272/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
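/// e.g. for i32 %x and KeptBits == 8:
///   (add %x, 128) u< 256 becomes ((%x << 24) a>> 24) == %x
/// i.e. both forms check whether %x is representable as a sign-extended i8.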
4273SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4274 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4275 const SDLoc &DL) const {
4276 // We must be comparing with a constant.
4277 ConstantSDNode *C1;
4278 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4279 return SDValue();
4280
4281 // N0 should be: add %x, (1 << (KeptBits-1))
4282 if (N0->getOpcode() != ISD::ADD)
4283 return SDValue();
4284
4285 // And we must be 'add'ing a constant.
4286 ConstantSDNode *C01;
4287 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4288 return SDValue();
4289
4290 SDValue X = N0->getOperand(0);
4291 EVT XVT = X.getValueType();
4292
4293 // Validate constants ...
4294
4295 APInt I1 = C1->getAPIntValue();
4296
4297 ISD::CondCode NewCond;
4298 if (Cond == ISD::CondCode::SETULT) {
4299 NewCond = ISD::CondCode::SETEQ;
4300 } else if (Cond == ISD::CondCode::SETULE) {
4301 NewCond = ISD::CondCode::SETEQ;
4302 // But need to 'canonicalize' the constant.
4303 I1 += 1;
4304 } else if (Cond == ISD::CondCode::SETUGT) {
4305 NewCond = ISD::CondCode::SETNE;
4306 // But need to 'canonicalize' the constant.
4307 I1 += 1;
4308 } else if (Cond == ISD::CondCode::SETUGE) {
4309 NewCond = ISD::CondCode::SETNE;
4310 } else
4311 return SDValue();
4312
4313 APInt I01 = C01->getAPIntValue();
4314
4315 auto checkConstants = [&I1, &I01]() -> bool {
4316 // Both of them must be power-of-two, and the constant from setcc is bigger.
4317 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4318 };
4319
4320 if (checkConstants()) {
4321 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4322 } else {
4323 // What if we invert constants? (and the target predicate)
4324 I1.negate();
4325 I01.negate();
4326 assert(XVT.isInteger());
4327 NewCond = getSetCCInverse(NewCond, XVT);
4328 if (!checkConstants())
4329 return SDValue();
4330 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4331 }
4332
4333 // They are power-of-two, so which bit is set?
4334 const unsigned KeptBits = I1.logBase2();
4335 const unsigned KeptBitsMinusOne = I01.logBase2();
4336
4337 // Magic!
4338 if (KeptBits != (KeptBitsMinusOne + 1))
4339 return SDValue();
4340 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4341
4342 // We don't want to do this in every single case.
4343 SelectionDAG &DAG = DCI.DAG;
4344 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4345 return SDValue();
4346
4347 // Unfold into: sext_inreg(%x) cond %x
4348 // Where 'cond' will be either 'eq' or 'ne'.
4349 SDValue SExtInReg = DAG.getNode(
4350 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4351 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4352 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4353}
4354
4355// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
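// e.g. (X & (0x80 l>> Y)) != 0 becomes ((X << Y) & 0x80) != 0, hoisting the
// constant out of the variable shift so a test-bit pattern can match.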
4356SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4357 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4358 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4360 "Should be a comparison with 0.");
4361 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4362 "Valid only for [in]equality comparisons.");
4363
4364 unsigned NewShiftOpcode;
4365 SDValue X, C, Y;
4366
4367 SelectionDAG &DAG = DCI.DAG;
4368
4369 // Look for '(C l>>/<< Y)'.
4370 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4371 // The shift should be one-use.
4372 if (!V.hasOneUse())
4373 return false;
4374 unsigned OldShiftOpcode = V.getOpcode();
4375 switch (OldShiftOpcode) {
4376 case ISD::SHL:
4377 NewShiftOpcode = ISD::SRL;
4378 break;
4379 case ISD::SRL:
4380 NewShiftOpcode = ISD::SHL;
4381 break;
4382 default:
4383 return false; // must be a logical shift.
4384 }
4385 // We should be shifting a constant.
4386 // FIXME: best to use isConstantOrConstantVector().
4387 C = V.getOperand(0);
4388 ConstantSDNode *CC =
4389 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4390 if (!CC)
4391 return false;
4392 Y = V.getOperand(1);
4393
4394 ConstantSDNode *XC =
4395 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4396 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4397 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4398 };
4399
4400 // LHS of comparison should be a one-use 'and'.
4401 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4402 return SDValue();
4403
4404 X = N0.getOperand(0);
4405 SDValue Mask = N0.getOperand(1);
4406
4407 // 'and' is commutative!
4408 if (!Match(Mask)) {
4409 std::swap(X, Mask);
4410 if (!Match(Mask))
4411 return SDValue();
4412 }
4413
4414 EVT VT = X.getValueType();
4415
4416 // Produce:
4417 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4418 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4419 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4420 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4421 return T2;
4422}
4423
4424/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4425/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4426/// handle the commuted versions of these patterns.
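/// e.g. with X = 6, Y = 3: (6 - 3) == 3 and 6 == (3 << 1) agree; the shift
/// form is skipped for boolean (i1) operands, where the shift is invalid.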
4427SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4428 ISD::CondCode Cond, const SDLoc &DL,
4429 DAGCombinerInfo &DCI) const {
4430 unsigned BOpcode = N0.getOpcode();
4431 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4432 "Unexpected binop");
4433 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4434
4435 // (X + Y) == X --> Y == 0
4436 // (X - Y) == X --> Y == 0
4437 // (X ^ Y) == X --> Y == 0
4438 SelectionDAG &DAG = DCI.DAG;
4439 EVT OpVT = N0.getValueType();
4440 SDValue X = N0.getOperand(0);
4441 SDValue Y = N0.getOperand(1);
4442 if (X == N1)
4443 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4444
4445 if (Y != N1)
4446 return SDValue();
4447
4448 // (X + Y) == Y --> X == 0
4449 // (X ^ Y) == Y --> X == 0
4450 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4451 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4452
4453 // The shift would not be valid if the operands are boolean (i1).
4454 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4455 return SDValue();
4456
4457 // (X - Y) == Y --> X == Y << 1
4458 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4459 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4460 if (!DCI.isCalledByLegalizer())
4461 DCI.AddToWorklist(YShl1.getNode());
4462 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4463}
4464
4465static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4466 SDValue N0, const APInt &C1,
4467 ISD::CondCode Cond, const SDLoc &dl,
4468 SelectionDAG &DAG) {
4469 // Look through truncs that don't change the value of a ctpop.
4470 // FIXME: Add vector support? Need to be careful with setcc result type below.
4471 SDValue CTPOP = N0;
4472 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4473 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4474 CTPOP = N0.getOperand(0);
4475
4476 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4477 return SDValue();
4478
4479 EVT CTVT = CTPOP.getValueType();
4480 SDValue CTOp = CTPOP.getOperand(0);
4481
4482 // Expand a power-of-2-or-zero comparison based on ctpop:
4483 // (ctpop x) u< 2 -> (x & x-1) == 0
4484 // (ctpop x) u> 1 -> (x & x-1) != 0
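// e.g. x = 0b01000: x & (x - 1) == 0b01000 & 0b00111 == 0; clearing the
// lowest set bit leaves zero iff at most one bit was set.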
4485 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4486 // Keep the CTPOP if it is a cheap vector op.
4487 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4488 return SDValue();
4489
4490 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4491 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4492 return SDValue();
4493 if (C1 == 0 && (Cond == ISD::SETULT))
4494 return SDValue(); // This is handled elsewhere.
4495
4496 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4497
4498 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4499 SDValue Result = CTOp;
4500 for (unsigned i = 0; i < Passes; i++) {
4501 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4502 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4503 }
4504 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4505 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4506 }
4507
4508 // Expand a power-of-2 comparison based on ctpop
4509 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4510 // Keep the CTPOP if it is cheap.
4511 if (TLI.isCtpopFast(CTVT))
4512 return SDValue();
4513
4514 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4515 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4516 assert(CTVT.isInteger());
4517 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4518
4519 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4520 // check before emitting a potentially unnecessary op.
4521 if (DAG.isKnownNeverZero(CTOp)) {
4522 // (ctpop x) == 1 --> (x & x-1) == 0
4523 // (ctpop x) != 1 --> (x & x-1) != 0
4524 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4525 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4526 return RHS;
4527 }
4528
4529 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4530 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
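// e.g. x = 0b0100: x ^ (x - 1) == 0b0111 u> 0b0011 == x - 1, so exactly one
// bit is set. For x = 0b0110 the xor is 0b0011, which is not u> 0b0101, and
// for x == 0 the xor is all-ones but x - 1 is too, so the u> compare fails.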
4531 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4532 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4533 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4534 }
4535
4536 return SDValue();
4537}
4538
4539static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4540 ISD::CondCode Cond, const SDLoc &dl,
4541 SelectionDAG &DAG) {
4542 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4543 return SDValue();
4544
4545 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4546 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4547 return SDValue();
4548
4549 auto getRotateSource = [](SDValue X) {
4550 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4551 return X.getOperand(0);
4552 return SDValue();
4553 };
4554
4555 // Peek through a rotated value compared against 0 or -1:
4556 // (rot X, Y) == 0/-1 --> X == 0/-1
4557 // (rot X, Y) != 0/-1 --> X != 0/-1
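// A rotate only permutes bits, so the result is all-zeros or all-ones iff the
// input already was, regardless of the (possibly variable) rotate amount.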
4558 if (SDValue R = getRotateSource(N0))
4559 return DAG.getSetCC(dl, VT, R, N1, Cond);
4560
4561 // Peek through an 'or' of a rotated value compared against 0:
4562 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4563 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4564 //
4565 // TODO: Add the 'and' with -1 sibling.
4566 // TODO: Recurse through a series of 'or' ops to find the rotate.
4567 EVT OpVT = N0.getValueType();
4568 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4569 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4570 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4571 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4572 }
4573 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4574 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4575 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4576 }
4577 }
4578
4579 return SDValue();
4580}
4581
4582static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4583 ISD::CondCode Cond, const SDLoc &dl,
4584 SelectionDAG &DAG) {
4585 // If we are testing for all-bits-clear, we might be able to do that with
4586 // less shifting since bit-order does not matter.
4587 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4588 return SDValue();
4589
4590 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4591 if (!C1 || !C1->isZero())
4592 return SDValue();
4593
4594 if (!N0.hasOneUse() ||
4595 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4596 return SDValue();
4597
4598 unsigned BitWidth = N0.getScalarValueSizeInBits();
4599 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4600 if (!ShAmtC)
4601 return SDValue();
4602
4603 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4604 if (ShAmt == 0)
4605 return SDValue();
4606
4607 // Canonicalize fshr as fshl to reduce pattern-matching.
4608 if (N0.getOpcode() == ISD::FSHR)
4609 ShAmt = BitWidth - ShAmt;
4610
4611 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4612 SDValue X, Y;
4613 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4614 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4615 return false;
4616 if (Or.getOperand(0) == Other) {
4617 X = Or.getOperand(0);
4618 Y = Or.getOperand(1);
4619 return true;
4620 }
4621 if (Or.getOperand(1) == Other) {
4622 X = Or.getOperand(1);
4623 Y = Or.getOperand(0);
4624 return true;
4625 }
4626 return false;
4627 };
4628
4629 EVT OpVT = N0.getValueType();
4630 EVT ShAmtVT = N0.getOperand(2).getValueType();
4631 SDValue F0 = N0.getOperand(0);
4632 SDValue F1 = N0.getOperand(1);
4633 if (matchOr(F0, F1)) {
4634 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4635 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4636 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4637 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4638 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4639 }
4640 if (matchOr(F1, F0)) {
4641 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4642 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4643 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4644 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4645 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4646 }
4647
4648 return SDValue();
4649}
4650
4651/// Try to simplify a setcc built with the specified operands and cc. If it is
4652/// unable to simplify it, return a null SDValue.
4652SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4653 ISD::CondCode Cond, bool foldBooleans,
4655 DAGCombinerInfo &DCI,
4656 const SDLoc &dl) const {
4657 SelectionDAG &DAG = DCI.DAG;
4658 const DataLayout &Layout = DAG.getDataLayout();
4659 EVT OpVT = N0.getValueType();
4660 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4661
4662 // Constant fold or commute setcc.
4663 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4664 return Fold;
4665
4666 bool N0ConstOrSplat =
4667 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4668 bool N1ConstOrSplat =
4669 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4670
4671 // Canonicalize toward having the constant on the RHS.
4672 // TODO: Handle non-splat vector constants. All undef causes trouble.
4673 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4674 // infinite loop here when we encounter one.
4675 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4676 if (N0ConstOrSplat && !N1ConstOrSplat &&
4677 (DCI.isBeforeLegalizeOps() ||
4678 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4679 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4680
4681 // If we have a subtract with the same 2 non-constant operands as this setcc
4682 // -- but in reverse order -- then try to commute the operands of this setcc
4683 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4684 // instruction on some targets.
4685 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4686 (DCI.isBeforeLegalizeOps() ||
4687 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4688 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4689 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4690 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4691
4692 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4693 return V;
4694
4695 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4696 return V;
4697
4698 if (auto *N1C = isConstOrConstSplat(N1)) {
4699 const APInt &C1 = N1C->getAPIntValue();
4700
4701 // Optimize some CTPOP cases.
4702 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4703 return V;
4704
4705 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4706 // X * Y == 0 --> (X == 0) || (Y == 0)
4707 // X * Y != 0 --> (X != 0) && (Y != 0)
4708 // TODO: This bails out if minsize is set, but if the target doesn't have a
4709 // single instruction multiply for this type, it would likely be
4710 // smaller to decompose.
4711 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4712 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4713 (N0->getFlags().hasNoUnsignedWrap() ||
4714 N0->getFlags().hasNoSignedWrap()) &&
4715 !Attr.hasFnAttr(Attribute::MinSize)) {
4716 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4717 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4718 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4719 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4720 }
4721
4722 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4723 // equality comparison, then we're just comparing whether X itself is
4724 // zero.
4725 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4726 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4727 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4728 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4729 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4730 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4731 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4732 // (srl (ctlz x), 5) == 0 -> X != 0
4733 // (srl (ctlz x), 5) != 1 -> X != 0
4734 Cond = ISD::SETNE;
4735 } else {
4736 // (srl (ctlz x), 5) != 0 -> X == 0
4737 // (srl (ctlz x), 5) == 1 -> X == 0
4738 Cond = ISD::SETEQ;
4739 }
4740 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4741 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4742 Cond);
4743 }
4744 }
4745 }
4746 }
4747
4748 // setcc X, 0, setlt --> X (when X is all sign bits)
4749 // setcc X, 0, setne --> X (when X is all sign bits)
4750 //
4751 // When we know that X has 0 or -1 in each element (or scalar), this
4752 // comparison will produce X. This is only true when boolean contents are
4753 // represented via 0s and -1s.
4754 if (VT == OpVT &&
4755 // Check that the result of setcc is 0 and -1.
4756 getBooleanContents(OpVT) == ZeroOrNegativeOneBooleanContent &&
4757 // Match only for checks X < 0 and X != 0
4758 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4759 // The identity holds iff we know all sign bits for all lanes.
4760 DAG.ComputeNumSignBits(N0) == OpVT.getScalarSizeInBits())
4761 return N0;
4762
4763 // FIXME: Support vectors.
4764 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4765 const APInt &C1 = N1C->getAPIntValue();
4766
4767 // (zext x) == C --> x == (trunc C)
4768 // (sext x) == C --> x == (trunc C)
4769 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4770 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4771 unsigned MinBits = N0.getValueSizeInBits();
4772 SDValue PreExt;
4773 bool Signed = false;
4774 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4775 // ZExt
4776 MinBits = N0->getOperand(0).getValueSizeInBits();
4777 PreExt = N0->getOperand(0);
4778 } else if (N0->getOpcode() == ISD::AND) {
4779 // DAGCombine turns costly ZExts into ANDs
4780 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4781 if ((C->getAPIntValue()+1).isPowerOf2()) {
4782 MinBits = C->getAPIntValue().countr_one();
4783 PreExt = N0->getOperand(0);
4784 }
4785 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4786 // SExt
4787 MinBits = N0->getOperand(0).getValueSizeInBits();
4788 PreExt = N0->getOperand(0);
4789 Signed = true;
4790 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4791 // ZEXTLOAD / SEXTLOAD
4792 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4793 MinBits = LN0->getMemoryVT().getSizeInBits();
4794 PreExt = N0;
4795 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4796 Signed = true;
4797 MinBits = LN0->getMemoryVT().getSizeInBits();
4798 PreExt = N0;
4799 }
4800 }
4801
4802 // Figure out how many bits we need to preserve this constant.
4803 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4804
4805 // Make sure we're not losing bits from the constant.
4806 if (MinBits > 0 &&
4807 MinBits < C1.getBitWidth() &&
4808 MinBits >= ReqdBits) {
4809 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4810 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4811 // Will get folded away.
4812 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4813 if (MinBits == 1 && C1 == 1)
4814 // Invert the condition.
4815 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4816 ISD::getSetCCInverse(Cond, MVT::i1));
4817 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4818 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4819 }
4820
4821 // If truncating the setcc operands is not desirable, we can still
4822 // simplify the expression in some cases:
4823 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4824 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4825 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4826 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4827 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4828 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4829 SDValue TopSetCC = N0->getOperand(0);
4830 unsigned N0Opc = N0->getOpcode();
4831 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4832 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4833 TopSetCC.getOpcode() == ISD::SETCC &&
4834 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4835 (isConstFalseVal(N1) ||
4836 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4837
4838 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4839 (!N1C->isZero() && Cond == ISD::SETNE);
4840
4841 if (!Inverse)
4842 return TopSetCC;
4843
4843
4844 ISD::CondCode InvCond = ISD::getSetCCInverse(
4845 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4846 TopSetCC.getOperand(0).getValueType());
4847 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4848 TopSetCC.getOperand(1),
4849 InvCond);
4850 }
4851 }
4852 }
4853
4854 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4855 // equality or unsigned, and all 1 bits of the const are in the same
4856 // partial word, see if we can shorten the load.
4857 if (DCI.isBeforeLegalize() &&
4858 !ISD::isSignedIntSetCC(Cond) &&
4859 N0.getOpcode() == ISD::AND && C1 == 0 &&
4860 N0.getNode()->hasOneUse() &&
4861 isa<LoadSDNode>(N0.getOperand(0)) &&
4862 N0.getOperand(0).getNode()->hasOneUse() &&
4863 isa<ConstantSDNode>(N0.getOperand(1))) {
4864 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4865 APInt bestMask;
4866 unsigned bestWidth = 0, bestOffset = 0;
4867 if (Lod->isSimple() && Lod->isUnindexed() &&
4868 (Lod->getMemoryVT().isByteSized() ||
4869 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4870 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4871 unsigned origWidth = N0.getValueSizeInBits();
4872 unsigned maskWidth = origWidth;
4873 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4874 // 8 bits, but have to be careful...
4875 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4876 origWidth = Lod->getMemoryVT().getSizeInBits();
4877 const APInt &Mask = N0.getConstantOperandAPInt(1);
4878 // Only consider power-of-2 widths (and at least one byte) as candidates
4879 // for the narrowed load.
4880 for (unsigned width = 8; width < origWidth; width *= 2) {
4881 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4882 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4883 // Avoid accessing any padding here for now (we could use memWidth
4884 // instead of origWidth here otherwise).
4885 unsigned maxOffset = origWidth - width;
4886 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4887 if (Mask.isSubsetOf(newMask)) {
4888 unsigned ptrOffset =
4889 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4890 unsigned IsFast = 0;
4891 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4892 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4893 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
4894 ptrOffset / 8) &&
4895 allowsMemoryAccess(
4896 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4897 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4898 IsFast) {
4899 bestOffset = ptrOffset / 8;
4900 bestMask = Mask.lshr(offset);
4901 bestWidth = width;
4902 break;
4903 }
4904 }
4905 newMask <<= 8;
4906 }
4907 if (bestWidth)
4908 break;
4909 }
4910 }
4911 if (bestWidth) {
4912 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4913 SDValue Ptr = Lod->getBasePtr();
4914 if (bestOffset != 0)
4915 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4916 SDValue NewLoad =
4917 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4918 Lod->getPointerInfo().getWithOffset(bestOffset),
4919 Lod->getBaseAlign());
4920 SDValue And =
4921 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4922 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4923 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4924 }
4925 }
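// Illustrative sketch of the narrowing above in plain C++ (assumes a
// little-endian target, an i32 load, and mask 0xFF00):
//
//   bool wide(const uint32_t *p) { return (*p & 0xFF00u) == 0; }
//   bool narrow(const uint32_t *p) { // bestWidth=8, bestOffset=1, mask 0xFF
//     return reinterpret_cast<const uint8_t *>(p)[1] == 0;
//   }
//
// On a big-endian target the same mask selects byte 2 (0-based) of the 4
// instead, which is what the ptrOffset computation above accounts for.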
4926
4927 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4928 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4929 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4930
4931 // If the comparison constant has bits in the upper part, the
4932 // zero-extended value could never match.
4933 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4934 C1.getBitWidth() - InSize))) {
4935 switch (Cond) {
4936 case ISD::SETUGT:
4937 case ISD::SETUGE:
4938 case ISD::SETEQ:
4939 return DAG.getConstant(0, dl, VT);
4940 case ISD::SETULT:
4941 case ISD::SETULE:
4942 case ISD::SETNE:
4943 return DAG.getConstant(1, dl, VT);
4944 case ISD::SETGT:
4945 case ISD::SETGE:
4946 // True if the sign bit of C1 is set.
4947 return DAG.getConstant(C1.isNegative(), dl, VT);
4948 case ISD::SETLT:
4949 case ISD::SETLE:
4950 // True if the sign bit of C1 isn't set.
4951 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4952 default:
4953 break;
4954 }
4955 }
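// For instance (illustrative): with x zero-extended from i8 to i32, the
// compare "(zext x) == 0x100" can never be true since the result fits in
// 8 bits, so SETEQ folds to the constant 0:
//
//   uint32_t z = uint8_t(x); // z <= 0xFF
//   bool b = (z == 0x100u);  // always false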
4956
4957 // Otherwise, we can perform the comparison with the low bits.
4958 switch (Cond) {
4959 case ISD::SETEQ:
4960 case ISD::SETNE:
4961 case ISD::SETUGT:
4962 case ISD::SETUGE:
4963 case ISD::SETULT:
4964 case ISD::SETULE: {
4965 EVT newVT = N0.getOperand(0).getValueType();
4966 // FIXME: Should use isNarrowingProfitable.
4967 if (DCI.isBeforeLegalizeOps() ||
4968 (isOperationLegal(ISD::SETCC, newVT) &&
4969 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4971 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4972 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4973
4974 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4975 NewConst, Cond);
4976 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4977 }
4978 break;
4979 }
4980 default:
4981 break; // todo, be more careful with signed comparisons
4982 }
4983 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4984 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4985 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4986 OpVT)) {
4987 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4988 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4989 EVT ExtDstTy = N0.getValueType();
4990 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4991
4992 // If the constant doesn't fit into the number of bits for the source of
4993 // the sign extension, it is impossible for both sides to be equal.
4994 if (C1.getSignificantBits() > ExtSrcTyBits)
4995 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4996
4997 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4998 ExtDstTy != ExtSrcTy && "Unexpected types!");
4999 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5000 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5001 DAG.getConstant(Imm, dl, ExtDstTy));
5002 if (!DCI.isCalledByLegalizer())
5003 DCI.AddToWorklist(ZextOp.getNode());
5004 // Otherwise, make this a use of a zext.
5005 return DAG.getSetCC(dl, VT, ZextOp,
5006 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5007 } else if ((N1C->isZero() || N1C->isOne()) &&
5008 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5009 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5010 // excluded as they are handled below whilst checking for foldBooleans.
5011 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5012 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5013 (N0.getValueType() == MVT::i1 ||
5014 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5015 DAG.MaskedValueIsZero(
5016 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
5017 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5018 if (TrueWhenTrue)
5019 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5020 // Invert the condition.
5021 if (N0.getOpcode() == ISD::SETCC) {
5022 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5023 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
5024 if (DCI.isBeforeLegalizeOps() ||
5025 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5026 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5027 }
5028 }
5029
5030 if ((N0.getOpcode() == ISD::XOR ||
5031 (N0.getOpcode() == ISD::AND &&
5032 N0.getOperand(0).getOpcode() == ISD::XOR &&
5033 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5034 isOneConstant(N0.getOperand(1))) {
5035 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5036 // can only do this if the top bits are known zero.
5037 unsigned BitWidth = N0.getValueSizeInBits();
5038 if (DAG.MaskedValueIsZero(N0,
5039 APInt::getHighBitsSet(BitWidth,
5040 BitWidth-1))) {
5041 // Okay, get the un-inverted input value.
5042 SDValue Val;
5043 if (N0.getOpcode() == ISD::XOR) {
5044 Val = N0.getOperand(0);
5045 } else {
5046 assert(N0.getOpcode() == ISD::AND &&
5047 N0.getOperand(0).getOpcode() == ISD::XOR);
5048 // ((X^1)&1)^1 -> X & 1
5049 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5050 N0.getOperand(0).getOperand(0),
5051 N0.getOperand(1));
5052 }
5053
5054 return DAG.getSetCC(dl, VT, Val, N1,
5055 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5056 }
5057 } else if (N1C->isOne()) {
5058 SDValue Op0 = N0;
5059 if (Op0.getOpcode() == ISD::TRUNCATE)
5060 Op0 = Op0.getOperand(0);
5061
5062 if ((Op0.getOpcode() == ISD::XOR) &&
5063 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5064 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5065 SDValue XorLHS = Op0.getOperand(0);
5066 SDValue XorRHS = Op0.getOperand(1);
5067 // Ensure that the input setccs return an i1 type or 0/1 value.
5068 if (Op0.getValueType() == MVT::i1 ||
5069 (getBooleanContents(XorLHS.getValueType()) ==
5070 ZeroOrOneBooleanContent &&
5071 getBooleanContents(XorRHS.getValueType()) ==
5072 ZeroOrOneBooleanContent)) {
5073 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5074 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5075 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5076 }
5077 }
5078 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5079 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5080 if (Op0.getValueType().bitsGT(VT))
5081 Op0 = DAG.getNode(ISD::AND, dl, VT,
5082 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5083 DAG.getConstant(1, dl, VT));
5084 else if (Op0.getValueType().bitsLT(VT))
5085 Op0 = DAG.getNode(ISD::AND, dl, VT,
5086 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5087 DAG.getConstant(1, dl, VT));
5088
5089 return DAG.getSetCC(dl, VT, Op0,
5090 DAG.getConstant(0, dl, Op0.getValueType()),
5091 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5092 }
5093 if (Op0.getOpcode() == ISD::AssertZext &&
5094 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5095 return DAG.getSetCC(dl, VT, Op0,
5096 DAG.getConstant(0, dl, Op0.getValueType()),
5097 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5098 }
5099 }
5100
5101 // Given:
5102 // icmp eq/ne (urem %x, %y), 0
5103 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5104 // icmp eq/ne %x, 0
5105 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5106 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5107 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5108 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5109 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5110 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5111 }
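// Why this is sound: popcount(x) <= 1 means x is 0 or a power of two, and
// every divisor of a power of two is itself a power of two (popcount 1).
// A y with popcount(y) >= 2 therefore never divides a nonzero x, so
// (x urem y) == 0 holds iff x == 0.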
5112
5113 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5114 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5115 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5116 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
5117 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5118 N1C->isAllOnes()) {
5119 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5120 DAG.getConstant(0, dl, OpVT),
5121 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5122 }
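// E.g. for i8: (x ashr 7) broadcasts the sign bit, so it equals -1 exactly
// when x is negative; the compare against -1 reduces to the sign test
// "x <s 0" (or "x >=s 0" for setne).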
5123
5124 // fold (setcc (trunc x) c) -> (setcc x c)
5125 if (N0.getOpcode() == ISD::TRUNCATE &&
5126 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5127 (N0->getFlags().hasNoSignedWrap() &&
5128 !ISD::isUnsignedIntSetCC(Cond))) &&
5129 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5130 EVT NewVT = N0.getOperand(0).getValueType();
5131 SDValue NewConst = DAG.getConstant(
5132 ISD::isSignedIntSetCC(Cond)
5133 ? C1.sext(NewVT.getSizeInBits())
5134 : C1.zext(NewVT.getSizeInBits()),
5135 dl, NewVT);
5136 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5137 }
5138
5139 if (SDValue V =
5140 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5141 return V;
5142 }
5143
5144 // These simplifications apply to splat vectors as well.
5145 // TODO: Handle more splat vector cases.
5146 if (auto *N1C = isConstOrConstSplat(N1)) {
5147 const APInt &C1 = N1C->getAPIntValue();
5148
5149 APInt MinVal, MaxVal;
5150 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5151 if (ISD::isSignedIntSetCC(Cond)) {
5152 MinVal = APInt::getSignedMinValue(OperandBitSize);
5153 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5154 } else {
5155 MinVal = APInt::getMinValue(OperandBitSize);
5156 MaxVal = APInt::getMaxValue(OperandBitSize);
5157 }
5158
5159 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5160 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5161 // X >= MIN --> true
5162 if (C1 == MinVal)
5163 return DAG.getBoolConstant(true, dl, VT, OpVT);
5164
5165 if (!VT.isVector()) { // TODO: Support this for vectors.
5166 // X >= C0 --> X > (C0 - 1)
5167 APInt C = C1 - 1;
5168 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5169 if ((DCI.isBeforeLegalizeOps() ||
5170 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5171 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5172 isLegalICmpImmediate(C.getSExtValue())))) {
5173 return DAG.getSetCC(dl, VT, N0,
5174 DAG.getConstant(C, dl, N1.getValueType()),
5175 NewCC);
5176 }
5177 }
5178 }
5179
5180 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5181 // X <= MAX --> true
5182 if (C1 == MaxVal)
5183 return DAG.getBoolConstant(true, dl, VT, OpVT);
5184
5185 // X <= C0 --> X < (C0 + 1)
5186 if (!VT.isVector()) { // TODO: Support this for vectors.
5187 APInt C = C1 + 1;
5188 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5189 if ((DCI.isBeforeLegalizeOps() ||
5190 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5191 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5192 isLegalICmpImmediate(C.getSExtValue())))) {
5193 return DAG.getSetCC(dl, VT, N0,
5194 DAG.getConstant(C, dl, N1.getValueType()),
5195 NewCC);
5196 }
5197 }
5198 }
5199
5200 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5201 if (C1 == MinVal)
5202 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5203
5204 // TODO: Support this for vectors after legalize ops.
5205 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5206 // Canonicalize setlt X, Max --> setne X, Max
5207 if (C1 == MaxVal)
5208 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5209
5210 // If we have setult X, 1, turn it into seteq X, 0
5211 if (C1 == MinVal+1)
5212 return DAG.getSetCC(dl, VT, N0,
5213 DAG.getConstant(MinVal, dl, N0.getValueType()),
5214 ISD::SETEQ);
5215 }
5216 }
5217
5218 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5219 if (C1 == MaxVal)
5220 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5221
5222 // TODO: Support this for vectors after legalize ops.
5223 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5224 // Canonicalize setgt X, Min --> setne X, Min
5225 if (C1 == MinVal)
5226 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5227
5228 // If we have setugt X, Max-1, turn it into seteq X, Max
5229 if (C1 == MaxVal-1)
5230 return DAG.getSetCC(dl, VT, N0,
5231 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5232 ISD::SETEQ);
5233 }
5234 }
5235
5236 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5237 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5238 if (C1.isZero())
5239 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5240 VT, N0, N1, Cond, DCI, dl))
5241 return CC;
5242
5243 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5244 // For example, when high 32-bits of i64 X are known clear:
5245 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5246 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5247 bool CmpZero = N1C->isZero();
5248 bool CmpNegOne = N1C->isAllOnes();
5249 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5250 // Match or(lo,shl(hi,bw/2)) pattern.
5251 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5252 unsigned EltBits = V.getScalarValueSizeInBits();
5253 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5254 return false;
5255 SDValue LHS = V.getOperand(0);
5256 SDValue RHS = V.getOperand(1);
5257 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5258 // Unshifted element must have zero upper bits.
5259 if (RHS.getOpcode() == ISD::SHL &&
5260 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5261 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5262 DAG.MaskedValueIsZero(LHS, HiBits)) {
5263 Lo = LHS;
5264 Hi = RHS.getOperand(0);
5265 return true;
5266 }
5267 if (LHS.getOpcode() == ISD::SHL &&
5268 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5269 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5270 DAG.MaskedValueIsZero(RHS, HiBits)) {
5271 Lo = RHS;
5272 Hi = LHS.getOperand(0);
5273 return true;
5274 }
5275 return false;
5276 };
5277
5278 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5279 unsigned EltBits = N0.getScalarValueSizeInBits();
5280 unsigned HalfBits = EltBits / 2;
5281 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5282 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5283 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5284 SDValue NewN0 =
5285 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5286 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5287 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5288 };
5289
5290 SDValue Lo, Hi;
5291 if (IsConcat(N0, Lo, Hi))
5292 return MergeConcat(Lo, Hi);
5293
5294 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5295 SDValue Lo0, Lo1, Hi0, Hi1;
5296 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5297 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5298 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5299 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5300 }
5301 }
5302 }
5303 }
5304
5305 // If we have "setcc X, C0", check to see if we can shrink the immediate
5306 // by changing cc.
5307 // TODO: Support this for vectors after legalize ops.
5308 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5309 // SETUGT X, SINTMAX -> SETLT X, 0
5310 // SETUGE X, SINTMIN -> SETLT X, 0
5311 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5312 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5313 return DAG.getSetCC(dl, VT, N0,
5314 DAG.getConstant(0, dl, N1.getValueType()),
5315 ISD::SETLT);
5316
5317 // SETULT X, SINTMIN -> SETGT X, -1
5318 // SETULE X, SINTMAX -> SETGT X, -1
5319 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5320 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5321 return DAG.getSetCC(dl, VT, N0,
5322 DAG.getAllOnesConstant(dl, N1.getValueType()),
5323 ISD::SETGT);
5324 }
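// A quick check of these rewrites in plain C++ (illustrative, i8 width):
//
//   bool a(uint8_t x) { return x > 0x7Fu; }     // SETUGT X, SINTMAX
//   bool b(uint8_t x) { return int8_t(x) < 0; } // SETLT X, 0
//
// Both hold exactly when the sign bit of x is set, which is why the
// unsigned compare against SINTMAX can become a sign test with constant 0.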
5325 }
5326
5327 // Back to non-vector simplifications.
5328 // TODO: Can we do these for vector splats?
5329 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5330 const APInt &C1 = N1C->getAPIntValue();
5331 EVT ShValTy = N0.getValueType();
5332
5333 // Fold bit comparisons when we can. This will result in an
5334 // incorrect value when boolean false is negative one, unless
5335 // the bitsize is 1 in which case the false value is the same
5336 // in practice regardless of the representation.
5337 if ((VT.getSizeInBits() == 1 ||
5338 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5339 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5340 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5341 N0.getOpcode() == ISD::AND) {
5342 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5343 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5344 // Perform the xform if the AND RHS is a single bit.
5345 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5346 if (AndRHS->getAPIntValue().isPowerOf2() &&
5347 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5348 return DAG.getNode(
5349 ISD::TRUNCATE, dl, VT,
5350 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5351 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5352 }
5353 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5354 // (X & 8) == 8 --> (X & 8) >> 3
5355 // Perform the xform if C1 is a single bit.
5356 unsigned ShCt = C1.logBase2();
5357 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5358 return DAG.getNode(
5359 ISD::TRUNCATE, dl, VT,
5360 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5361 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5362 }
5363 }
5364 }
5365 }
5366
5367 if (C1.getSignificantBits() <= 64 &&
5368 isLegalICmpImmediate(C1.getSExtValue())) {
5369 // (X & -256) == 256 -> (X >> 8) == 1
5370 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5371 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5372 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5373 const APInt &AndRHSC = AndRHS->getAPIntValue();
5374 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5375 unsigned ShiftBits = AndRHSC.countr_zero();
5376 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5377 // If using an unsigned shift doesn't yield a legal compare
5378 // immediate, try using sra instead.
5379 APInt NewC = C1.lshr(ShiftBits);
5380 if (NewC.getSignificantBits() <= 64 &&
5381 !isLegalICmpImmediate(NewC.getSExtValue())) {
5382 APInt SignedC = C1.ashr(ShiftBits);
5383 if (SignedC.getSignificantBits() <= 64 &&
5384 isLegalICmpImmediate(SignedC.getSExtValue())) {
5385 SDValue Shift = DAG.getNode(
5386 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5387 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5388 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5389 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5390 }
5391 }
5392 SDValue Shift = DAG.getNode(
5393 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5394 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5395 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5396 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5397 }
5398 }
5399 }
5400 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5401 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5402 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5403 // X < 0x100000000 -> (X >> 32) < 1
5404 // X >= 0x100000000 -> (X >> 32) >= 1
5405 // X <= 0x0ffffffff -> (X >> 32) < 1
5406 // X > 0x0ffffffff -> (X >> 32) >= 1
5407 unsigned ShiftBits;
5408 APInt NewC = C1;
5409 ISD::CondCode NewCond = Cond;
5410 if (AdjOne) {
5411 ShiftBits = C1.countr_one();
5412 NewC = NewC + 1;
5413 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5414 } else {
5415 ShiftBits = C1.countr_zero();
5416 }
5417 NewC.lshrInPlace(ShiftBits);
5418 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5419 isLegalICmpImmediate(NewC.getSExtValue()) &&
5420 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5421 SDValue Shift =
5422 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5423 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5424 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5425 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5426 }
5427 }
5428 }
5429 }
5430
5431 if (isa<ConstantFPSDNode>(N1)) {
5432 auto *CFP = cast<ConstantFPSDNode>(N1);
5433 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5434
5435 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5436 // constant if knowing that the operand is non-nan is enough. We prefer to
5437 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5438 // materialize 0.0.
5439 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5440 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5441
5442 // setcc (fneg x), C -> setcc swap(pred) x, -C
5443 if (N0.getOpcode() == ISD::FNEG) {
5444 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5445 if (DCI.isBeforeLegalizeOps() ||
5446 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5447 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5448 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5449 }
5450 }
5451
5452 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5453 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5454 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5455 bool IsFabs = N0.getOpcode() == ISD::FABS;
5456 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5457 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5458 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5459 : (IsFabs ? fcInf : fcPosInf);
5460 if (Cond == ISD::SETUEQ)
5461 Flag |= fcNan;
5462 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5463 DAG.getTargetConstant(Flag, dl, MVT::i32));
5464 }
5465 }
5466
5467 // If the condition is not legal, see if we can find an equivalent one
5468 // which is legal.
5469 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5470 // If the comparison was an awkward floating-point == or != and one of
5471 // the comparison operands is infinity or negative infinity, convert the
5472 // condition to a less-awkward <= or >=.
5473 if (CFP->getValueAPF().isInfinity()) {
5474 bool IsNegInf = CFP->getValueAPF().isNegative();
5475 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5476 switch (Cond) {
5477 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5478 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5479 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5480 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5481 default: break;
5482 }
5483 if (NewCond != ISD::SETCC_INVALID &&
5484 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5485 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5486 }
5487 }
5488 }
5489
5490 if (N0 == N1) {
5491 // The sext(setcc()) => setcc() optimization relies on the appropriate
5492 // constant being emitted.
5493 assert(!N0.getValueType().isInteger() &&
5494 "Integer types should be handled by FoldSetCC");
5495
5496 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5497 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5498 if (UOF == 2) // FP operators that are undefined on NaNs.
5499 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5500 if (UOF == unsigned(EqTrue))
5501 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5502 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5503 // if it is not already.
5504 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5505 if (NewCond != Cond &&
5506 (DCI.isBeforeLegalizeOps() ||
5507 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5508 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5509 }
5510
5511 // ~X > ~Y --> Y > X
5512 // ~X < ~Y --> Y < X
5513 // ~X < C --> X > ~C
5514 // ~X > C --> X < ~C
5515 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5516 N0.getValueType().isInteger()) {
5517 if (isBitwiseNot(N0)) {
5518 if (isBitwiseNot(N1))
5519 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5520
5521 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5522 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5523 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5524 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5525 }
5526 }
5527 }
5528
5529 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5530 N0.getValueType().isInteger()) {
5531 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5532 N0.getOpcode() == ISD::XOR) {
5533 // Simplify (X+Y) == (X+Z) --> Y == Z
5534 if (N0.getOpcode() == N1.getOpcode()) {
5535 if (N0.getOperand(0) == N1.getOperand(0))
5536 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5537 if (N0.getOperand(1) == N1.getOperand(1))
5538 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5539 if (isCommutativeBinOp(N0.getOpcode())) {
5540 // If X op Y == Y op X, try other combinations.
5541 if (N0.getOperand(0) == N1.getOperand(1))
5542 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5543 Cond);
5544 if (N0.getOperand(1) == N1.getOperand(0))
5545 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5546 Cond);
5547 }
5548 }
5549
5550 // If RHS is a legal immediate value for a compare instruction, we need
5551 // to be careful about increasing register pressure needlessly.
5552 bool LegalRHSImm = false;
5553
5554 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5555 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5556 // Turn (X+C1) == C2 --> X == C2-C1
5557 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5558 return DAG.getSetCC(
5559 dl, VT, N0.getOperand(0),
5560 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5561 dl, N0.getValueType()),
5562 Cond);
5563
5564 // Turn (X^C1) == C2 --> X == C1^C2
5565 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5566 return DAG.getSetCC(
5567 dl, VT, N0.getOperand(0),
5568 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5569 dl, N0.getValueType()),
5570 Cond);
5571 }
5572
5573 // Turn (C1-X) == C2 --> X == C1-C2
5574 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5575 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5576 return DAG.getSetCC(
5577 dl, VT, N0.getOperand(1),
5578 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5579 dl, N0.getValueType()),
5580 Cond);
5581
5582 // Could RHSC fold directly into a compare?
5583 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5584 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5585 }
5586
5587 // (X+Y) == X --> Y == 0 and similar folds.
5588 // Don't do this if X is an immediate that can fold into a cmp
5589 // instruction and X+Y has other uses. It could be an induction variable
5590 // chain, and the transform would increase register pressure.
5591 if (!LegalRHSImm || N0.hasOneUse())
5592 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5593 return V;
5594 }
5595
5596 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5597 N1.getOpcode() == ISD::XOR)
5598 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5599 return V;
5600
5601 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5602 return V;
5603
5604 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5605 return V;
5606 }
5607
5608 // Fold remainder of division by a constant.
5609 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5610 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5611 // When division is cheap or optimizing for minimum size,
5612 // fall through to DIVREM creation by skipping this fold.
5613 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5614 if (N0.getOpcode() == ISD::UREM) {
5615 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5616 return Folded;
5617 } else if (N0.getOpcode() == ISD::SREM) {
5618 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5619 return Folded;
5620 }
5621 }
5622 }
5623
5624 // Fold away ALL boolean setcc's.
5625 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5626 SDValue Temp;
5627 switch (Cond) {
5628 default: llvm_unreachable("Unknown integer setcc!");
5629 case ISD::SETEQ: // X == Y -> ~(X^Y)
5630 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5631 N0 = DAG.getNOT(dl, Temp, OpVT);
5632 if (!DCI.isCalledByLegalizer())
5633 DCI.AddToWorklist(Temp.getNode());
5634 break;
5635 case ISD::SETNE: // X != Y --> (X^Y)
5636 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5637 break;
5638 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5639 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5640 Temp = DAG.getNOT(dl, N0, OpVT);
5641 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5642 if (!DCI.isCalledByLegalizer())
5643 DCI.AddToWorklist(Temp.getNode());
5644 break;
5645 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5646 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5647 Temp = DAG.getNOT(dl, N1, OpVT);
5648 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5649 if (!DCI.isCalledByLegalizer())
5650 DCI.AddToWorklist(Temp.getNode());
5651 break;
5652 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5653 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5654 Temp = DAG.getNOT(dl, N0, OpVT);
5655 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5656 if (!DCI.isCalledByLegalizer())
5657 DCI.AddToWorklist(Temp.getNode());
5658 break;
5659 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5660 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5661 Temp = DAG.getNOT(dl, N1, OpVT);
5662 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5663 break;
5664 }
5665 if (VT.getScalarType() != MVT::i1) {
5666 if (!DCI.isCalledByLegalizer())
5667 DCI.AddToWorklist(N0.getNode());
5668 // FIXME: If running after legalize, we probably can't do this.
5669 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5670 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5671 }
5672 return N0;
5673 }
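// Note on the signed cases above: for i1, the bit pattern 1 is the value
// -1, so e.g. "X >s Y" holds exactly when X == 0 and Y == 1 (0 > -1),
// matching the ~X & Y form built here.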
5674
5675 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5676 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5677 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5678 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5679 N1->getFlags().hasNoUnsignedWrap()) ||
5680 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5681 N1->getFlags().hasNoSignedWrap())) &&
5682 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5683 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5684 }
5685
5686 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5687 // TODO: Remove that .isVector() check
5688 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5689 ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap()) {
5690 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5691 }
5692
5693 // Could not fold it.
5694 return SDValue();
5695}
5696
5697/// Returns true (and the GlobalValue and the offset) if the node is a
5698/// GlobalAddress + offset.
5699 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5700 int64_t &Offset) const {
5701
5702 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5703
5704 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5705 GA = GASD->getGlobal();
5706 Offset += GASD->getOffset();
5707 return true;
5708 }
5709
5710 if (N->isAnyAdd()) {
5711 SDValue N1 = N->getOperand(0);
5712 SDValue N2 = N->getOperand(1);
5713 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5714 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5715 Offset += V->getSExtValue();
5716 return true;
5717 }
5718 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5719 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5720 Offset += V->getSExtValue();
5721 return true;
5722 }
5723 }
5724 }
5725
5726 return false;
5727}
5728
5729 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5730 DAGCombinerInfo &DCI) const {
5731 // Default implementation: no optimization.
5732 return SDValue();
5733}
5734
5735//===----------------------------------------------------------------------===//
5736// Inline Assembler Implementation Methods
5737//===----------------------------------------------------------------------===//
5738
5739 TargetLowering::ConstraintType
5740 TargetLowering::getConstraintType(StringRef Constraint) const {
5741 unsigned S = Constraint.size();
5742
5743 if (S == 1) {
5744 switch (Constraint[0]) {
5745 default: break;
5746 case 'r':
5747 return C_RegisterClass;
5748 case 'm': // memory
5749 case 'o': // offsetable
5750 case 'V': // not offsetable
5751 return C_Memory;
5752 case 'p': // Address.
5753 return C_Address;
5754 case 'n': // Simple Integer
5755 case 'E': // Floating Point Constant
5756 case 'F': // Floating Point Constant
5757 return C_Immediate;
5758 case 'i': // Simple Integer or Relocatable Constant
5759 case 's': // Relocatable Constant
5760 case 'X': // Allow ANY value.
5761 case 'I': // Target registers.
5762 case 'J':
5763 case 'K':
5764 case 'L':
5765 case 'M':
5766 case 'N':
5767 case 'O':
5768 case 'P':
5769 case '<':
5770 case '>':
5771 return C_Other;
5772 }
5773 }
5774
5775 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5776 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5777 return C_Memory;
5778 return C_Register;
5779 }
5780 return C_Unknown;
5781}
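// For example (illustrative): getConstraintType("r") == C_RegisterClass,
// getConstraintType("m") == C_Memory, getConstraintType("{eax}") ==
// C_Register, and a matching constraint such as "0" falls through to
// C_Unknown here (matching constraints are resolved elsewhere).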
5782
5783/// Try to replace an X constraint, which matches anything, with another that
5784/// has more specific requirements based on the type of the corresponding
5785/// operand.
5786const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5787 if (ConstraintVT.isInteger())
5788 return "r";
5789 if (ConstraintVT.isFloatingPoint())
5790 return "f"; // works for many targets
5791 return nullptr;
5792}
5793
5794 SDValue TargetLowering::LowerAsmOutputForConstraint(
5795 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5796 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5797 return SDValue();
5798}
5799
5800/// Lower the specified operand into the Ops vector.
5801/// If it is invalid, don't add anything to Ops.
5802 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5803 StringRef Constraint,
5804 std::vector<SDValue> &Ops,
5805 SelectionDAG &DAG) const {
5806
5807 if (Constraint.size() > 1)
5808 return;
5809
5810 char ConstraintLetter = Constraint[0];
5811 switch (ConstraintLetter) {
5812 default: break;
5813 case 'X': // Allows any operand
5814 case 'i': // Simple Integer or Relocatable Constant
5815 case 'n': // Simple Integer
5816 case 's': { // Relocatable Constant
5817
5818 ConstantSDNode *C = nullptr;
5819 uint64_t Offset = 0;
5820
5821 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5822 // etc., since getelementptr is variadic. We can't use
5823 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5824 // while in this case the GA may be furthest from the root node which is
5825 // likely an ISD::ADD.
5826 while (true) {
5827 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5828 // gcc prints these as sign extended. Sign extend value to 64 bits
5829 // now; without this it would get ZExt'd later in
5830 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5831 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5832 BooleanContent BCont = getBooleanContents(MVT::i64);
5833 ISD::NodeType ExtOpc =
5834 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5835 int64_t ExtVal =
5836 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5837 Ops.push_back(
5838 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5839 return;
5840 }
5841 if (ConstraintLetter != 'n') {
5842 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5843 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5844 GA->getValueType(0),
5845 Offset + GA->getOffset()));
5846 return;
5847 }
5848 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5849 Ops.push_back(DAG.getTargetBlockAddress(
5850 BA->getBlockAddress(), BA->getValueType(0),
5851 Offset + BA->getOffset(), BA->getTargetFlags()));
5852 return;
5853 }
5854 if (isa<BasicBlockSDNode>(Op)) {
5855 Ops.push_back(Op);
5856 return;
5857 }
5858 }
5859 const unsigned OpCode = Op.getOpcode();
5860 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5861 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5862 Op = Op.getOperand(1);
5863 // Subtraction is not commutative.
5864 else if (OpCode == ISD::ADD &&
5865 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5866 Op = Op.getOperand(0);
5867 else
5868 return;
5869 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5870 continue;
5871 }
5872 return;
5873 }
5874 break;
5875 }
5876 }
5877}
5878
5879 void TargetLowering::CollectTargetIntrinsicOperands(
5880 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5881 }
5882
5883std::pair<unsigned, const TargetRegisterClass *>
5884 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5885 StringRef Constraint,
5886 MVT VT) const {
5887 if (!Constraint.starts_with("{"))
5888 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5889 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5890
5891 // Remove the braces from around the name.
5892 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5893
5894 std::pair<unsigned, const TargetRegisterClass *> R =
5895 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5896
5897 // Figure out which register class contains this reg.
5898 for (const TargetRegisterClass *RC : RI->regclasses()) {
5899 // If none of the value types for this register class are valid, we
5900 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5901 if (!isLegalRC(*RI, *RC))
5902 continue;
5903
5904 for (const MCPhysReg &PR : *RC) {
5905 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5906 std::pair<unsigned, const TargetRegisterClass *> S =
5907 std::make_pair(PR, RC);
5908
5909 // If this register class has the requested value type, return it,
5910 // otherwise keep searching and return the first class found
5911 // if no other is found which explicitly has the requested type.
5912 if (RI->isTypeLegalForClass(*RC, VT))
5913 return S;
5914 if (!R.second)
5915 R = S;
5916 }
5917 }
5918 }
5919
5920 return R;
5921}
5922
5923//===----------------------------------------------------------------------===//
5924// Constraint Selection.
5925
5926 /// Return true if this is an input operand that is a matching constraint like
5927/// "4".
5928 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5929 assert(!ConstraintCode.empty() && "No known constraint!");
5930 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5931}
5932
5933/// If this is an input matching constraint, this method returns the output
5934/// operand it matches.
5935 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5936 assert(!ConstraintCode.empty() && "No known constraint!");
5937 return atoi(ConstraintCode.c_str());
5938}
5939
5940/// Split up the constraint string from the inline assembly value into the
5941/// specific constraints and their prefixes, and also tie in the associated
5942/// operand values.
5943/// If this returns an empty vector, and if the constraint string itself
5944/// isn't empty, there was an error parsing.
5945 TargetLowering::AsmOperandInfoVector
5946 TargetLowering::ParseConstraints(const DataLayout &DL,
5947 const TargetRegisterInfo *TRI,
5948 const CallBase &Call) const {
5949 /// Information about all of the constraints.
5950 AsmOperandInfoVector ConstraintOperands;
5951 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5952 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5953
5954 // Do a prepass over the constraints, canonicalizing them, and building up the
5955 // ConstraintOperands list.
5956 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5957 unsigned ResNo = 0; // ResNo - The result number of the next output.
5958 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5959
5960 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5961 ConstraintOperands.emplace_back(std::move(CI));
5962 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5963
5964 // Update multiple alternative constraint count.
5965 if (OpInfo.multipleAlternatives.size() > maCount)
5966 maCount = OpInfo.multipleAlternatives.size();
5967
5968 OpInfo.ConstraintVT = MVT::Other;
5969
5970 // Compute the value type for each operand.
5971 switch (OpInfo.Type) {
5972 case InlineAsm::isOutput: {
5973 // Indirect outputs just consume an argument.
5974 if (OpInfo.isIndirect) {
5975 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5976 break;
5977 }
5978
5979 // The return value of the call is this value. As such, there is no
5980 // corresponding argument.
5981 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5982 EVT VT;
5983 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5984 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
5985 } else {
5986 assert(ResNo == 0 && "Asm only has one result!");
5987 VT = getAsmOperandValueType(DL, Call.getType());
5988 }
5989 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5990 ++ResNo;
5991 break;
5992 }
5993 case InlineAsm::isInput:
5994 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5995 break;
5996 case InlineAsm::isLabel:
5997 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5998 ++LabelNo;
5999 continue;
6000 case InlineAsm::isClobber:
6001 // Nothing to do.
6002 break;
6003 }
6004
6005 if (OpInfo.CallOperandVal) {
6006 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6007 if (OpInfo.isIndirect) {
6008 OpTy = Call.getParamElementType(ArgNo);
6009 assert(OpTy && "Indirect operand must have elementtype attribute");
6010 }
6011
6012 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6013 if (StructType *STy = dyn_cast<StructType>(OpTy))
6014 if (STy->getNumElements() == 1)
6015 OpTy = STy->getElementType(0);
6016
6017 // If OpTy is not a single value, it may be a struct/union that we
6018 // can tile with integers.
6019 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6020 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6021 switch (BitSize) {
6022 default: break;
6023 case 1:
6024 case 8:
6025 case 16:
6026 case 32:
6027 case 64:
6028 case 128:
6029 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6030 break;
6031 }
6032 }
6033
6034 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6035 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6036 ArgNo++;
6037 }
6038 }
6039
6040 // If we have multiple alternative constraints, select the best alternative.
6041 if (!ConstraintOperands.empty()) {
6042 if (maCount) {
6043 unsigned bestMAIndex = 0;
6044 int bestWeight = -1;
6045 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6046 int weight = -1;
6047 unsigned maIndex;
6048 // Compute the sums of the weights for each alternative, keeping track
6049 // of the best (highest weight) one so far.
6050 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6051 int weightSum = 0;
6052 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6053 cIndex != eIndex; ++cIndex) {
6054 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6055 if (OpInfo.Type == InlineAsm::isClobber)
6056 continue;
6057
6058 // If this is an output operand with a matching input operand,
6059 // look up the matching input. If their types mismatch, e.g. one
6060 // is an integer, the other is floating point, or their sizes are
6061 // different, flag it as maCantMatch.
6062 if (OpInfo.hasMatchingInput()) {
6063 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6064 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6065 if ((OpInfo.ConstraintVT.isInteger() !=
6066 Input.ConstraintVT.isInteger()) ||
6067 (OpInfo.ConstraintVT.getSizeInBits() !=
6068 Input.ConstraintVT.getSizeInBits())) {
6069 weightSum = -1; // Can't match.
6070 break;
6071 }
6072 }
6073 }
6074 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6075 if (weight == -1) {
6076 weightSum = -1;
6077 break;
6078 }
6079 weightSum += weight;
6080 }
6081 // Update best.
6082 if (weightSum > bestWeight) {
6083 bestWeight = weightSum;
6084 bestMAIndex = maIndex;
6085 }
6086 }
6087
6088 // Now select chosen alternative in each constraint.
6089 for (AsmOperandInfo &cInfo : ConstraintOperands)
6090 if (cInfo.Type != InlineAsm::isClobber)
6091 cInfo.selectAlternative(bestMAIndex);
6092 }
6093 }
6094
6095 // Check and hook up tied operands, choose constraint code to use.
6096 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6097 cIndex != eIndex; ++cIndex) {
6098 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6099
6100 // If this is an output operand with a matching input operand, look up the
6101 // matching input. If their types mismatch, e.g. one is an integer, the
6102 // other is floating point, or their sizes are different, flag it as an
6103 // error.
6104 if (OpInfo.hasMatchingInput()) {
6105 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6106
6107 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6108 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6109 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6110 OpInfo.ConstraintVT);
6111 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6112 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6113 Input.ConstraintVT);
6114 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6115 OpInfo.ConstraintVT.isFloatingPoint();
6116 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6117 Input.ConstraintVT.isFloatingPoint();
6118 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6119 (MatchRC.second != InputRC.second)) {
6120 report_fatal_error("Unsupported asm: input constraint"
6121 " with a matching output constraint of"
6122 " incompatible type!");
6123 }
6124 }
6125 }
6126 }
6127
6128 return ConstraintOperands;
6129}
6130
6131 /// Return a number indicating our preference for choosing a type of constraint
6132 /// over another, for the purpose of sorting them. Immediates are almost always
6133 /// preferable (when they can be emitted). A higher return value means a
6134/// stronger preference for one constraint type relative to another.
6135/// FIXME: We should prefer registers over memory but doing so may lead to
6136/// unrecoverable register exhaustion later.
6137/// https://github.com/llvm/llvm-project/issues/20571
6138 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6139 switch (CT) {
6140 case TargetLowering::C_Immediate:
6141 case TargetLowering::C_Other:
6142 return 4;
6143 case TargetLowering::C_Memory:
6144 case TargetLowering::C_Address:
6145 return 3;
6146 case TargetLowering::C_RegisterClass:
6147 return 2;
6148 case TargetLowering::C_Register:
6149 return 1;
6150 case TargetLowering::C_Unknown:
6151 return 0;
6152 }
6153 llvm_unreachable("Invalid constraint type");
6154}
6155
6156/// Examine constraint type and operand type and determine a weight value.
6157/// This object must already have been set up with the operand type
6158/// and the current alternative constraint selected.
6159 TargetLowering::ConstraintWeight
6160 TargetLowering::getMultipleConstraintMatchWeight(
6161 AsmOperandInfo &info, int maIndex) const {
6162 InlineAsm::ConstraintCodeVector *rCodes;
6163 if (maIndex >= (int)info.multipleAlternatives.size())
6164 rCodes = &info.Codes;
6165 else
6166 rCodes = &info.multipleAlternatives[maIndex].Codes;
6167 ConstraintWeight BestWeight = CW_Invalid;
6168
6169 // Loop over the options, keeping track of the most general one.
6170 for (const std::string &rCode : *rCodes) {
6171 ConstraintWeight weight =
6172 getSingleConstraintMatchWeight(info, rCode.c_str());
6173 if (weight > BestWeight)
6174 BestWeight = weight;
6175 }
6176
6177 return BestWeight;
6178}
6179
6180/// Examine constraint type and operand type and determine a weight value.
6181/// This object must already have been set up with the operand type
6182/// and the current alternative constraint selected.
6183 TargetLowering::ConstraintWeight
6184 TargetLowering::getSingleConstraintMatchWeight(
6185 AsmOperandInfo &info, const char *constraint) const {
6186 ConstraintWeight weight = CW_Invalid;
6187 Value *CallOperandVal = info.CallOperandVal;
6188 // If we don't have a value, we can't do a match,
6189 // but allow it at the lowest weight.
6190 if (!CallOperandVal)
6191 return CW_Default;
6192 // Look at the constraint type.
6193 switch (*constraint) {
6194 case 'i': // immediate integer.
6195 case 'n': // immediate integer with a known value.
6196 if (isa<ConstantInt>(CallOperandVal))
6197 weight = CW_Constant;
6198 break;
6199 case 's': // non-explicit integral immediate.
6200 if (isa<GlobalValue>(CallOperandVal))
6201 weight = CW_Constant;
6202 break;
6203 case 'E': // immediate float if host format.
6204 case 'F': // immediate float.
6205 if (isa<ConstantFP>(CallOperandVal))
6206 weight = CW_Constant;
6207 break;
6208 case '<': // memory operand with autodecrement.
6209 case '>': // memory operand with autoincrement.
6210 case 'm': // memory operand.
6211 case 'o': // offsettable memory operand
6212 case 'V': // non-offsettable memory operand
6213 weight = CW_Memory;
6214 break;
6215 case 'r': // general register.
6216 case 'g': // general register, memory operand or immediate integer.
6217 // note: Clang converts "g" to "imr".
6218 if (CallOperandVal->getType()->isIntegerTy())
6219 weight = CW_Register;
6220 break;
6221 case 'X': // any operand.
6222 default:
6223 weight = CW_Default;
6224 break;
6225 }
6226 return weight;
6227}
6228
6229/// If there are multiple different constraints that we could pick for this
6230/// operand (e.g. "imr") try to pick the 'best' one.
6231/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6232/// into seven classes:
6233/// Register -> one specific register
6234/// RegisterClass -> a group of regs
6235/// Memory -> memory
6236/// Address -> a symbolic memory reference
6237/// Immediate -> immediate values
6238/// Other -> magic values (such as "Flag Output Operands")
6239/// Unknown -> something we don't recognize yet and can't handle
6240/// Ideally, we would pick the most specific constraint possible: if we have
6241/// something that fits into a register, we would pick it. The problem here
6242/// is that if we have something that could either be in a register or in
6243 /// memory, then use of the register could cause selection of *other*
6244/// operands to fail: they might only succeed if we pick memory. Because of
6245/// this the heuristic we use is:
6246///
6247/// 1) If there is an 'other' constraint, and if the operand is valid for
6248/// that constraint, use it. This makes us take advantage of 'i'
6249/// constraints when available.
6250/// 2) Otherwise, pick the most general constraint present. This prefers
6251/// 'm' over 'r', for example.
6252///
6253 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6254 TargetLowering::AsmOperandInfo &OpInfo) const {
6255 ConstraintGroup Ret;
6256
6257 Ret.reserve(OpInfo.Codes.size());
6258 for (StringRef Code : OpInfo.Codes) {
6259 TargetLowering::ConstraintType CType = getConstraintType(Code);
6260
6261 // Indirect 'other' or 'immediate' constraints are not allowed.
6262 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6263 CType == TargetLowering::C_Register ||
6264 CType == TargetLowering::C_RegisterClass))
6265 continue;
6266
6267 // Things with matching constraints can only be registers, per gcc
6268 // documentation. This mainly affects "g" constraints.
6269 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6270 continue;
6271
6272 Ret.emplace_back(Code, CType);
6273 }
6274
6275 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
6276 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6277 });
6278
6279 return Ret;
6280}
6281
6282/// If we have an immediate, see if we can lower it. Return true if we can,
6283/// false otherwise.
6284 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6285 SDValue Op, SelectionDAG *DAG,
6286 const TargetLowering &TLI) {
6287
6288 assert((P.second == TargetLowering::C_Other ||
6289 P.second == TargetLowering::C_Immediate) &&
6290 "need immediate or other");
6291
6292 if (!Op.getNode())
6293 return false;
6294
6295 std::vector<SDValue> ResultOps;
6296 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6297 return !ResultOps.empty();
6298}
6299
6300/// Determines the constraint code and constraint type to use for the specific
6301/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6302 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6303 SDValue Op,
6304 SelectionDAG *DAG) const {
6305 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6306
6307 // Single-letter constraints ('r') are very common.
6308 if (OpInfo.Codes.size() == 1) {
6309 OpInfo.ConstraintCode = OpInfo.Codes[0];
6310 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6311 } else {
6312 ConstraintGroup G = getConstraintPreferences(OpInfo);
6313 if (G.empty())
6314 return;
6315
6316 unsigned BestIdx = 0;
6317 for (const unsigned E = G.size();
6318 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6319 G[BestIdx].second == TargetLowering::C_Immediate);
6320 ++BestIdx) {
6321 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6322 break;
6323 // If we're out of constraints, just pick the first one.
6324 if (BestIdx + 1 == E) {
6325 BestIdx = 0;
6326 break;
6327 }
6328 }
6329
6330 OpInfo.ConstraintCode = G[BestIdx].first;
6331 OpInfo.ConstraintType = G[BestIdx].second;
6332 }
6333
6334 // 'X' matches anything.
6335 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6336 // Constants are handled elsewhere. For Functions, the type here is the
6337 // type of the result, which is not what we want to look at; leave them
6338 // alone.
6339 Value *v = OpInfo.CallOperandVal;
6340 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6341 return;
6342 }
6343
6344 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6345 OpInfo.ConstraintCode = "i";
6346 return;
6347 }
6348
6349 // Otherwise, try to resolve it to something we know about by looking at
6350 // the actual operand type.
6351 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6352 OpInfo.ConstraintCode = Repl;
6353 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6354 }
6355 }
6356}
6357
6358/// Given an exact SDIV by a constant, create a multiplication
6359/// with the multiplicative inverse of the constant.
6360/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6361 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6362 const SDLoc &dl, SelectionDAG &DAG,
6363 SmallVectorImpl<SDNode *> &Created) {
6364 SDValue Op0 = N->getOperand(0);
6365 SDValue Op1 = N->getOperand(1);
6366 EVT VT = N->getValueType(0);
6367 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6368 EVT ShSVT = ShVT.getScalarType();
6369
6370 bool UseSRA = false;
6371 SmallVector<SDValue, 16> Shifts, Factors;
6372
6373 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6374 if (C->isZero())
6375 return false;
6376
6377 EVT CT = C->getValueType(0);
6378 APInt Divisor = C->getAPIntValue();
6379 unsigned Shift = Divisor.countr_zero();
6380 if (Shift) {
6381 Divisor.ashrInPlace(Shift);
6382 UseSRA = true;
6383 }
6384 APInt Factor = Divisor.multiplicativeInverse();
6385 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6386 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6387 return true;
6388 };
6389
6390 // Collect all magic values from the build vector.
6391 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
6392 /*AllowTruncation=*/true))
6393 return SDValue();
6394
6395 SDValue Shift, Factor;
6396 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6397 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6398 Factor = DAG.getBuildVector(VT, dl, Factors);
6399 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6400 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6401 "Expected matchUnaryPredicate to return one element for scalable "
6402 "vectors");
6403 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6404 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6405 } else {
6406 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6407 Shift = Shifts[0];
6408 Factor = Factors[0];
6409 }
6410
6411 SDValue Res = Op0;
6412 if (UseSRA) {
6413 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6414 Created.push_back(Res.getNode());
6415 }
6416
6417 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6418}
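// Editorial worked example (not part of the upstream source): for an exact
// i32 sdiv by 6 = 3 * 2^1, Shift is 1 and the multiplicative inverse of 3
// modulo 2^32 is 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32).
// The emitted DAG is therefore:
//   t   = sra exact X, 1
//   res = mul t, 0xAAAAAAAB
// e.g. X = 42: (42 >> 1) * 0xAAAAAAAB == 21 * 0xAAAAAAAB == 7 (mod 2^32).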
6419
6420/// Given an exact UDIV by a constant, create a multiplication
6421/// with the multiplicative inverse of the constant.
6422/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6423static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6424 const SDLoc &dl, SelectionDAG &DAG,
6425 SmallVectorImpl<SDNode *> &Created) {
6426 EVT VT = N->getValueType(0);
6427 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6428 EVT ShSVT = ShVT.getScalarType();
6429
6430 bool UseSRL = false;
6431 SmallVector<SDValue, 16> Shifts, Factors;
6432
6433 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6434 if (C->isZero())
6435 return false;
6436
6437 EVT CT = C->getValueType(0);
6438 APInt Divisor = C->getAPIntValue();
6439 unsigned Shift = Divisor.countr_zero();
6440 if (Shift) {
6441 Divisor.lshrInPlace(Shift);
6442 UseSRL = true;
6443 }
6444 // Calculate the multiplicative inverse modulo BW.
6445 APInt Factor = Divisor.multiplicativeInverse();
6446 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6447 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6448 return true;
6449 };
6450
6451 SDValue Op1 = N->getOperand(1);
6452
6453 // Collect all magic values from the build vector.
6454 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6455 /*AllowTruncation=*/true))
6456 return SDValue();
6457
6458 SDValue Shift, Factor;
6459 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6460 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6461 Factor = DAG.getBuildVector(VT, dl, Factors);
6462 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6463 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6464 "Expected matchUnaryPredicate to return one element for scalable "
6465 "vectors");
6466 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6467 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6468 } else {
6469 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6470 Shift = Shifts[0];
6471 Factor = Factors[0];
6472 }
6473
6474 SDValue Res = N->getOperand(0);
6475 if (UseSRL) {
6476 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6477 Created.push_back(Res.getNode());
6478 }
6479
6480 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6481}
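// Editorial worked example (not part of the upstream source): for an exact
// i32 udiv by 10 = 5 * 2^1, Shift is 1 and the inverse of 5 modulo 2^32 is
// 0xCCCCCCCD, since 5 * 0xCCCCCCCD == 4 * 2^32 + 1. The emitted DAG is:
//   t   = srl exact X, 1
//   res = mul t, 0xCCCCCCCD
// e.g. X = 70: (70 >> 1) * 0xCCCCCCCD == 35 * 0xCCCCCCCD == 7 (mod 2^32).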
6482
6483SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6484 SelectionDAG &DAG,
6485 SmallVectorImpl<SDNode *> &Created) const {
6486 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6487 if (isIntDivCheap(N->getValueType(0), Attr))
6488 return SDValue(N, 0); // Lower SDIV as SDIV
6489 return SDValue();
6490}
6491
6492SDValue
6493TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6494 SelectionDAG &DAG,
6495 SmallVectorImpl<SDNode *> &Created) const {
6496 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6497 if (isIntDivCheap(N->getValueType(0), Attr))
6498 return SDValue(N, 0); // Lower SREM as SREM
6499 return SDValue();
6500}
6501
6502/// Build sdiv by power-of-2 with conditional move instructions
6503/// Ref: "Hacker's Delight" by Henry Warren 10-1
6504/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6505/// bgez x, label
6506/// add x, x, 2**k-1
6507/// label:
6508/// sra res, x, k
6509/// neg res, res (when the divisor is negative)
6510SDValue TargetLowering::buildSDIVPow2WithCMov(
6511 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6512 SmallVectorImpl<SDNode *> &Created) const {
6513 unsigned Lg2 = Divisor.countr_zero();
6514 EVT VT = N->getValueType(0);
6515
6516 SDLoc DL(N);
6517 SDValue N0 = N->getOperand(0);
6518 SDValue Zero = DAG.getConstant(0, DL, VT);
6519 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6520 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6521
6522 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6523 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6524 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6525 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6526 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6527
6528 Created.push_back(Cmp.getNode());
6529 Created.push_back(Add.getNode());
6530 Created.push_back(CMov.getNode());
6531
6532 // Divide by pow2.
6533 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6534 DAG.getShiftAmountConstant(Lg2, VT, DL));
6535
6536 // If we're dividing by a positive value, we're done. Otherwise, we must
6537 // negate the result.
6538 if (Divisor.isNonNegative())
6539 return SRA;
6540
6541 Created.push_back(SRA.getNode());
6542 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6543}
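// Editorial worked example (not part of the upstream source): for sdiv X, 8
// we get Lg2 = 3 and Pow2MinusOne = 7, so the sequence built above is:
//   Cmp  = setlt X, 0
//   CMov = select Cmp, (add X, 7), X
//   res  = sra CMov, 3
// e.g. X = -9: the select picks -9 + 7 = -2, and -2 sra 3 == -1 == trunc(-9/8),
// whereas a plain sra alone would have produced the floor value -2.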
6544
6545/// Given an ISD::SDIV node expressing a divide by constant,
6546/// return a DAG expression to select that will generate the same value by
6547/// multiplying by a magic number.
6548/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6549SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6550 bool IsAfterLegalization,
6551 bool IsAfterLegalTypes,
6552 SmallVectorImpl<SDNode *> &Created) const {
6553 SDLoc dl(N);
6554 EVT VT = N->getValueType(0);
6555 EVT SVT = VT.getScalarType();
6556 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6557 EVT ShSVT = ShVT.getScalarType();
6558 unsigned EltBits = VT.getScalarSizeInBits();
6559 EVT MulVT;
6560
6561 // Check to see if we can do this.
6562 // FIXME: We should be more aggressive here.
6563 if (!isTypeLegal(VT)) {
6564 // Limit this to simple scalars for now.
6565 if (VT.isVector() || !VT.isSimple())
6566 return SDValue();
6567
6568 // If this type will be promoted to a large enough type with a legal
6569 // multiply operation, we can go ahead and do this transform.
6570 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6571 return SDValue();
6572
6573 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6574 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6575 !isOperationLegal(ISD::MUL, MulVT))
6576 return SDValue();
6577 }
6578
6579 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6580 if (N->getFlags().hasExact())
6581 return BuildExactSDIV(*this, N, dl, DAG, Created);
6582
6583 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6584
6585 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6586 if (C->isZero())
6587 return false;
6588 // Truncate the divisor to the target scalar type in case it was promoted
6589 // during type legalization.
6590 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6591 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6592 int NumeratorFactor = 0;
6593 int ShiftMask = -1;
6594
6595 if (Divisor.isOne() || Divisor.isAllOnes()) {
6596 // If d is +1/-1, we just multiply the numerator by +1/-1.
6597 NumeratorFactor = Divisor.getSExtValue();
6598 magics.Magic = 0;
6599 magics.ShiftAmount = 0;
6600 ShiftMask = 0;
6601 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6602 // If d > 0 and m < 0, add the numerator.
6603 NumeratorFactor = 1;
6604 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6605 // If d < 0 and m > 0, subtract the numerator.
6606 NumeratorFactor = -1;
6607 }
6608
6609 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6610 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6611 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6612 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6613 return true;
6614 };
6615
6616 SDValue N0 = N->getOperand(0);
6617 SDValue N1 = N->getOperand(1);
6618
6619 // Collect the shifts / magic values from each element.
6620 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6621 /*AllowTruncation=*/true))
6622 return SDValue();
6623
6624 SDValue MagicFactor, Factor, Shift, ShiftMask;
6625 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6626 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6627 Factor = DAG.getBuildVector(VT, dl, Factors);
6628 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6629 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6630 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6631 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6632 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6633 "Expected matchUnaryPredicate to return one element for scalable "
6634 "vectors");
6635 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6636 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6637 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6638 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6639 } else {
6640 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6641 MagicFactor = MagicFactors[0];
6642 Factor = Factors[0];
6643 Shift = Shifts[0];
6644 ShiftMask = ShiftMasks[0];
6645 }
6646
6647 // Multiply the numerator (operand 0) by the magic value.
6648 // FIXME: We should support doing a MUL in a wider type.
6649 auto GetMULHS = [&](SDValue X, SDValue Y) {
6650 // If the type isn't legal, use a wider mul of the type calculated
6651 // earlier.
6652 if (!isTypeLegal(VT)) {
6653 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6654 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6655 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6656 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6657 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6658 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6659 }
6660
6661 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6662 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6663 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6664 SDValue LoHi =
6665 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6666 return SDValue(LoHi.getNode(), 1);
6667 }
6668 // If a type twice as wide is legal, widen and use a mul plus a shift.
6669 unsigned Size = VT.getScalarSizeInBits();
6670 EVT WideVT = VT.changeElementType(
6671 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6672 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6673 // custom lowered. This is very expensive so avoid it at all costs for
6674 // constant divisors.
6675 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6676 isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6677 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6678 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6679 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6680 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6681 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6682 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6683 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6684 }
6685 return SDValue();
6686 };
6687
6688 SDValue Q = GetMULHS(N0, MagicFactor);
6689 if (!Q)
6690 return SDValue();
6691
6692 Created.push_back(Q.getNode());
6693
6694 // (Optionally) Add/subtract the numerator using Factor.
6695 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6696 Created.push_back(Factor.getNode());
6697 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6698 Created.push_back(Q.getNode());
6699
6700 // Shift right algebraic by shift value.
6701 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6702 Created.push_back(Q.getNode());
6703
6704 // Extract the sign bit, mask it and add it to the quotient.
6705 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6706 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6707 Created.push_back(T.getNode());
6708 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6709 Created.push_back(T.getNode());
6710 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6711}
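// Editorial worked example (not part of the upstream source): for i32 sdiv
// by 7, SignedDivisionByConstantInfo reports Magic = 0x92492493 and
// ShiftAmount = 2. The divisor is positive but the magic is negative, so the
// numerator is added back after the high multiply:
//   Q = mulhs X, 0x92492493
//   Q = add Q, X
//   Q = sra Q, 2
//   Q = add Q, (srl Q, 31)    ; fold in the sign bit
// e.g. X = 21: mulhs gives -9, -9 + 21 == 12, and 12 sra 2 == 3 == 21 / 7.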
6712
6713/// Given an ISD::UDIV node expressing a divide by constant,
6714/// return a DAG expression to select that will generate the same value by
6715/// multiplying by a magic number.
6716/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6717SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6718 bool IsAfterLegalization,
6719 bool IsAfterLegalTypes,
6720 SmallVectorImpl<SDNode *> &Created) const {
6721 SDLoc dl(N);
6722 EVT VT = N->getValueType(0);
6723 EVT SVT = VT.getScalarType();
6724 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6725 EVT ShSVT = ShVT.getScalarType();
6726 unsigned EltBits = VT.getScalarSizeInBits();
6727 EVT MulVT;
6728
6729 // Check to see if we can do this.
6730 // FIXME: We should be more aggressive here.
6731 if (!isTypeLegal(VT)) {
6732 // Limit this to simple scalars for now.
6733 if (VT.isVector() || !VT.isSimple())
6734 return SDValue();
6735
6736 // If this type will be promoted to a large enough type with a legal
6737 // multiply operation, we can go ahead and do this transform.
6738 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6739 return SDValue();
6740
6741 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6742 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6743 !isOperationLegal(ISD::MUL, MulVT))
6744 return SDValue();
6745 }
6746
6747 // If the udiv has an 'exact' bit we can use a simpler lowering.
6748 if (N->getFlags().hasExact())
6749 return BuildExactUDIV(*this, N, dl, DAG, Created);
6750
6751 SDValue N0 = N->getOperand(0);
6752 SDValue N1 = N->getOperand(1);
6753
6754 // Try to use leading zeros of the dividend to reduce the multiplier and
6755 // avoid expensive fixups.
6756 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6757
6758 // If we're after type legalization and SVT is not legal, use the
6759 // promoted type for creating constants to avoid creating nodes with
6760 // illegal types.
6761 if (IsAfterLegalTypes && VT.isVector()) {
6762 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6763 if (SVT.bitsLT(VT.getScalarType()))
6764 return SDValue();
6765 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6766 if (ShSVT.bitsLT(ShVT.getScalarType()))
6767 return SDValue();
6768 }
6769 const unsigned SVTBits = SVT.getSizeInBits();
6770
6771 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6772 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6773
6774 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6775 if (C->isZero())
6776 return false;
6777 // Truncate the divisor to the target scalar type in case it was promoted
6778 // during type legalization.
6779 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6780
6781 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6782
6783 // Magic algorithm doesn't work for division by 1. We need to emit a select
6784 // at the end.
6785 if (Divisor.isOne()) {
6786 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6787 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6788 } else {
6789 UnsignedDivisionByConstantInfo magics =
6790 UnsignedDivisionByConstantInfo::get(
6791 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6792
6793 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6794
6795 assert(magics.PreShift < Divisor.getBitWidth() &&
6796 "We shouldn't generate an undefined shift!");
6797 assert(magics.PostShift < Divisor.getBitWidth() &&
6798 "We shouldn't generate an undefined shift!");
6799 assert((!magics.IsAdd || magics.PreShift == 0) &&
6800 "Unexpected pre-shift");
6801 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6802 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6803 NPQFactor = DAG.getConstant(
6804 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6805 : APInt::getZero(SVTBits),
6806 dl, SVT);
6807 UseNPQ |= magics.IsAdd;
6808 UsePreShift |= magics.PreShift != 0;
6809 UsePostShift |= magics.PostShift != 0;
6810 }
6811
6812 PreShifts.push_back(PreShift);
6813 MagicFactors.push_back(MagicFactor);
6814 NPQFactors.push_back(NPQFactor);
6815 PostShifts.push_back(PostShift);
6816 return true;
6817 };
6818
6819 // Collect the shifts/magic values from each element.
6820 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6821 /*AllowTruncation=*/true))
6822 return SDValue();
6823
6824 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6825 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6826 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6827 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6828 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6829 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6830 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6831 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6832 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6833 "Expected matchUnaryPredicate to return one for scalable vectors");
6834 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6835 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6836 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6837 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6838 } else {
6839 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6840 PreShift = PreShifts[0];
6841 MagicFactor = MagicFactors[0];
6842 PostShift = PostShifts[0];
6843 }
6844
6845 SDValue Q = N0;
6846 if (UsePreShift) {
6847 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6848 Created.push_back(Q.getNode());
6849 }
6850
6851 // FIXME: We should support doing a MUL in a wider type.
6852 auto GetMULHU = [&](SDValue X, SDValue Y) {
6853 // If the type isn't legal, use a wider mul of the type calculated
6854 // earlier.
6855 if (!isTypeLegal(VT)) {
6856 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6857 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6858 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6859 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6860 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6861 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6862 }
6863
6864 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6865 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6866 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6867 SDValue LoHi =
6868 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6869 return SDValue(LoHi.getNode(), 1);
6870 }
6871 // If a type twice as wide is legal, widen and use a mul plus a shift.
6872 unsigned Size = VT.getScalarSizeInBits();
6873 EVT WideVT = VT.changeElementType(
6874 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6875 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6876 // custom lowered. This is very expensive so avoid it at all costs for
6877 // constant divisors.
6878 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6879 isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6880 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6881 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6882 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6883 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6884 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6885 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6886 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6887 }
6888 return SDValue(); // No mulhu or equivalent
6889 };
6890
6891 // Multiply the numerator (operand 0) by the magic value.
6892 Q = GetMULHU(Q, MagicFactor);
6893 if (!Q)
6894 return SDValue();
6895
6896 Created.push_back(Q.getNode());
6897
6898 if (UseNPQ) {
6899 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6900 Created.push_back(NPQ.getNode());
6901
6902 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6903 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6904 if (VT.isVector())
6905 NPQ = GetMULHU(NPQ, NPQFactor);
6906 else
6907 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6908
6909 Created.push_back(NPQ.getNode());
6910
6911 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6912 Created.push_back(Q.getNode());
6913 }
6914
6915 if (UsePostShift) {
6916 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6917 Created.push_back(Q.getNode());
6918 }
6919
6920 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6921
6922 SDValue One = DAG.getConstant(1, dl, VT);
6923 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6924 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6925}
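// Editorial worked example (not part of the upstream source): for i32 udiv
// by 7, UnsignedDivisionByConstantInfo reports Magic = 0x24924925 with IsAdd
// set (the NPQ fixup path) and PostShift = 2, so the sequence built above is:
//   Q   = mulhu X, 0x24924925
//   NPQ = srl (sub X, Q), 1
//   Q   = srl (add NPQ, Q), 2
// e.g. X = 21: mulhu gives 3, NPQ == (21 - 3) >> 1 == 9, and (9 + 3) >> 2 == 3.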
6926
6927/// If all values in Values that *don't* match the predicate are the same 'splat'
6928/// value, then replace all values with that splat value.
6929/// Else, if AlternativeReplacement was provided, then replace all values that
6930/// do match predicate with AlternativeReplacement value.
6931static void
6932turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6933 std::function<bool(SDValue)> Predicate,
6934 SDValue AlternativeReplacement = SDValue()) {
6935 SDValue Replacement;
6936 // Is there a value for which the Predicate does *NOT* match? What is it?
6937 auto SplatValue = llvm::find_if_not(Values, Predicate);
6938 if (SplatValue != Values.end()) {
6939 // Does Values consist only of SplatValue's and values matching Predicate?
6940 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6941 return Value == *SplatValue || Predicate(Value);
6942 })) // Then we shall replace values matching predicate with SplatValue.
6943 Replacement = *SplatValue;
6944 }
6945 if (!Replacement) {
6946 // Oops, we did not find the "baseline" splat value.
6947 if (!AlternativeReplacement)
6948 return; // Nothing to do.
6949 // Let's replace with provided value then.
6950 Replacement = AlternativeReplacement;
6951 }
6952 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6953}
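// Editorial usage sketch (not part of the upstream source): with
// Values = [42, 0, 42, 42] and Predicate = isNullConstant, the only
// non-matching value is 42, so the vector becomes [42, 42, 42, 42]. With
// Values = [1, 0, 2, 0] no single splat value exists, so the zeros are
// replaced by AlternativeReplacement when one is provided and are otherwise
// left untouched.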
6954
6955/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6956/// where the divisor is constant and the comparison target is zero,
6957/// return a DAG expression that will generate the same comparison result
6958/// using only multiplications, additions and shifts/rotations.
6959/// Ref: "Hacker's Delight" 10-17.
6960SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6961 SDValue CompTargetNode,
6962 ISD::CondCode Cond,
6963 DAGCombinerInfo &DCI,
6964 const SDLoc &DL) const {
6965 SmallVector<SDNode *, 5> Built;
6966 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6967 DCI, DL, Built)) {
6968 for (SDNode *N : Built)
6969 DCI.AddToWorklist(N);
6970 return Folded;
6971 }
6972
6973 return SDValue();
6974}
6975
6976SDValue
6977TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6978 SDValue CompTargetNode, ISD::CondCode Cond,
6979 DAGCombinerInfo &DCI, const SDLoc &DL,
6980 SmallVectorImpl<SDNode *> &Created) const {
6981 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6982 // - D must be constant, with D = D0 * 2^K where D0 is odd
6983 // - P is the multiplicative inverse of D0 modulo 2^W
6984 // - Q = floor(((2^W) - 1) / D)
6985 // where W is the width of the common type of N and D.
6986 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6987 "Only applicable for (in)equality comparisons.");
6988
6989 SelectionDAG &DAG = DCI.DAG;
6990
6991 EVT VT = REMNode.getValueType();
6992 EVT SVT = VT.getScalarType();
6993 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6994 EVT ShSVT = ShVT.getScalarType();
6995
6996 // If MUL is unavailable, we cannot proceed in any case.
6997 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6998 return SDValue();
6999
7000 bool ComparingWithAllZeros = true;
7001 bool AllComparisonsWithNonZerosAreTautological = true;
7002 bool HadTautologicalLanes = false;
7003 bool AllLanesAreTautological = true;
7004 bool HadEvenDivisor = false;
7005 bool AllDivisorsArePowerOfTwo = true;
7006 bool HadTautologicalInvertedLanes = false;
7007 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7008
7009 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7010 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7011 if (CDiv->isZero())
7012 return false;
7013
7014 const APInt &D = CDiv->getAPIntValue();
7015 const APInt &Cmp = CCmp->getAPIntValue();
7016
7017 ComparingWithAllZeros &= Cmp.isZero();
7018
7019 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7020 // if C2 is not less than C1, the comparison is always false.
7021 // But we will only be able to produce the comparison that will give the
7022 // opposite tautological answer. So this lane would need to be fixed up.
7023 bool TautologicalInvertedLane = D.ule(Cmp);
7024 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7025
7026 // If all lanes are tautological (either all divisors are ones, or divisor
7027 // is not greater than the constant we are comparing with),
7028 // we will prefer to avoid the fold.
7029 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7030 HadTautologicalLanes |= TautologicalLane;
7031 AllLanesAreTautological &= TautologicalLane;
7032
7033 // If we are comparing with non-zero, we'll need to subtract said
7034 // comparison value from the LHS. But there is no point in doing that if
7035 // every lane where we are comparing with non-zero is tautological.
7036 if (!Cmp.isZero())
7037 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7038
7039 // Decompose D into D0 * 2^K
7040 unsigned K = D.countr_zero();
7041 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7042 APInt D0 = D.lshr(K);
7043
7044 // D is even if it has trailing zeros.
7045 HadEvenDivisor |= (K != 0);
7046 // D is a power-of-two if D0 is one.
7047 // If all divisors are power-of-two, we will prefer to avoid the fold.
7048 AllDivisorsArePowerOfTwo &= D0.isOne();
7049
7050 // P = inv(D0, 2^W)
7051 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7052 unsigned W = D.getBitWidth();
7053 APInt P = D0.multiplicativeInverse();
7054 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7055
7056 // Q = floor((2^W - 1) u/ D)
7057 // R = ((2^W - 1) u% D)
7058 APInt Q, R;
7059 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7060
7061 // If we are comparing with zero, then that comparison constant is okay,
7062 // else it may need to be one less than that.
7063 if (Cmp.ugt(R))
7064 Q -= 1;
7065
7066 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7067 "We are expecting that K is always less than all-ones for ShSVT");
7068
7069 // If the lane is tautological the result can be constant-folded.
7070 if (TautologicalLane) {
7071 // Set P and K amounts to bogus values so we can try to splat them.
7072 P = 0;
7073 K = -1;
7074 // And ensure that comparison constant is tautological,
7075 // it will always compare true/false.
7076 Q = -1;
7077 }
7078
7079 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7080 KAmts.push_back(
7081 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7082 /*implicitTrunc=*/true),
7083 DL, ShSVT));
7084 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7085 return true;
7086 };
7087
7088 SDValue N = REMNode.getOperand(0);
7089 SDValue D = REMNode.getOperand(1);
7090
7091 // Collect the values from each element.
7092 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7093 return SDValue();
7094
7095 // If all lanes are tautological, the result can be constant-folded.
7096 if (AllLanesAreTautological)
7097 return SDValue();
7098
7099 // If this is a urem by a power-of-two, avoid the fold since it can be
7100 // best implemented as a bit test.
7101 if (AllDivisorsArePowerOfTwo)
7102 return SDValue();
7103
7104 SDValue PVal, KVal, QVal;
7105 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7106 if (HadTautologicalLanes) {
7107 // Try to turn PAmts into a splat, since we don't care about the values
7108 // that are currently '0'. If we can't, just keep '0's.
7109 turnVectorIntoSplatVector(PAmts, isNullConstant);
7110 // Try to turn KAmts into a splat, since we don't care about the values
7111 // that are currently '-1'. If we can't, change them to '0's.
7112 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7113 DAG.getConstant(0, DL, ShSVT));
7114 }
7115
7116 PVal = DAG.getBuildVector(VT, DL, PAmts);
7117 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7118 QVal = DAG.getBuildVector(VT, DL, QAmts);
7119 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7120 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7121 "Expected matchBinaryPredicate to return one element for "
7122 "SPLAT_VECTORs");
7123 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7124 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7125 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7126 } else {
7127 PVal = PAmts[0];
7128 KVal = KAmts[0];
7129 QVal = QAmts[0];
7130 }
7131
7132 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7133 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7134 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7135 assert(CompTargetNode.getValueType() == N.getValueType() &&
7136 "Expecting that the types on LHS and RHS of comparisons match.");
7137 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7138 }
7139
7140 // (mul N, P)
7141 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7142 Created.push_back(Op0.getNode());
7143
7144 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7145 // divisors as a performance improvement, since rotating by 0 is a no-op.
7146 if (HadEvenDivisor) {
7147 // We need ROTR to do this.
7148 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7149 return SDValue();
7150 // UREM: (rotr (mul N, P), K)
7151 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7152 Created.push_back(Op0.getNode());
7153 }
7154
7155 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7156 SDValue NewCC =
7157 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7158 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7159 if (!HadTautologicalInvertedLanes)
7160 return NewCC;
7161
7162 // If any lanes previously compared always-false, the NewCC will give
7163 // always-true result for them, so we need to fixup those lanes.
7164 // Or the other way around for inequality predicate.
7165 assert(VT.isVector() && "Can/should only get here for vectors.");
7166 Created.push_back(NewCC.getNode());
7167
7168 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7169 // if C2 is not less than C1, the comparison is always false.
7170 // But we have produced the comparison that will give the
7171 // opposite tautological answer. So these lanes would need to be fixed up.
7172 SDValue TautologicalInvertedChannels =
7173 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7174 Created.push_back(TautologicalInvertedChannels.getNode());
7175
7176 // NOTE: we avoid letting illegal types through even if we're before legalize
7177 // ops; legalization has a hard time producing good code for this.
7178 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7179 // If we have a vector select, let's replace the comparison results in the
7180 // affected lanes with the correct tautological result.
7181 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7182 DL, SETCCVT, SETCCVT);
7183 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7184 Replacement, NewCC);
7185 }
7186
7187 // Else, we can just invert the comparison result in the appropriate lanes.
7188 //
7189 // NOTE: see the note above VSELECT above.
7190 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7191 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7192 TautologicalInvertedChannels);
7193
7194 return SDValue(); // Don't know how to lower.
7195}
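// Editorial worked example (not part of the upstream source): for i32
// `x u% 6 == 0`, D = 6 decomposes as 3 * 2^1, so K = 1,
// P = inv(3) mod 2^32 = 0xAAAAAAAB and Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA,
// giving:
//   setule (rotr (mul x, 0xAAAAAAAB), 1), 0x2AAAAAAA
// e.g. x = 12: 12 * P == 4 (mod 2^32) and rotr(4, 1) == 2 u<= Q (divisible);
//      x = 13: 13 * P == 0xAAAAAAAF and rotr == 0xD5555557 u> Q (remainder 1).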
7196
7197/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7198/// where the divisor is constant and the comparison target is zero,
7199/// return a DAG expression that will generate the same comparison result
7200/// using only multiplications, additions and shifts/rotations.
7201/// Ref: "Hacker's Delight" 10-17.
7202SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7203 SDValue CompTargetNode,
7204 ISD::CondCode Cond,
7205 DAGCombinerInfo &DCI,
7206 const SDLoc &DL) const {
7207 SmallVector<SDNode *, 7> Built;
7208 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7209 DCI, DL, Built)) {
7210 assert(Built.size() <= 7 && "Max size prediction failed.");
7211 for (SDNode *N : Built)
7212 DCI.AddToWorklist(N);
7213 return Folded;
7214 }
7215
7216 return SDValue();
7217}
7218
7219SDValue
7220TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7221 SDValue CompTargetNode, ISD::CondCode Cond,
7222 DAGCombinerInfo &DCI, const SDLoc &DL,
7223 SmallVectorImpl<SDNode *> &Created) const {
7224 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7225 // Fold:
7226 // (seteq/ne (srem N, D), 0)
7227 // To:
7228 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7229 //
7230 // - D must be constant, with D = D0 * 2^K where D0 is odd
7231 // - P is the multiplicative inverse of D0 modulo 2^W
7232 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7233 // - Q = floor((2 * A) / (2^K))
7234 // where W is the width of the common type of N and D.
7235 //
7236 // When D is a power of two (and thus D0 is 1), the normal
7237 // formula for A and Q don't apply, because the derivation
7238 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7239 // does not apply. This specifically fails when N = INT_MIN.
7240 //
7241 // Instead, for power-of-two D, we use:
7242 // - A = 2^(W-1)
7243 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0, 2^W - 1]
7244 // - Q = 2^(W-K) - 1
7245 // |-> Test that the top K bits are zero after rotation
7246 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7247 "Only applicable for (in)equality comparisons.");
7248
7249 SelectionDAG &DAG = DCI.DAG;
7250
7251 EVT VT = REMNode.getValueType();
7252 EVT SVT = VT.getScalarType();
7253 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7254 EVT ShSVT = ShVT.getScalarType();
7255
7256 // If we are after ops legalization, and MUL is unavailable, we can not
7257 // proceed.
7258 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7259 return SDValue();
7260
7261 // TODO: Could support comparing with non-zero too.
7262 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7263 if (!CompTarget || !CompTarget->isZero())
7264 return SDValue();
7265
7266 bool HadIntMinDivisor = false;
7267 bool HadOneDivisor = false;
7268 bool AllDivisorsAreOnes = true;
7269 bool HadEvenDivisor = false;
7270 bool NeedToApplyOffset = false;
7271 bool AllDivisorsArePowerOfTwo = true;
7272 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7273
7274 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7275 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7276 if (C->isZero())
7277 return false;
7278
7279 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7280
7281 // WARNING: this fold is only valid for positive divisors!
7282 APInt D = C->getAPIntValue();
7283 if (D.isNegative())
7284 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7285
7286 HadIntMinDivisor |= D.isMinSignedValue();
7287
7288 // If all divisors are ones, we will prefer to avoid the fold.
7289 HadOneDivisor |= D.isOne();
7290 AllDivisorsAreOnes &= D.isOne();
7291
7292 // Decompose D into D0 * 2^K
7293 unsigned K = D.countr_zero();
7294 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7295 APInt D0 = D.lshr(K);
7296
7297 if (!D.isMinSignedValue()) {
7298 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7299 // we don't care about this lane in this fold, we'll special-handle it.
7300 HadEvenDivisor |= (K != 0);
7301 }
7302
7303 // D is a power-of-two if D0 is one. This includes INT_MIN.
7304 // If all divisors are power-of-two, we will prefer to avoid the fold.
7305 AllDivisorsArePowerOfTwo &= D0.isOne();
7306
7307 // P = inv(D0, 2^W)
7308 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7309 unsigned W = D.getBitWidth();
7310 APInt P = D0.multiplicativeInverse();
7311 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7312
7313 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7314 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7315 A.clearLowBits(K);
7316
7317 if (!D.isMinSignedValue()) {
7318 // If divisor INT_MIN, then we don't care about this lane in this fold,
7319 // we'll special-handle it.
7320 NeedToApplyOffset |= A != 0;
7321 }
7322
7323 // Q = floor((2 * A) / (2^K))
7324 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7325
7326 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7327 "We are expecting that A is always less than all-ones for SVT");
7328 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7329 "We are expecting that K is always less than all-ones for ShSVT");
7330
7331 // If D was a power of two, apply the alternate constant derivation.
7332 if (D0.isOne()) {
7333 // A = 2^(W-1)
7334 A = APInt::getSignedMinValue(W);
7335 // - Q = 2^(W-K) - 1
7336 Q = APInt::getAllOnes(W - K).zext(W);
7337 }
7338
7339 // If the divisor is 1 the result can be constant-folded. Likewise, we
7340 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7341 if (D.isOne()) {
7342 // Set P, A and K to bogus values so we can try to splat them.
7343 P = 0;
7344 A = -1;
7345 K = -1;
7346
7347 // x ?% 1 == 0 <--> true <--> x u<= -1
7348 Q = -1;
7349 }
7350
7351 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7352 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7353 KAmts.push_back(
7354 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7355 /*implicitTrunc=*/true),
7356 DL, ShSVT));
7357 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7358 return true;
7359 };
7360
7361 SDValue N = REMNode.getOperand(0);
7362 SDValue D = REMNode.getOperand(1);
7363
7364 // Collect the values from each element.
7365 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7366 return SDValue();
7367
7368 // If this is a srem by one, avoid the fold since it can be constant-folded.
7369 if (AllDivisorsAreOnes)
7370 return SDValue();
7371
7372 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7373 // since it can be best implemented as a bit test.
7374 if (AllDivisorsArePowerOfTwo)
7375 return SDValue();
7376
7377 SDValue PVal, AVal, KVal, QVal;
7378 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7379 if (HadOneDivisor) {
7380 // Try to turn PAmts into a splat, since we don't care about the values
7381 // that are currently '0'. If we can't, just keep '0's.
7382 turnVectorIntoSplatVector(PAmts, isNullConstant);
7383 // Try to turn AAmts into a splat, since we don't care about the
7384 // values that are currently '-1'. If we can't, change them to '0's.
7385 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7386 DAG.getConstant(0, DL, SVT));
7387 // Try to turn KAmts into a splat, since we don't care about the values
7388 // that are currently '-1'. If we can't, change them to '0's.
7389 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7390 DAG.getConstant(0, DL, ShSVT));
7391 }
7392
7393 PVal = DAG.getBuildVector(VT, DL, PAmts);
7394 AVal = DAG.getBuildVector(VT, DL, AAmts);
7395 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7396 QVal = DAG.getBuildVector(VT, DL, QAmts);
7397 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7398 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7399 QAmts.size() == 1 &&
7400 "Expected matchUnaryPredicate to return one element for scalable "
7401 "vectors");
7402 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7403 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7404 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7405 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7406 } else {
7407 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7408 PVal = PAmts[0];
7409 AVal = AAmts[0];
7410 KVal = KAmts[0];
7411 QVal = QAmts[0];
7412 }
7413
7414 // (mul N, P)
7415 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7416 Created.push_back(Op0.getNode());
7417
7418 if (NeedToApplyOffset) {
7419 // We need ADD to do this.
7420 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7421 return SDValue();
7422
7423 // (add (mul N, P), A)
7424 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7425 Created.push_back(Op0.getNode());
7426 }
7427
7428 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7429 // divisors as a performance improvement, since rotating by 0 is a no-op.
7430 if (HadEvenDivisor) {
7431 // We need ROTR to do this.
7432 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7433 return SDValue();
7434 // SREM: (rotr (add (mul N, P), A), K)
7435 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7436 Created.push_back(Op0.getNode());
7437 }
7438
7439 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7440 SDValue Fold =
7441 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7442 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7443
7444 // If we didn't have lanes with INT_MIN divisor, then we're done.
7445 if (!HadIntMinDivisor)
7446 return Fold;
7447
7448 // That fold is only valid for positive divisors. Which effectively means,
7449 // it is invalid for INT_MIN divisors. So if we have such a lane,
7450 // we must fix-up results for said lanes.
7451 assert(VT.isVector() && "Can/should only get here for vectors.");
7452
7453 // NOTE: we avoid letting illegal types through even if we're before legalize
7454 // ops; legalization has a hard time producing good code for the code that
7455 // follows.
7456 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7457 !isOperationLegalOrCustom(ISD::AND, VT) ||
7458 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7459 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7460 return SDValue();
7461
7462 Created.push_back(Fold.getNode());
7463
7464 SDValue IntMin = DAG.getConstant(
7465 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7466 SDValue IntMax = DAG.getConstant(
7467 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7468 SDValue Zero =
7469 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7470
7471 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7472 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7473 Created.push_back(DivisorIsIntMin.getNode());
7474
7475 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7476 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7477 Created.push_back(Masked.getNode());
7478 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7479 Created.push_back(MaskedIsZero.getNode());
7480
7481 // To produce final result we need to blend 2 vectors: 'SetCC' and
7482 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7483 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7484 // constant-folded, select can get lowered to a shuffle with constant mask.
7485 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7486 MaskedIsZero, Fold);
7487
7488 return Blended;
7489}
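// Editorial worked example (not part of the upstream source): for i32
// `x s% 6 == 0`, D0 = 3 and K = 1 give P = 0xAAAAAAAB,
// A = floor((2^31 - 1) / 3) & ~1 == 0x2AAAAAAA and
// Q = floor(2 * A / 2^1) == 0x2AAAAAAA, so the fold emits:
//   setule (rotr (add (mul x, P), A), 1), Q
// e.g. x = -6: -6 * P == -2 (mod 2^32), -2 + A == 0x2AAAAAA8, and
// rotr(0x2AAAAAA8, 1) == 0x15555554 u<= Q, so the lane compares divisible.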
7490
7491SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7492 const DenormalMode &Mode) const {
7493 SDLoc DL(Op);
7494 EVT VT = Op.getValueType();
7495 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7496 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7497
7498 // This is specifically a check for the handling of denormal inputs, not the
7499 // result.
7500 if (Mode.Input == DenormalMode::PreserveSign ||
7501 Mode.Input == DenormalMode::PositiveZero) {
7502 // Test = X == 0.0
7503 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7504 }
7505
7506 // Testing it with denormal inputs to avoid wrong estimate.
7507 //
7508 // Test = fabs(X) < SmallestNormal
7509 const fltSemantics &FltSem = VT.getFltSemantics();
7510 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7511 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7512 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7513 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7514}
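// Editorial note: for f32 with IEEE denormal inputs this emits
// setlt (fabs X), 0x1p-126 (the smallest normalized value), while under the
// "preserve-sign" and "positive-zero" input modes the cheaper seteq X, 0.0
// suffices because denormal inputs are already treated as zero.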
7515
7516SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7517 bool LegalOps, bool OptForSize,
7518 NegatibleCost &Cost,
7519 unsigned Depth) const {
7520 // fneg is removable even if it has multiple uses.
7521 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7522 Cost = NegatibleCost::Cheaper;
7523 return Op.getOperand(0);
7524 }
7525
7526 // Don't recurse exponentially.
7527 if (Depth > SelectionDAG::MaxRecursionDepth)
7528 return SDValue();
7529
7530 // Pre-increment recursion depth for use in recursive calls.
7531 ++Depth;
7532 const SDNodeFlags Flags = Op->getFlags();
7533 EVT VT = Op.getValueType();
7534 unsigned Opcode = Op.getOpcode();
7535
7536 // Don't allow anything with multiple uses unless we know it is free.
7537 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7538 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7539 isFPExtFree(VT, Op.getOperand(0).getValueType());
7540 if (!IsFreeExtend)
7541 return SDValue();
7542 }
7543
7544 auto RemoveDeadNode = [&](SDValue N) {
7545 if (N && N.getNode()->use_empty())
7546 DAG.RemoveDeadNode(N.getNode());
7547 };
7548
7549 SDLoc DL(Op);
7550
7551 // Because getNegatedExpression can delete nodes we need a handle to keep
7552 // temporary nodes alive in case the recursion manages to create an identical
7553 // node.
7554 std::list<HandleSDNode> Handles;
7555
7556 switch (Opcode) {
7557 case ISD::ConstantFP: {
7558 // Don't invert constant FP values after legalization unless the target says
7559 // the negated constant is legal.
7560 bool IsOpLegal =
7561 isOperationLegal(ISD::ConstantFP, VT) ||
7562 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7563 OptForSize);
7564
7565 if (LegalOps && !IsOpLegal)
7566 break;
7567
7568 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7569 V.changeSign();
7570 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7571
7572 // If we already have the use of the negated floating constant, it is free
7573 // to negate it even it has multiple uses.
7574 if (!Op.hasOneUse() && CFP.use_empty())
7575 break;
7576 Cost = NegatibleCost::Neutral;
7577 return CFP;
7578 }
7579 case ISD::SPLAT_VECTOR: {
7580 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7581 SDValue X = Op.getOperand(0);
7583 break;
7584
7585 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7586 if (!NegX)
7587 break;
7588 Cost = NegatibleCost::Cheaper;
7589 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7590 }
7591 case ISD::BUILD_VECTOR: {
7592 // Only permit BUILD_VECTOR of constants.
7593 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7594 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7595 }))
7596 break;
7597
7598 bool IsOpLegal =
7599 (isOperationLegal(ISD::ConstantFP, VT) &&
7600 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7601 llvm::all_of(Op->op_values(), [&](SDValue N) {
7602 return N.isUndef() ||
7603 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7604 OptForSize);
7605 });
7606
7607 if (LegalOps && !IsOpLegal)
7608 break;
7609
7610 SmallVector<SDValue, 4> Ops;
7611 for (SDValue C : Op->op_values()) {
7612 if (C.isUndef()) {
7613 Ops.push_back(C);
7614 continue;
7615 }
7616 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7617 V.changeSign();
7618 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7619 }
7620 Cost = NegatibleCost::Neutral;
7621 return DAG.getBuildVector(VT, DL, Ops);
7622 }
7623 case ISD::FADD: {
7624 if (!Flags.hasNoSignedZeros())
7625 break;
7626
7627 // After operation legalization, it might not be legal to create new FSUBs.
7628 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7629 break;
7630 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7631
7632 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7633 NegatibleCost CostX = NegatibleCost::Expensive;
7634 SDValue NegX =
7635 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7636 // Prevent this node from being deleted by the next call.
7637 if (NegX)
7638 Handles.emplace_back(NegX);
7639
7640 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7641 NegatibleCost CostY = NegatibleCost::Expensive;
7642 SDValue NegY =
7643 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7644
7645 // We're done with the handles.
7646 Handles.clear();
7647
7648 // Negate the X if its cost is less or equal than Y.
7649 if (NegX && (CostX <= CostY)) {
7650 Cost = CostX;
7651 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7652 if (NegY != N)
7653 RemoveDeadNode(NegY);
7654 return N;
7655 }
7656
7657 // Negate the Y if it is not expensive.
7658 if (NegY) {
7659 Cost = CostY;
7660 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7661 if (NegX != N)
7662 RemoveDeadNode(NegX);
7663 return N;
7664 }
7665 break;
7666 }
7667 case ISD::FSUB: {
7668 // We can't turn -(A-B) into B-A when we honor signed zeros.
7669 if (!Flags.hasNoSignedZeros())
7670 break;
7671
7672 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7673 // fold (fneg (fsub 0, Y)) -> Y
7674 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7675 if (C->isZero()) {
7676 Cost = NegatibleCost::Neutral;
7677 return Y;
7678 }
7679
7680 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7681 Cost = NegatibleCost::Neutral;
7682 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7683 }
7684 case ISD::FMUL:
7685 case ISD::FDIV: {
7686 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7687
7688 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7689 NegatibleCost CostX = NegatibleCost::Expensive;
7690 SDValue NegX =
7691 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7692 // Prevent this node from being deleted by the next call.
7693 if (NegX)
7694 Handles.emplace_back(NegX);
7695
7696 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7697 NegatibleCost CostY = NegatibleCost::Expensive;
7698 SDValue NegY =
7699 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7700
7701 // We're done with the handles.
7702 Handles.clear();
7703
7704 // Negate the X if its cost is less or equal than Y.
7705 if (NegX && (CostX <= CostY)) {
7706 Cost = CostX;
7707 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7708 if (NegY != N)
7709 RemoveDeadNode(NegY);
7710 return N;
7711 }
7712
7713 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7714 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7715 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7716 break;
7717
7718 // Negate the Y if it is not expensive.
7719 if (NegY) {
7720 Cost = CostY;
7721 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7722 if (NegX != N)
7723 RemoveDeadNode(NegX);
7724 return N;
7725 }
7726 break;
7727 }
7728 case ISD::FMA:
7729 case ISD::FMULADD:
7730 case ISD::FMAD: {
7731 if (!Flags.hasNoSignedZeros())
7732 break;
7733
7734 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7735 NegatibleCost CostZ = NegatibleCost::Expensive;
7736 SDValue NegZ =
7737 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7738 // Give up if fail to negate the Z.
7739 if (!NegZ)
7740 break;
7741
7742 // Prevent this node from being deleted by the next two calls.
7743 Handles.emplace_back(NegZ);
7744
7745 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7746 NegatibleCost CostX = NegatibleCost::Expensive;
7747 SDValue NegX =
7748 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7749 // Prevent this node from being deleted by the next call.
7750 if (NegX)
7751 Handles.emplace_back(NegX);
7752
7753 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7754 NegatibleCost CostY = NegatibleCost::Expensive;
7755 SDValue NegY =
7756 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7757
7758 // We're done with the handles.
7759 Handles.clear();
7760
7761 // Negate the X if its cost is less or equal than Y.
7762 if (NegX && (CostX <= CostY)) {
7763 Cost = std::min(CostX, CostZ);
7764 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7765 if (NegY != N)
7766 RemoveDeadNode(NegY);
7767 return N;
7768 }
7769
7770 // Negate the Y if it is not expensive.
7771 if (NegY) {
7772 Cost = std::min(CostY, CostZ);
7773 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7774 if (NegX != N)
7775 RemoveDeadNode(NegX);
7776 return N;
7777 }
7778 break;
7779 }
7780
7781 case ISD::FP_EXTEND:
7782 case ISD::FSIN:
7783 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7784 OptForSize, Cost, Depth))
7785 return DAG.getNode(Opcode, DL, VT, NegV);
7786 break;
7787 case ISD::FP_ROUND:
7788 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7789 OptForSize, Cost, Depth))
7790 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7791 break;
7792 case ISD::SELECT:
7793 case ISD::VSELECT: {
7794 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7795 // iff at least one cost is cheaper and the other is neutral/cheaper
7796 SDValue LHS = Op.getOperand(1);
7797 NegatibleCost CostLHS = NegatibleCost::Expensive;
7798 SDValue NegLHS =
7799 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7800 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7801 RemoveDeadNode(NegLHS);
7802 break;
7803 }
7804
7805 // Prevent this node from being deleted by the next call.
7806 Handles.emplace_back(NegLHS);
7807
7808 SDValue RHS = Op.getOperand(2);
7809 NegatibleCost CostRHS = NegatibleCost::Expensive;
7810 SDValue NegRHS =
7811 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7812
7813 // We're done with the handles.
7814 Handles.clear();
7815
7816 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7817 (CostLHS != NegatibleCost::Cheaper &&
7818 CostRHS != NegatibleCost::Cheaper)) {
7819 RemoveDeadNode(NegLHS);
7820 RemoveDeadNode(NegRHS);
7821 break;
7822 }
7823
7824 Cost = std::min(CostLHS, CostRHS);
7825 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7826 }
7827 }
7828
7829 return SDValue();
7830}
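// Editorial illustration (not part of the upstream source): negating
// (fadd X, (fneg Z)) under nsz strips the inner fneg: NegY == Z is reported
// as NegatibleCost::Cheaper, so the result is (fsub Z, X) with no new fneg
// node. Wrappers such as getCheaperNegatedExpression use the returned Cost
// to commit only to folds that actually shrink the DAG.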
7831
7832//===----------------------------------------------------------------------===//
7833// Legalization Utilities
7834//===----------------------------------------------------------------------===//
7835
7836bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7837 SDValue LHS, SDValue RHS,
7838 SmallVectorImpl<SDValue> &Result,
7839 EVT HiLoVT, SelectionDAG &DAG,
7840 MulExpansionKind Kind, SDValue LL,
7841 SDValue LH, SDValue RL, SDValue RH) const {
7842 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7843 Opcode == ISD::SMUL_LOHI);
7844
7845 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7846 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7847 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7848 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7849 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7850 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7851 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7852 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7853
7854 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7855 return false;
7856
7857 unsigned OuterBitSize = VT.getScalarSizeInBits();
7858 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7859
7860 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7861 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7862 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7863
7864 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7865 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7866 bool Signed) -> bool {
7867 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7868 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7869 Hi = SDValue(Lo.getNode(), 1);
7870 return true;
7871 }
7872 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7873 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7874 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7875 return true;
7876 }
7877 return false;
7878 };
7879
7880 SDValue Lo, Hi;
7881
7882 if (!LL.getNode() && !RL.getNode() &&
7883 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7884 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7885 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7886 }
7887
7888 if (!LL.getNode())
7889 return false;
7890
7891 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7892 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7893 DAG.MaskedValueIsZero(RHS, HighMask)) {
7894 // The inputs are both zero-extended.
7895 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7896 Result.push_back(Lo);
7897 Result.push_back(Hi);
7898 if (Opcode != ISD::MUL) {
7899 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7900 Result.push_back(Zero);
7901 Result.push_back(Zero);
7902 }
7903 return true;
7904 }
7905 }
7906
7907 if (!VT.isVector() && Opcode == ISD::MUL &&
7908 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7909 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7910 // The input values are both sign-extended.
7911 // TODO non-MUL case?
7912 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7913 Result.push_back(Lo);
7914 Result.push_back(Hi);
7915 return true;
7916 }
7917 }
7918
7919 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7920 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7921
7922 if (!LH.getNode() && !RH.getNode() &&
7923 isOperationLegalOrCustom(ISD::SRL, VT) &&
7924 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7925 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7926 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7927 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7928 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7929 }
7930
7931 if (!LH.getNode())
7932 return false;
7933
7934 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7935 return false;
7936
7937 Result.push_back(Lo);
7938
7939 if (Opcode == ISD::MUL) {
7940 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7941 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7942 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7943 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7944 Result.push_back(Hi);
7945 return true;
7946 }
7947
7948 // Compute the full width result.
7949 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7950 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7951 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7952 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7953 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7954 };
7955
7956 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7957 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7958 return false;
7959
7960 // This is effectively the add part of a multiply-add of half-sized operands,
7961 // so it cannot overflow.
7962 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7963
7964 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7965 return false;
7966
7967 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7968 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7969
7970 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7971 isOperationLegalOrCustom(ISD::ADDE, VT));
7972 if (UseGlue)
7973 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7974 Merge(Lo, Hi));
7975 else
7976 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7977 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7978
7979 SDValue Carry = Next.getValue(1);
7980 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7981 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7982
7983 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7984 return false;
7985
7986 if (UseGlue)
7987 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7988 Carry);
7989 else
7990 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7991 Zero, Carry);
7992
7993 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7994
7995 if (Opcode == ISD::SMUL_LOHI) {
7996 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7997 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7998 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7999
8000 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8001 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8002 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8003 }
8004
8005 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8006 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8007 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8008 return true;
8009}
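As a cross-check on the half-width decomposition above, here is a minimal standalone sketch in plain C++ (not part of this file; mulViaHalves is an illustrative name) that rebuilds a full product from the same LL/LH/RL/RH partial products, assuming ordinary unsigned integer arithmetic:

#include <cassert>
#include <cstdint>

// Rebuild a full 64-bit product from 16x16->32 partial products, mirroring
// the LL*RL, LL*RH, LH*RL and LH*RH terms combined by the expansion above.
static uint64_t mulViaHalves(uint32_t L, uint32_t R) {
  uint32_t LL = L & 0xFFFF, LH = L >> 16; // low/high halves
  uint32_t RL = R & 0xFFFF, RH = R >> 16;
  uint64_t Lo = (uint64_t)LL * RL;                       // bits [0, 32)
  uint64_t Mid = (uint64_t)LL * RH + (uint64_t)LH * RL;  // bits [16, 49)
  uint64_t Hi = (uint64_t)LH * RH;                       // bits [32, 64)
  return Lo + (Mid << 16) + (Hi << 32); // total < 2^64, so no wraparound
}

int main() {
  assert(mulViaHalves(0xDEADBEEFu, 0x12345678u) ==
         0xDEADBEEFull * 0x12345678ull);
  return 0;
}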
8010
8011 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8012 SelectionDAG &DAG, MulExpansionKind Kind,
8013 SDValue LL, SDValue LH, SDValue RL,
8014 SDValue RH) const {
8015 SmallVector<SDValue, 2> Result;
8016 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8017 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8018 DAG, Kind, LL, LH, RL, RH);
8019 if (Ok) {
8020 assert(Result.size() == 2);
8021 Lo = Result[0];
8022 Hi = Result[1];
8023 }
8024 return Ok;
8025}
8026
8027// Optimize unsigned division or remainder by constants for types twice as large
8028// as a legal VT.
8029//
8030// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8031 // can be computed as:
8033// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
8034// Remainder = Sum % Constant
8035// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8036//
8037// For division, we can compute the remainder using the algorithm described
8038 // above, subtract it from the dividend to get an exact multiple of Constant,
8039 // then multiply that exact multiple by the multiplicative inverse modulo
8040// (1 << (BitWidth / 2)) to get the quotient.
8041
8042// If Constant is even, we can shift right the dividend and the divisor by the
8043// number of trailing zeros in Constant before applying the remainder algorithm.
8044// If we're after the quotient, we can subtract this value from the shifted
8045// dividend and multiply by the multiplicative inverse of the shifted divisor.
8046// If we want the remainder, we shift the value left by the number of trailing
8047// zeros and add the bits that were shifted out of the dividend.
8048 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8049 SmallVectorImpl<SDValue> &Result,
8050 EVT HiLoVT, SelectionDAG &DAG,
8051 SDValue LL, SDValue LH) const {
8052 unsigned Opcode = N->getOpcode();
8053 EVT VT = N->getValueType(0);
8054
8055 // TODO: Support signed division/remainder.
8056 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8057 return false;
8058 assert(
8059 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8060 "Unexpected opcode");
8061
8062 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8063 if (!CN)
8064 return false;
8065
8066 APInt Divisor = CN->getAPIntValue();
8067 unsigned BitWidth = Divisor.getBitWidth();
8068 unsigned HBitWidth = BitWidth / 2;
8069 assert(VT.getScalarSizeInBits() == BitWidth &&
8070 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8071
8072 // Divisor needs to be less than (1 << HBitWidth).
8073 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8074 if (Divisor.uge(HalfMaxPlus1))
8075 return false;
8076
8077 // We depend on the UREM by constant optimization in DAGCombiner that requires
8078 // a high multiply.
8079 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8080 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
8081 return false;
8082
8083 // Don't expand if optimizing for size.
8084 if (DAG.shouldOptForSize())
8085 return false;
8086
8087 // Early out for 0 or 1 divisors.
8088 if (Divisor.ule(1))
8089 return false;
8090
8091 // If the divisor is even, shift it until it becomes odd.
8092 unsigned TrailingZeros = 0;
8093 if (!Divisor[0]) {
8094 TrailingZeros = Divisor.countr_zero();
8095 Divisor.lshrInPlace(TrailingZeros);
8096 }
8097
8098 SDLoc dl(N);
8099 SDValue Sum;
8100 SDValue PartialRem;
8101
8102 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8103 // then add in the carry.
8104 // TODO: If we can't split it in half, we might be able to split into 3 or
8105 // more pieces using a smaller bit width.
8106 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8107 assert(!LL == !LH && "Expected both input halves or no input halves!");
8108 if (!LL)
8109 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8110
8111 // Shift the input by the number of TrailingZeros in the divisor. The
8112 // shifted out bits will be added to the remainder later.
8113 if (TrailingZeros) {
8114 // Save the shifted off bits if we need the remainder.
8115 if (Opcode != ISD::UDIV) {
8116 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8117 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8118 DAG.getConstant(Mask, dl, HiLoVT));
8119 }
8120
8121 LL = DAG.getNode(
8122 ISD::OR, dl, HiLoVT,
8123 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8124 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8125 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8126 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8127 HiLoVT, dl)));
8128 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8129 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8130 }
8131
8132 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8133 EVT SetCCType =
8134 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8135 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8136 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8137 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8138 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8139 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8140 } else {
8141 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8142 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8143 // If the boolean for the target is 0 or 1, we can add the setcc result
8144 // directly.
8145 if (getBooleanContents(HiLoVT) ==
8146 TargetLoweringBase::ZeroOrOneBooleanContent)
8147 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8148 else
8149 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8150 DAG.getConstant(0, dl, HiLoVT));
8151 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8152 }
8153 }
8154
8155 // If we didn't find a sum, we can't do the expansion.
8156 if (!Sum)
8157 return false;
8158
8159 // Perform a HiLoVT urem on the Sum using truncated divisor.
8160 SDValue RemL =
8161 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8162 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8163 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8164
8165 if (Opcode != ISD::UREM) {
8166 // Subtract the remainder from the shifted dividend.
8167 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8168 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8169
8170 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8171
8172 // Multiply by the multiplicative inverse of the divisor modulo
8173 // (1 << BitWidth).
8174 APInt MulFactor = Divisor.multiplicativeInverse();
8175
8176 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8177 DAG.getConstant(MulFactor, dl, VT));
8178
8179 // Split the quotient into low and high parts.
8180 SDValue QuotL, QuotH;
8181 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8182 Result.push_back(QuotL);
8183 Result.push_back(QuotH);
8184 }
8185
8186 if (Opcode != ISD::UDIV) {
8187 // If we shifted the input, shift the remainder left and add the bits we
8188 // shifted off the input.
8189 if (TrailingZeros) {
8190 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8191 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8192 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8193 }
8194 Result.push_back(RemL);
8195 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8196 }
8197
8198 return true;
8199}
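The digit-summing trick this expansion relies on is easy to verify in isolation. A standalone sketch for D = 3 (illustrative helper names, assuming ordinary unsigned wraparound arithmetic):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// "Remainder by summing digits": since 2^32 % 3 == 1,
// (Hi * 2^32 + Lo) % 3 == (Hi + Lo + carry) % 3, so a 64-bit urem by 3
// collapses to a 32-bit urem on the carry-completed sum of the halves.
static uint32_t urem64By3(uint64_t X) {
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  uint32_t Sum = Lo + Hi;
  // Fold the carry back in; it represents 2^32 == 1 (mod 3). When the carry
  // is set, Sum <= 0xFFFFFFFE, so this increment cannot overflow again.
  Sum += Sum < Lo;
  return Sum % 3;
}

// Quotient from the remainder: X - X%3 is an exact multiple of 3, and
// multiplying by the inverse of 3 modulo 2^64 divides exactly.
static uint64_t udiv64By3(uint64_t X) {
  const uint64_t Inv3 = 0xAAAAAAAAAAAAAAABull; // 3 * Inv3 == 1 (mod 2^64)
  return (X - urem64By3(X)) * Inv3;
}

int main() {
  for (uint64_t X : {0ull, 5ull, 123456789012345ull, ~0ull}) {
    assert(urem64By3(X) == X % 3);
    assert(udiv64By3(X) == X / 3);
  }
  return 0;
}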
8200
8201// Check that (every element of) Z is undef or not an exact multiple of BW.
8202static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8203 return ISD::matchUnaryPredicate(
8204 Z,
8205 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8206 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8207}
8208
8209 SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) const {
8210 EVT VT = Node->getValueType(0);
8211 SDValue ShX, ShY;
8212 SDValue ShAmt, InvShAmt;
8213 SDValue X = Node->getOperand(0);
8214 SDValue Y = Node->getOperand(1);
8215 SDValue Z = Node->getOperand(2);
8216 SDValue Mask = Node->getOperand(3);
8217 SDValue VL = Node->getOperand(4);
8218
8219 unsigned BW = VT.getScalarSizeInBits();
8220 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8221 SDLoc DL(SDValue(Node, 0));
8222
8223 EVT ShVT = Z.getValueType();
8224 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8225 // fshl: X << C | Y >> (BW - C)
8226 // fshr: X << (BW - C) | Y >> C
8227 // where C = Z % BW is not zero
8228 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8229 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8230 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8231 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8232 VL);
8233 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8234 VL);
8235 } else {
8236 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8237 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8238 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8239 if (isPowerOf2_32(BW)) {
8240 // Z % BW -> Z & (BW - 1)
8241 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8242 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8243 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8244 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8245 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8246 } else {
8247 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8248 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8249 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8250 }
8251
8252 SDValue One = DAG.getConstant(1, DL, ShVT);
8253 if (IsFSHL) {
8254 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8255 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8256 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8257 } else {
8258 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8259 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8260 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8261 }
8262 }
8263 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8264}
8265
8266 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8267 SelectionDAG &DAG) const {
8268 if (Node->isVPOpcode())
8269 return expandVPFunnelShift(Node, DAG);
8270
8271 EVT VT = Node->getValueType(0);
8272
8273 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8274 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8275 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8276 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8277 return SDValue();
8278
8279 SDValue X = Node->getOperand(0);
8280 SDValue Y = Node->getOperand(1);
8281 SDValue Z = Node->getOperand(2);
8282
8283 unsigned BW = VT.getScalarSizeInBits();
8284 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8285 SDLoc DL(SDValue(Node, 0));
8286
8287 EVT ShVT = Z.getValueType();
8288
8289 // If a funnel shift in the other direction is more supported, use it.
8290 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8291 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8292 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8293 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8294 // fshl X, Y, Z -> fshr X, Y, -Z
8295 // fshr X, Y, Z -> fshl X, Y, -Z
8296 Z = DAG.getNegative(Z, DL, ShVT);
8297 } else {
8298 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8299 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8300 SDValue One = DAG.getConstant(1, DL, ShVT);
8301 if (IsFSHL) {
8302 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8303 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8304 } else {
8305 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8306 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8307 }
8308 Z = DAG.getNOT(DL, Z, ShVT);
8309 }
8310 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8311 }
8312
8313 SDValue ShX, ShY;
8314 SDValue ShAmt, InvShAmt;
8315 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8316 // fshl: X << C | Y >> (BW - C)
8317 // fshr: X << (BW - C) | Y >> C
8318 // where C = Z % BW is not zero
8319 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8320 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8321 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8322 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8323 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8324 } else {
8325 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8326 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8327 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8328 if (isPowerOf2_32(BW)) {
8329 // Z % BW -> Z & (BW - 1)
8330 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8331 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8332 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8333 } else {
8334 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8335 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8336 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8337 }
8338
8339 SDValue One = DAG.getConstant(1, DL, ShVT);
8340 if (IsFSHL) {
8341 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8342 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8343 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8344 } else {
8345 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8346 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8347 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8348 }
8349 }
8350 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8351}
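The zero-safe form in the else-branch above can be modeled directly in scalar code. A minimal sketch for BW = 32 (fshl32 is an illustrative name):

#include <cassert>
#include <cstdint>

// Zero-safe fshl expansion: the extra ">> 1" keeps every shift amount
// strictly below the bit width even when Z % 32 == 0, where a direct
// "Y >> (32 - 0)" would be undefined.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z & 31;     // Z % BW for power-of-two BW
  uint32_t InvShAmt = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}

int main() {
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 0) == 0x12345678u);
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
  return 0;
}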
8352
8353// TODO: Merge with expandFunnelShift.
8354 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8355 SelectionDAG &DAG) const {
8356 EVT VT = Node->getValueType(0);
8357 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8358 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8359 SDValue Op0 = Node->getOperand(0);
8360 SDValue Op1 = Node->getOperand(1);
8361 SDLoc DL(SDValue(Node, 0));
8362
8363 EVT ShVT = Op1.getValueType();
8364 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8365
8366 // If a rotate in the other direction is more supported, use it.
8367 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8368 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8369 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8370 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8371 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8372 }
8373
8374 if (!AllowVectorOps && VT.isVector() &&
8375 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8376 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8377 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8378 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8379 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8380 return SDValue();
8381
8382 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8383 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8384 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8385 SDValue ShVal;
8386 SDValue HsVal;
8387 if (isPowerOf2_32(EltSizeInBits)) {
8388 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8389 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8390 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8391 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8392 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8393 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8394 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8395 } else {
8396 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8397 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8398 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8399 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8400 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8401 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8402 SDValue One = DAG.getConstant(1, DL, ShVT);
8403 HsVal =
8404 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8405 }
8406 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8407}
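The masked power-of-two form above corresponds to the classic branch-free rotate. A standalone sketch (rotl32 is an illustrative name):

#include <cassert>
#include <cstdint>

// (rotl x, c) -> x << (c & 31) | x >> (-c & 31). The masking keeps both
// shift amounts in [0, 31] even for c % 32 == 0, since -c & 31 == (32 - c) & 31.
static uint32_t rotl32(uint32_t X, uint32_t C) {
  return (X << (C & 31)) | (X >> (-C & 31));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u); // top bit wraps around
  assert(rotl32(0xDEADBEEFu, 0) == 0xDEADBEEFu); // zero rotate is safe
  return 0;
}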
8408
8409 SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
8410 SDLoc DL(Node);
8411 EVT VT = Node->getValueType(0);
8412 SDValue X = Node->getOperand(0);
8413 SDValue Y = Node->getOperand(1);
8414 unsigned BW = VT.getScalarSizeInBits();
8415 unsigned Opcode = Node->getOpcode();
8416
8417 switch (Opcode) {
8418 case ISD::CLMUL: {
8419 SDValue Res = DAG.getConstant(0, DL, VT);
8420 for (unsigned I = 0; I < BW; ++I) {
8421 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8422 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8423 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8424 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
8425 }
8426 return Res;
8427 }
8428 case ISD::CLMULR:
8429 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8430 if (isOperationLegalOrCustom(ISD::CLMUL, VT) &&
8431 isOperationLegalOrCustom(ISD::CLMULH, VT)) {
8432 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8433 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8434 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8435 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8436 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8437 DAG.getShiftAmountConstant(1, VT, DL));
8438 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8439 }
8440 [[fallthrough]];
8441 case ISD::CLMULH: {
8442 EVT ExtVT = VT.changeElementType(
8443 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), 2 * BW));
8444 // For example, ExtVT = i64 based operations aren't legal on a 32-bit
8445 // target; use bitreverse-based lowering in this case.
8446 if (!isTypeLegal(ExtVT) ||
8447 !isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8448 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8449 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8450 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8451 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8452 if (Opcode == ISD::CLMULH)
8453 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8454 DAG.getShiftAmountConstant(1, VT, DL));
8455 return Res;
8456 }
8457 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8458 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8459 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8460 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8461 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8462 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8463 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8464 }
8465 }
8466 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8467}
8468
8469 void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8470 SelectionDAG &DAG) const {
8471 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8472 EVT VT = Node->getValueType(0);
8473 unsigned VTBits = VT.getScalarSizeInBits();
8474 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8475
8476 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8477 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8478 SDValue ShOpLo = Node->getOperand(0);
8479 SDValue ShOpHi = Node->getOperand(1);
8480 SDValue ShAmt = Node->getOperand(2);
8481 EVT ShAmtVT = ShAmt.getValueType();
8482 EVT ShAmtCCVT =
8483 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8484 SDLoc dl(Node);
8485
8486 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8487 // ISD::SRA/L nodes don't. Insert an AND to be safe; it's usually optimized
8488 // away during isel.
8489 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8490 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8491 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8492 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8493 : DAG.getConstant(0, dl, VT);
8494
8495 SDValue Tmp2, Tmp3;
8496 if (IsSHL) {
8497 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8498 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8499 } else {
8500 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8501 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8502 }
8503
8504 // If the shift amount is larger than or equal to the width of a part, we don't
8505 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8506 // values for large shift amounts.
8507 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8508 DAG.getConstant(VTBits, dl, ShAmtVT));
8509 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8510 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8511
8512 if (IsSHL) {
8513 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8514 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8515 } else {
8516 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8517 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8518 }
8519}
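A scalar model of the SHL_PARTS select logic above, assuming 32-bit parts (shl64Parts is an illustrative name):

#include <cassert>
#include <cstdint>

// For Amt < 32 the high part is a funnel shift of (Hi:Lo) and the low part a
// plain shift; for Amt >= 32 the low part is zero and the high part is Lo
// shifted by the masked amount.
static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t Safe = Amt & 31;
  uint32_t Fsh = Safe ? (Hi << Safe) | (Lo >> (32 - Safe)) : Hi; // fshl
  uint32_t Shl = Lo << Safe;
  if (Amt & 32) { OutHi = Shl; OutLo = 0; }
  else          { OutHi = Fsh; OutLo = Shl; }
}

int main() {
  uint32_t L, H;
  shl64Parts(0x89ABCDEFu, 0x01234567u, 8, L, H);  // 0x0123456789ABCDEF << 8
  assert(H == 0x23456789u && L == 0xABCDEF00u);
  shl64Parts(0x89ABCDEFu, 0x01234567u, 40, L, H); // shift amount >= 32
  assert(H == 0xABCDEF00u && L == 0);
  return 0;
}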
8520
8521 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8522 SelectionDAG &DAG) const {
8523 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8524 SDValue Src = Node->getOperand(OpNo);
8525 EVT SrcVT = Src.getValueType();
8526 EVT DstVT = Node->getValueType(0);
8527 SDLoc dl(SDValue(Node, 0));
8528
8529 // FIXME: Only f32 to i64 conversions are supported.
8530 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8531 return false;
8532
8533 if (Node->isStrictFPOpcode())
8534 // When a NaN is converted to an integer, a trap is allowed. We can't
8535 // use this expansion here because it would eliminate that trap. Other
8536 // traps are also allowed and cannot be eliminated. See
8537 // IEEE 754-2008 sec 5.8.
8538 return false;
8539
8540 // Expand f32 -> i64 conversion
8541 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8542 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8543 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8544 EVT IntVT = SrcVT.changeTypeToInteger();
8545 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8546
8547 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8548 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8549 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8550 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8551 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8552 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8553
8554 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8555
8556 SDValue ExponentBits = DAG.getNode(
8557 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8558 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8559 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8560
8561 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8562 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8563 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8564 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8565
8566 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8567 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8568 DAG.getConstant(0x00800000, dl, IntVT));
8569
8570 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8571
8572 R = DAG.getSelectCC(
8573 dl, Exponent, ExponentLoBit,
8574 DAG.getNode(ISD::SHL, dl, DstVT, R,
8575 DAG.getZExtOrTrunc(
8576 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8577 dl, IntShVT)),
8578 DAG.getNode(ISD::SRL, dl, DstVT, R,
8579 DAG.getZExtOrTrunc(
8580 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8581 dl, IntShVT)),
8582 ISD::SETGT);
8583
8584 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8585 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8586
8587 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8588 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8589 return true;
8590}
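A standalone sketch of the same bit-level conversion, assuming IEEE-754 binary32 and in-range, non-NaN inputs (fixsfdi here is an illustrative reimplementation, not the compiler-rt symbol):

#include <cassert>
#include <cstdint>
#include <cstring>

// Shift the implicit-1 mantissa by the unbiased exponent, then reapply the
// sign with the xor/sub trick used at the end of the expansion above.
static int64_t fixsfdi(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  int32_t Exp = (int32_t)((Bits & 0x7F800000) >> 23) - 127;
  if (Exp < 0) // |F| < 1.0 truncates to 0
    return 0;
  int64_t Sign = -(int64_t)(Bits >> 31);        // 0 or -1
  int64_t R = (Bits & 0x007FFFFF) | 0x00800000; // mantissa with hidden bit
  R = Exp > 23 ? R << (Exp - 23) : R >> (23 - Exp);
  return (R ^ Sign) - Sign; // conditionally negate
}

int main() {
  assert(fixsfdi(-123.5f) == -123); // truncates toward zero
  assert(fixsfdi(0.25f) == 0);
  assert(fixsfdi(1e10f) == (int64_t)1e10f);
  return 0;
}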
8591
8592 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8593 SDValue &Chain,
8594 SelectionDAG &DAG) const {
8595 SDLoc dl(SDValue(Node, 0));
8596 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8597 SDValue Src = Node->getOperand(OpNo);
8598
8599 EVT SrcVT = Src.getValueType();
8600 EVT DstVT = Node->getValueType(0);
8601 EVT SetCCVT =
8602 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8603 EVT DstSetCCVT =
8604 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8605
8606 // Only expand vector types if we have the appropriate vector bit operations.
8607 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8608 ISD::FP_TO_SINT;
8609 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8610 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8611 return false;
8612
8613 // If the maximum float value is smaller than the signed integer range,
8614 // the destination signmask can't be represented by the float, so we can
8615 // just use FP_TO_SINT directly.
8616 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8617 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8618 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8619 if (APFloat::opOverflow &
8620 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8621 if (Node->isStrictFPOpcode()) {
8622 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8623 { Node->getOperand(0), Src });
8624 Chain = Result.getValue(1);
8625 } else
8626 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8627 return true;
8628 }
8629
8630 // Don't expand it if there isn't a cheap fsub instruction.
8631 if (!isOperationLegalOrCustom(
8632 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8633 return false;
8634
8635 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8636 SDValue Sel;
8637
8638 if (Node->isStrictFPOpcode()) {
8639 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8640 Node->getOperand(0), /*IsSignaling*/ true);
8641 Chain = Sel.getValue(1);
8642 } else {
8643 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8644 }
8645
8646 bool Strict = Node->isStrictFPOpcode() ||
8647 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8648
8649 if (Strict) {
8650 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8651 // signmask then offset (the result of which should be fully representable).
8652 // Sel = Src < 0x8000000000000000
8653 // FltOfs = select Sel, 0, 0x8000000000000000
8654 // IntOfs = select Sel, 0, 0x8000000000000000
8655 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8656
8657 // TODO: Should any fast-math-flags be set for the FSUB?
8658 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8659 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8660 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8661 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8662 DAG.getConstant(0, dl, DstVT),
8663 DAG.getConstant(SignMask, dl, DstVT));
8664 SDValue SInt;
8665 if (Node->isStrictFPOpcode()) {
8666 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8667 { Chain, Src, FltOfs });
8668 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8669 { Val.getValue(1), Val });
8670 Chain = SInt.getValue(1);
8671 } else {
8672 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8673 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8674 }
8675 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8676 } else {
8677 // Expand based on maximum range of FP_TO_SINT:
8678 // True = fp_to_sint(Src)
8679 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8680 // Result = select (Src < 0x8000000000000000), True, False
8681
8682 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8683 // TODO: Should any fast-math-flags be set for the FSUB?
8684 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8685 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8686 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8687 DAG.getConstant(SignMask, dl, DstVT));
8688 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8689 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8690 }
8691 return true;
8692}
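The strict branch above amounts to the following scalar model, assuming IEEE-754 binary64 and in-range, non-NaN inputs (fptoui64 is an illustrative name):

#include <cassert>
#include <cstdint>

// Values below 2^63 convert directly; larger ones are offset by 2^63 before
// the signed convert, and the sign bit is restored with an integer xor.
static uint64_t fptoui64(double Src) {
  const double Cst = 9223372036854775808.0; // 2^63, exactly representable
  if (Src < Cst)
    return (uint64_t)(int64_t)Src;
  return (uint64_t)(int64_t)(Src - Cst) ^ 0x8000000000000000ull;
}

int main() {
  assert(fptoui64(1.0) == 1);
  assert(fptoui64(1.5e19) == 15000000000000000000ull); // above 2^63
  return 0;
}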
8693
8694 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8695 SDValue &Chain, SelectionDAG &DAG) const {
8696 // This transform is not correct for converting 0 when rounding mode is set
8697 // to round toward negative infinity, which will produce -0.0. So disable it
8698 // under strictfp.
8699 if (Node->isStrictFPOpcode())
8700 return false;
8701
8702 SDValue Src = Node->getOperand(0);
8703 EVT SrcVT = Src.getValueType();
8704 EVT DstVT = Node->getValueType(0);
8705
8706 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8707 // it.
8708 if (Node->getFlags().hasNonNeg() &&
8709 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8710 Result =
8711 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8712 return true;
8713 }
8714
8715 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8716 return false;
8717
8718 // Only expand vector types if we have the appropriate vector bit
8719 // operations.
8720 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8721 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8722 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8723 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8724 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8725 return false;
8726
8727 SDLoc dl(SDValue(Node, 0));
8728
8729 // Implementation of unsigned i64 to f64 following the algorithm in
8730 // __floatundidf in compiler_rt. This implementation performs rounding
8731 // correctly in all rounding modes with the exception of converting 0
8732 // when rounding toward negative infinity. In that case the fsub will
8733 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8734 // incorrect.
8735 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8736 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8737 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8738 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8739 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8740 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8741
8742 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8743 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8744 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8745 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8746 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8747 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8748 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8749 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8750 return true;
8751}
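The magic constants above can be sanity-checked with a standalone scalar model, assuming IEEE-754 binary64 and round-to-nearest (floatundidf here is an illustrative reimplementation, not the compiler-rt symbol):

#include <cassert>
#include <cstdint>
#include <cstring>

// Each 32-bit half is planted in the mantissa of a double whose exponent
// already carries its weight (2^52 for the low half, 2^84 for the high
// half); one fsub cancels both biases and one fadd merges the halves with a
// single rounding step.
static double floatundidf(uint64_t X) {
  uint64_t LoBits = (X & 0xFFFFFFFFull) | 0x4330000000000000ull; // 2^52 + Lo
  uint64_t HiBits = (X >> 32) | 0x4530000000000000ull; // 2^84 + Hi * 2^32
  double LoD, HiD;
  std::memcpy(&LoD, &LoBits, sizeof(LoD));
  std::memcpy(&HiD, &HiBits, sizeof(HiD));
  const double Bias = 0x1.00000001p+84; // 2^84 + 2^52
  return (HiD - Bias) + LoD;
}

int main() {
  assert(floatundidf(0) == 0.0);
  assert(floatundidf(1) == 1.0);
  assert(floatundidf(~0ull) == 0x1p64); // 2^64 - 1 rounds up to 2^64
  return 0;
}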
8752
8753SDValue
8754 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8755 SelectionDAG &DAG) const {
8756 unsigned Opcode = Node->getOpcode();
8757 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8758 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8759 "Wrong opcode");
8760
8761 if (Node->getFlags().hasNoNaNs()) {
8762 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8763 EVT VT = Node->getValueType(0);
8764 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8765 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8766 VT.isVector())
8767 return SDValue();
8768 SDValue Op1 = Node->getOperand(0);
8769 SDValue Op2 = Node->getOperand(1);
8770 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8771 Node->getFlags());
8772 }
8773
8774 return SDValue();
8775}
8776
8777 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8778 SelectionDAG &DAG) const {
8779 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8780 return Expanded;
8781
8782 EVT VT = Node->getValueType(0);
8783 if (VT.isScalableVector())
8784 report_fatal_error(
8785 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8786
8787 SDLoc dl(Node);
8788 unsigned NewOp =
8789 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8790
8791 if (isOperationLegalOrCustom(NewOp, VT)) {
8792 SDValue Quiet0 = Node->getOperand(0);
8793 SDValue Quiet1 = Node->getOperand(1);
8794
8795 if (!Node->getFlags().hasNoNaNs()) {
8796 // Insert canonicalizes if it's possible we need to quiet to get correct
8797 // sNaN behavior.
8798 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8799 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8800 Node->getFlags());
8801 }
8802 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8803 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8804 Node->getFlags());
8805 }
8806 }
8807
8808 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8809 }
8810
8811 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8812 // instead if there are no NaNs and there can't be an incompatible zero
8813 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8814 if ((Node->getFlags().hasNoNaNs() ||
8815 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8816 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8817 (Node->getFlags().hasNoSignedZeros() ||
8818 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8819 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8820 unsigned IEEE2018Op =
8821 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8822 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8823 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8824 Node->getOperand(1), Node->getFlags());
8825 }
8826
8827 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8828 return SelCC;
8829
8830 return SDValue();
8831}
8832
8833 SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8834 SelectionDAG &DAG) const {
8835 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8836 return Expanded;
8837
8838 SDLoc DL(N);
8839 SDValue LHS = N->getOperand(0);
8840 SDValue RHS = N->getOperand(1);
8841 unsigned Opc = N->getOpcode();
8842 EVT VT = N->getValueType(0);
8843 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8844 bool IsMax = Opc == ISD::FMAXIMUM;
8845 SDNodeFlags Flags = N->getFlags();
8846
8847 // First, implement comparison not propagating NaN. If no native fmin or fmax
8848 // available, use plain select with setcc instead.
8849 SDValue MinMax;
8850 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8851 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8852
8853 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8854 // signed zero behavior.
8855 bool MinMaxMustRespectOrderedZero = false;
8856
8857 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8858 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8859 MinMaxMustRespectOrderedZero = true;
8860 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8861 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8862 } else {
8863 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8864 return DAG.UnrollVectorOp(N);
8865
8866 // NaN (if present) will be propagated later, so orderedness doesn't matter.
8867 SDValue Compare =
8868 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8869 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8870 }
8871
8872 // Propagate a NaN if either operand is NaN.
8873 if (!N->getFlags().hasNoNaNs() &&
8874 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8875 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8876 APFloat::getNaN(VT.getFltSemantics()));
8877 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8878 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8879 }
8880
8881 // fminimum/fmaximum requires -0.0 less than +0.0
8882 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8883 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8884 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8885 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8886 SDValue TestZero =
8887 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8888 SDValue LCmp = DAG.getSelect(
8889 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8890 MinMax, Flags);
8891 SDValue RCmp = DAG.getSelect(
8892 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8893 LCmp, Flags);
8894 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8895 }
8896
8897 return MinMax;
8898}
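For reference, a scalar model of the semantics this expansion implements, assuming IEEE-754 binary64 (fmaximumRef is an illustrative name):

#include <cassert>
#include <cmath>
#include <limits>

// fmaximum propagates NaN from either operand and orders -0.0 < +0.0.
static double fmaximumRef(double X, double Y) {
  if (std::isnan(X) || std::isnan(Y))
    return std::numeric_limits<double>::quiet_NaN();
  if (X == Y) // distinguish +0.0 from -0.0
    return std::signbit(X) ? Y : X;
  return X > Y ? X : Y;
}

int main() {
  assert(!std::signbit(fmaximumRef(-0.0, +0.0))); // -0.0 < +0.0
  assert(std::isnan(fmaximumRef(0.0, std::nan(""))));
  return 0;
}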
8899
8900 SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8901 SelectionDAG &DAG) const {
8902 SDLoc DL(Node);
8903 SDValue LHS = Node->getOperand(0);
8904 SDValue RHS = Node->getOperand(1);
8905 unsigned Opc = Node->getOpcode();
8906 EVT VT = Node->getValueType(0);
8907 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8908 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8909 SDNodeFlags Flags = Node->getFlags();
8910
8911 unsigned NewOp =
8912 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8913
8914 if (isOperationLegalOrCustom(NewOp, VT)) {
8915 if (!Flags.hasNoNaNs()) {
8916 // Insert canonicalizes if it's possible we need to quiet to get correct
8917 // sNaN behavior.
8918 if (!DAG.isKnownNeverSNaN(LHS)) {
8919 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8920 }
8921 if (!DAG.isKnownNeverSNaN(RHS)) {
8922 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8923 }
8924 }
8925
8926 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8927 }
8928
8929 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since they have the
8930 // same behavior in all other cases, +0.0 vs -0.0 included.
8931 if (Flags.hasNoNaNs() ||
8932 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8933 unsigned IEEE2019Op =
8934 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8935 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8936 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8937 }
8938
8939 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
8940 // either one for +0.0 vs -0.0.
8941 if ((Flags.hasNoNaNs() ||
8942 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8943 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8944 DAG.isKnownNeverZeroFloat(RHS))) {
8945 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8946 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8947 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8948 }
8949
8950 if (VT.isVector() &&
8951 (!isOperationLegalOrCustom(ISD::VSELECT, VT) ||
8952 !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)))
8953 return DAG.UnrollVectorOp(Node);
8954
8955 // If only one operand is NaN, override it with the other operand.
8956 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8957 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8958 }
8959 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8960 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8961 }
8962
8963 // Always prefer RHS if equal.
8964 SDValue MinMax =
8965 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8966
8967 // TODO: We need quiet sNaN if strictfp.
8968
8969 // Fixup signed zero behavior.
8970 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8971 DAG.isKnownNeverZeroFloat(RHS)) {
8972 return MinMax;
8973 }
8974 SDValue TestZero =
8975 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8976 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8977 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8978 EVT IntVT = VT.changeTypeToInteger();
8979 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
8980 SDValue LHSTrunc = LHS;
8981 if (!isTypeLegal(IntVT) && isTypeLegal(FloatVT)) {
8982 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
8983 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8984 }
8985 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
8986 // we preferred RHS when generating MinMax if the operands are equal.
8987 SDValue RetZero = DAG.getSelect(
8988 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
8989 MinMax, Flags);
8990 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
8991}
8992
8993 /// Returns a true value if this FPClassTest can be performed with an ordered
8994/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8995/// std::nullopt if it cannot be performed as a compare with 0.
8996static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8997 const fltSemantics &Semantics,
8998 const MachineFunction &MF) {
8999 FPClassTest OrderedMask = Test & ~fcNan;
9000 FPClassTest NanTest = Test & fcNan;
9001 bool IsOrdered = NanTest == fcNone;
9002 bool IsUnordered = NanTest == fcNan;
9003
9004 // Skip cases that are testing for only a qnan or snan.
9005 if (!IsOrdered && !IsUnordered)
9006 return std::nullopt;
9007
9008 if (OrderedMask == fcZero &&
9009 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9010 return IsOrdered;
9011 if (OrderedMask == (fcZero | fcSubnormal) &&
9012 MF.getDenormalMode(Semantics).inputsAreZero())
9013 return IsOrdered;
9014 return std::nullopt;
9015}
9016
9017 SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
9018 const FPClassTest OrigTestMask,
9019 SDNodeFlags Flags, const SDLoc &DL,
9020 SelectionDAG &DAG) const {
9021 EVT OperandVT = Op.getValueType();
9022 assert(OperandVT.isFloatingPoint());
9023 FPClassTest Test = OrigTestMask;
9024
9025 // Degenerate cases.
9026 if (Test == fcNone)
9027 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9028 if (Test == fcAllFlags)
9029 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9030
9031 // PPC double-double is a pair of doubles, of which the higher part determines
9032 // the value class.
9033 if (OperandVT == MVT::ppcf128) {
9034 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9035 DAG.getConstant(1, DL, MVT::i32));
9036 OperandVT = MVT::f64;
9037 }
9038
9039 // Floating-point type properties.
9040 EVT ScalarFloatVT = OperandVT.getScalarType();
9041 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9042 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9043 bool IsF80 = (ScalarFloatVT == MVT::f80);
9044
9045 // Some checks can be implemented using float comparisons, if floating point
9046 // exceptions are ignored.
9047 if (Flags.hasNoFPExcept() &&
9048 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
9049 FPClassTest FPTestMask = Test;
9050 bool IsInvertedFP = false;
9051
9052 if (FPClassTest InvertedFPCheck =
9053 invertFPClassTestIfSimpler(FPTestMask, true)) {
9054 FPTestMask = InvertedFPCheck;
9055 IsInvertedFP = true;
9056 }
9057
9058 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9059 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9060
9061 // See if we can fold an '| fcNan' into an unordered compare.
9062 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9063
9064 // Can't fold the ordered check if we're only testing for snan or qnan
9065 // individually.
9066 if ((FPTestMask & fcNan) != fcNan)
9067 OrderedFPTestMask = FPTestMask;
9068
9069 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9070
9071 if (std::optional<bool> IsCmp0 =
9072 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9073 IsCmp0 && (isCondCodeLegalOrCustom(
9074 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9075 OperandVT.getScalarType().getSimpleVT()))) {
9076
9077 // If denormals could be implicitly treated as 0, this is not equivalent
9078 // to a compare with 0 since it will also be true for denormals.
9079 return DAG.getSetCC(DL, ResultVT, Op,
9080 DAG.getConstantFP(0.0, DL, OperandVT),
9081 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9082 }
9083
9084 if (FPTestMask == fcNan &&
9085 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
9086 OperandVT.getScalarType().getSimpleVT()))
9087 return DAG.getSetCC(DL, ResultVT, Op, Op,
9088 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9089
9090 bool IsOrderedInf = FPTestMask == fcInf;
9091 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9092 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9093 : UnorderedCmpOpcode,
9094 OperandVT.getScalarType().getSimpleVT()) &&
9095 (isFAbsFree(OperandVT) ||
9096 isOperationLegal(ISD::FABS, OperandVT.getScalarType()) ||
9097 (OperandVT.isVector() &&
9098 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
9099 // isinf(x) --> fabs(x) == inf
9100 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9101 SDValue Inf =
9102 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9103 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9104 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9105 }
9106
9107 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9108 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9109 : UnorderedCmpOpcode,
9110 OperandVT.getSimpleVT())) {
9111 // isposinf(x) --> x == inf
9112 // isneginf(x) --> x == -inf
9113 // isposinf(x) || nan --> x u== inf
9114 // isneginf(x) || nan --> x u== -inf
9115
9116 SDValue Inf = DAG.getConstantFP(
9117 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9118 OperandVT);
9119 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9120 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9121 }
9122
9123 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9124 // TODO: Could handle ordered case, but it produces worse code for
9125 // x86. Maybe handle ordered if fabs is free?
9126
9127 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9128 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9129
9130 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9131 OperandVT.getScalarType().getSimpleVT())) {
9132 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9133
9134 // TODO: Maybe only makes sense if fabs is free. Integer test of
9135 // exponent bits seems better for x86.
9136 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9137 SDValue SmallestNormal = DAG.getConstantFP(
9138 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9139 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9140 IsOrdered ? OrderedOp : UnorderedOp);
9141 }
9142 }
9143
9144 if (FPTestMask == fcNormal) {
9145 // TODO: Handle unordered
9146 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9147 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9148
9149 if (isCondCodeLegalOrCustom(IsFiniteOp,
9150 OperandVT.getScalarType().getSimpleVT()) &&
9151 isCondCodeLegalOrCustom(IsNormalOp,
9152 OperandVT.getScalarType().getSimpleVT()) &&
9153 isFAbsFree(OperandVT)) {
9154 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9155 SDValue Inf =
9156 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9157 SDValue SmallestNormal = DAG.getConstantFP(
9158 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9159
9160 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9161 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9162 SDValue IsNormal =
9163 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9164 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9165 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9166 }
9167 }
9168 }
9169
9170 // Some checks may be represented as the inversion of a simpler check, for example
9171 // "inf|normal|subnormal|zero" => !"nan".
9172 bool IsInverted = false;
9173
9174 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9175 Test = InvertedCheck;
9176 IsInverted = true;
9177 }
9178
9179 // In the general case use integer operations.
9180 unsigned BitSize = OperandVT.getScalarSizeInBits();
9181 EVT IntVT = OperandVT.changeElementType(
9182 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9183 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9184
9185 // Various masks.
9186 APInt SignBit = APInt::getSignMask(BitSize);
9187 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9188 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9189 const unsigned ExplicitIntBitInF80 = 63;
9190 APInt ExpMask = Inf;
9191 if (IsF80)
9192 ExpMask.clearBit(ExplicitIntBitInF80);
9193 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9194 APInt QNaNBitMask =
9195 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9196 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9197
9198 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9199 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9200 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9201 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9202 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9203 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9204
9205 SDValue Res;
9206 const auto appendResult = [&](SDValue PartialRes) {
9207 if (PartialRes) {
9208 if (Res)
9209 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9210 else
9211 Res = PartialRes;
9212 }
9213 };
9214
9215 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9216 const auto getIntBitIsSet = [&]() -> SDValue {
9217 if (!IntBitIsSetV) {
9218 APInt IntBitMask(BitSize, 0);
9219 IntBitMask.setBit(ExplicitIntBitInF80);
9220 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9221 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9222 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9223 }
9224 return IntBitIsSetV;
9225 };
9226
9227 // Split the value into sign bit and absolute value.
9228 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9229 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9230 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9231
9232 // Tests that involve more than one class should be processed first.
9233 SDValue PartialRes;
9234
9235 if (IsF80)
9236 ; // Detect finite numbers of f80 by checking individual classes because
9237 // they have different settings of the explicit integer bit.
9238 else if ((Test & fcFinite) == fcFinite) {
9239 // finite(V) ==> abs(V) < exp_mask
9240 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9241 Test &= ~fcFinite;
9242 } else if ((Test & fcFinite) == fcPosFinite) {
9243 // finite(V) && V > 0 ==> V < exp_mask
9244 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9245 Test &= ~fcPosFinite;
9246 } else if ((Test & fcFinite) == fcNegFinite) {
9247 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9248 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9249 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9250 Test &= ~fcNegFinite;
9251 }
9252 appendResult(PartialRes);
9253
9254 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9255 // fcZero | fcSubnormal => test all exponent bits are 0
9256 // TODO: Handle sign bit specific cases
9257 if (PartialCheck == (fcZero | fcSubnormal)) {
9258 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9259 SDValue ExpIsZero =
9260 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9261 appendResult(ExpIsZero);
9262 Test &= ~PartialCheck & fcAllFlags;
9263 }
9264 }
9265
9266 // Check for individual classes.
9267
9268 if (unsigned PartialCheck = Test & fcZero) {
9269 if (PartialCheck == fcPosZero)
9270 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9271 else if (PartialCheck == fcZero)
9272 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9273 else // ISD::fcNegZero
9274 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9275 appendResult(PartialRes);
9276 }
9277
9278 if (unsigned PartialCheck = Test & fcSubnormal) {
9279 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9280 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9281 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9282 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9283 SDValue VMinusOneV =
9284 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9285 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9286 if (PartialCheck == fcNegSubnormal)
9287 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9288 appendResult(PartialRes);
9289 }
9290
9291 if (unsigned PartialCheck = Test & fcInf) {
9292 if (PartialCheck == fcPosInf)
9293 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9294 else if (PartialCheck == fcInf)
9295 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9296 else { // ISD::fcNegInf
9297 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9298 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9299 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9300 }
9301 appendResult(PartialRes);
9302 }
9303
9304 if (unsigned PartialCheck = Test & fcNan) {
9305 APInt InfWithQnanBit = Inf | QNaNBitMask;
9306 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9307 if (PartialCheck == fcNan) {
9308 // isnan(V) ==> abs(V) > int(inf)
9309 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9310 if (IsF80) {
9311 // Recognize unsupported values as NaNs for compatibility with glibc.
9312 // For such values, (exp(V) == 0) == int_bit.
9313 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9314 SDValue ExpIsZero =
9315 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9316 SDValue IsPseudo =
9317 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9318 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9319 }
9320 } else if (PartialCheck == fcQNan) {
9321 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9322 PartialRes =
9323 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9324 } else { // ISD::fcSNan
9325 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9326 // abs(V) < (unsigned(Inf) | quiet_bit)
9327 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9328 SDValue IsNotQnan =
9329 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9330 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9331 }
9332 appendResult(PartialRes);
9333 }
9334
9335 if (unsigned PartialCheck = Test & fcNormal) {
9336 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9337 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9338 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9339 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9340 APInt ExpLimit = ExpMask - ExpLSB;
9341 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9342 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9343 if (PartialCheck == fcNegNormal)
9344 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9345 else if (PartialCheck == fcPosNormal) {
9346 SDValue PosSignV =
9347 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9348 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9349 }
9350 if (IsF80)
9351 PartialRes =
9352 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9353 appendResult(PartialRes);
9354 }
9355
9356 if (!Res)
9357 return DAG.getConstant(IsInverted, DL, ResultVT);
9358 if (IsInverted)
9359 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9360 return Res;
9361}
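Two of the integer-domain tests built above are easy to check in isolation, assuming IEEE-754 binary32 (the helper names are illustrative):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t absBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits & 0x7FFFFFFFu; // drop the sign bit
}
// isnan(V) ==> abs(V) > int(inf)
static bool isNanBits(float F) { return absBits(F) > 0x7F800000u; }
// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set);
// the unsigned wraparound makes abs(V) == 0 fail the test.
static bool isSubnormalBits(float F) { return absBits(F) - 1 < 0x007FFFFFu; }

int main() {
  assert(isNanBits(std::nanf("")) && !isNanBits(INFINITY));
  assert(isSubnormalBits(1e-45f) && !isSubnormalBits(0.0f));
  return 0;
}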
9362
9363// Only expand vector types if we have the appropriate vector bit operations.
9364static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9365 assert(VT.isVector() && "Expected vector type");
9366 unsigned Len = VT.getScalarSizeInBits();
9367 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9368 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9369 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9370 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9371 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9372}
9373
9374 SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9375 SDLoc dl(Node);
9376 EVT VT = Node->getValueType(0);
9377 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9378 SDValue Op = Node->getOperand(0);
9379 unsigned Len = VT.getScalarSizeInBits();
9380 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9381
9382 // TODO: Add support for irregular type lengths.
9383 if (!(Len <= 128 && Len % 8 == 0))
9384 return SDValue();
9385
9386 // Only expand vector types if we have the appropriate vector bit operations.
9387 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9388 return SDValue();
9389
9390 // This is the "best" algorithm from
9391 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9392 SDValue Mask55 =
9393 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9394 SDValue Mask33 =
9395 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9396 SDValue Mask0F =
9397 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9398
9399 // v = v - ((v >> 1) & 0x55555555...)
9400 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9401 DAG.getNode(ISD::AND, dl, VT,
9402 DAG.getNode(ISD::SRL, dl, VT, Op,
9403 DAG.getConstant(1, dl, ShVT)),
9404 Mask55));
9405 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9406 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9407 DAG.getNode(ISD::AND, dl, VT,
9408 DAG.getNode(ISD::SRL, dl, VT, Op,
9409 DAG.getConstant(2, dl, ShVT)),
9410 Mask33));
9411 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9412 Op = DAG.getNode(ISD::AND, dl, VT,
9413 DAG.getNode(ISD::ADD, dl, VT, Op,
9414 DAG.getNode(ISD::SRL, dl, VT, Op,
9415 DAG.getConstant(4, dl, ShVT))),
9416 Mask0F);
9417
9418 if (Len <= 8)
9419 return Op;
9420
9421 // Avoid the multiply if we only have 2 bytes to add.
9422 // TODO: Only doing this for scalars because vectors weren't as obviously
9423 // improved.
9424 if (Len == 16 && !VT.isVector()) {
9425 // v = (v + (v >> 8)) & 0x00FF;
9426 return DAG.getNode(ISD::AND, dl, VT,
9427 DAG.getNode(ISD::ADD, dl, VT, Op,
9428 DAG.getNode(ISD::SRL, dl, VT, Op,
9429 DAG.getConstant(8, dl, ShVT))),
9430 DAG.getConstant(0xFF, dl, VT));
9431 }
9432
9433 // v = (v * 0x01010101...) >> (Len - 8)
9434 SDValue V;
9435 if (isOperationLegalOrCustom(
9436 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9437 SDValue Mask01 =
9438 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9439 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9440 } else {
9441 V = Op;
9442 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9443 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9444 V = DAG.getNode(ISD::ADD, dl, VT, V,
9445 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9446 }
9447 }
9448 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9449}
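The emitted sequence matches the classic parallel bit count; a standalone u32 model (popcount32 is an illustrative name):

#include <cassert>
#include <cstdint>

// Pairwise sums, then nibble sums, then one multiply that folds the four
// byte sums into the top byte, exactly as the expansion above does.
static unsigned popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555);               // 2-bit sums
  V = (V & 0x33333333) + ((V >> 2) & 0x33333333); // 4-bit sums
  V = (V + (V >> 4)) & 0x0F0F0F0F;               // byte sums
  return (V * 0x01010101) >> 24;                 // fold into top byte
}

int main() {
  assert(popcount32(0) == 0);
  assert(popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0xDEADBEEFu) == 24);
  return 0;
}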
9450
9451 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9452 SDLoc dl(Node);
9453 EVT VT = Node->getValueType(0);
9454 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9455 SDValue Op = Node->getOperand(0);
9456 SDValue Mask = Node->getOperand(1);
9457 SDValue VL = Node->getOperand(2);
9458 unsigned Len = VT.getScalarSizeInBits();
9459 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9460
9461 // TODO: Add support for irregular type lengths.
9462 if (!(Len <= 128 && Len % 8 == 0))
9463 return SDValue();
9464
 9465  // This is the same algorithm as in expandCTPOP, from
9466 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9467 SDValue Mask55 =
9468 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9469 SDValue Mask33 =
9470 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9471 SDValue Mask0F =
9472 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9473
9474 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9475
9476 // v = v - ((v >> 1) & 0x55555555...)
9477 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9478 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9479 DAG.getConstant(1, dl, ShVT), Mask, VL),
9480 Mask55, Mask, VL);
9481 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9482
9483 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9484 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9485 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9486 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9487 DAG.getConstant(2, dl, ShVT), Mask, VL),
9488 Mask33, Mask, VL);
9489 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9490
9491 // v = (v + (v >> 4)) & 0x0F0F0F0F...
 9492  Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
 9493                     Mask, VL);
 9494  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9495 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9496
9497 if (Len <= 8)
9498 return Op;
9499
9500 // v = (v * 0x01010101...) >> (Len - 8)
9501 SDValue V;
 9502  if (isOperationLegalOrCustomOrPromote(
 9503          ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9504 SDValue Mask01 =
9505 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9506 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9507 } else {
9508 V = Op;
9509 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9510 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9511 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9512 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9513 Mask, VL);
9514 }
9515 }
9516 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9517 Mask, VL);
9518}
9519
 9520SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
 9521  SDLoc dl(Node);
9522 EVT VT = Node->getValueType(0);
9523 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9524 SDValue Op = Node->getOperand(0);
9525 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9526
9527 // If the non-ZERO_UNDEF version is supported we can use that instead.
 9528  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
 9529      isOperationLegalOrCustom(ISD::CTLZ, VT))
 9530    return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9531
 9532  // If the ZERO_UNDEF version is supported use that and handle the zero case.
 9533  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
 9534    EVT SetCCVT =
9535 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9536 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9537 SDValue Zero = DAG.getConstant(0, dl, VT);
9538 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9539 return DAG.getSelect(dl, VT, SrcIsZero,
9540 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9541 }
9542
9543 // Only expand vector types if we have the appropriate vector bit operations.
9544 // This includes the operations needed to expand CTPOP if it isn't supported.
 9545  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
 9546                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
 9547                         !canExpandVectorCTPOP(*this, VT)) ||
 9548                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
 9549                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
 9550    return SDValue();
9551
9552 // for now, we do this:
9553 // x = x | (x >> 1);
9554 // x = x | (x >> 2);
9555 // ...
9556 // x = x | (x >>16);
9557 // x = x | (x >>32); // for 64-bit input
9558 // return popcount(~x);
9559 //
9560 // Ref: "Hacker's Delight" by Henry Warren
9561 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9562 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9563 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9564 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9565 }
9566 Op = DAG.getNOT(dl, Op, VT);
9567 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9568}
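// A scalar model of the same recipe (illustrative only; assumes a 32-bit
// 'unsigned' and that llvm::popcount from llvm/ADT/bit.h is reachable here):
[[maybe_unused]] static unsigned ctlzViaCtpopModel(unsigned X) {
  X |= X >> 1; // Smear the leading one into every lower position...
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16; // ...so X now has the form 000...0111...1.
  return llvm::popcount(~X); // The complement holds exactly the leading zeros.
}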
9569
 9570SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
 9571  SDLoc dl(Node);
9572 EVT VT = Node->getValueType(0);
9573 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9574 SDValue Op = Node->getOperand(0);
9575 SDValue Mask = Node->getOperand(1);
9576 SDValue VL = Node->getOperand(2);
9577 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9578
9579 // do this:
9580 // x = x | (x >> 1);
9581 // x = x | (x >> 2);
9582 // ...
9583 // x = x | (x >>16);
9584 // x = x | (x >>32); // for 64-bit input
9585 // return popcount(~x);
9586 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9587 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9588 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9589 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9590 VL);
9591 }
9592 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9593 Mask, VL);
9594 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9595}
9596
 9597SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
 9598                                        const SDLoc &DL, EVT VT, SDValue Op,
9599 unsigned BitWidth) const {
9600 if (BitWidth != 32 && BitWidth != 64)
9601 return SDValue();
9602 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9603 : APInt(64, 0x0218A392CD3D5DBFULL);
9604 const DataLayout &TD = DAG.getDataLayout();
 9605  MachinePointerInfo PtrInfo =
 9606      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
 9607  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9608 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9609 SDValue Lookup = DAG.getNode(
9610 ISD::SRL, DL, VT,
9611 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9612 DAG.getConstant(DeBruijn, DL, VT)),
 9613      DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
 9614  Lookup = DAG.getZExtOrTrunc(Lookup, DL, getPointerTy(TD));
 9615
 9616  SmallVector<uint8_t> Table(BitWidth, 0);
 9617  for (unsigned i = 0; i < BitWidth; i++) {
9618 APInt Shl = DeBruijn.shl(i);
9619 APInt Lshr = Shl.lshr(ShiftAmt);
9620 Table[Lshr.getZExtValue()] = i;
9621 }
9622
9623 // Create a ConstantArray in Constant Pool
9624 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9625 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9626 TD.getPrefTypeAlign(CA->getType()));
9627 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9628 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9629 PtrInfo, MVT::i8);
9630 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9631 return ExtLoad;
9632
9633 EVT SetCCVT =
9634 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9635 SDValue Zero = DAG.getConstant(0, DL, VT);
9636 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9637 return DAG.getSelect(DL, VT, SrcIsZero,
9638 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9639}
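// The trick above: Op & -Op isolates the lowest set bit, i.e. 1 << cttz(Op),
// so the MUL is a left shift of the De Bruijn constant by cttz(Op), and the
// top Log2_32(BitWidth) bits form a perfect hash of the shift amount. A
// scalar model with the table spelled out (illustrative only; the lowering
// instead materializes the table in the constant pool):
[[maybe_unused]] static unsigned deBruijnCttzModel(unsigned V) {
  static const unsigned char Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  return Table[((V & -V) * 0x077CB531u) >> 27]; // 27 == 32 - Log2_32(32)
}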
9640
 9641SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
 9642  SDLoc dl(Node);
9643 EVT VT = Node->getValueType(0);
9644 SDValue Op = Node->getOperand(0);
9645 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9646
9647 // If the non-ZERO_UNDEF version is supported we can use that instead.
 9648  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
 9649      isOperationLegalOrCustom(ISD::CTTZ, VT))
 9650    return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9651
 9652  // If the ZERO_UNDEF version is supported use that and handle the zero case.
 9653  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
 9654    EVT SetCCVT =
9655 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9656 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9657 SDValue Zero = DAG.getConstant(0, dl, VT);
9658 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9659 return DAG.getSelect(dl, VT, SrcIsZero,
9660 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9661 }
9662
9663 // Only expand vector types if we have the appropriate vector bit operations.
9664 // This includes the operations needed to expand CTPOP if it isn't supported.
 9665  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
 9666                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
 9667                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
 9668                         !canExpandVectorCTPOP(*this, VT)) ||
 9669                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
 9670                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
 9671                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
 9672    return SDValue();
9673
 9674  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
 9675  // to be expanded or converted to a libcall.
 9676  if (!VT.isVector() && isOperationExpandOrLibCall(ISD::CTPOP, VT) &&
 9677      !isOperationLegal(ISD::CTLZ, VT))
 9678    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
 9679      return V;
9680
9681 // for now, we use: { return popcount(~x & (x - 1)); }
9682 // unless the target has ctlz but not ctpop, in which case we use:
9683 // { return 32 - nlz(~x & (x-1)); }
9684 // Ref: "Hacker's Delight" by Henry Warren
9685 SDValue Tmp = DAG.getNode(
9686 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9687 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9688
 9689  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
 9690  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
 9691    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
 9692                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9693 }
9694
9695 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9696}
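// Worked example for the fallback above: x - 1 flips the trailing zeros and
// clears the lowest set bit, so ~x & (x - 1) isolates exactly the trailing
// zeros. For x = 0b101000: x - 1 = 0b100111, ~x = ...010111, and
// ~x & (x - 1) = 0b000111, whose popcount (or BitWidth - ctlz) gives cttz = 3.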
9697
 9698SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
 9699  SDValue Op = Node->getOperand(0);
9700 SDValue Mask = Node->getOperand(1);
9701 SDValue VL = Node->getOperand(2);
9702 SDLoc dl(Node);
9703 EVT VT = Node->getValueType(0);
9704
9705 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9706 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9707 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9708 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9709 DAG.getConstant(1, dl, VT), Mask, VL);
9710 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9711 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9712}
9713
 9714SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
 9715                                             SelectionDAG &DAG) const {
9716 // %cond = to_bool_vec %source
9717 // %splat = splat /*val=*/VL
9718 // %tz = step_vector
9719 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9720 // %r = vp.reduce.umin %v
9721 SDLoc DL(N);
9722 SDValue Source = N->getOperand(0);
9723 SDValue Mask = N->getOperand(1);
9724 SDValue EVL = N->getOperand(2);
9725 EVT SrcVT = Source.getValueType();
9726 EVT ResVT = N->getValueType(0);
9727 EVT ResVecVT =
9728 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9729
9730 // Convert to boolean vector.
9731 if (SrcVT.getScalarType() != MVT::i1) {
9732 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9733 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9734 SrcVT.getVectorElementCount());
9735 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9736 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9737 }
9738
9739 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9740 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9741 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9742 SDValue Select =
9743 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9744 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9745}
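// A short trace of the recipe above, assuming EVL = 4 and an i1 source of
// <0, 1, 0, 1>: step_vector gives <0, 1, 2, 3>, the vp.select keeps the step
// value in active lanes and the splatted EVL elsewhere, i.e. <4, 1, 4, 3>,
// and vp.reduce.umin returns 1, the first active index; with no active lane
// the reduction yields EVL itself, matching the cttz.elts semantics.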
9746
 9747SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
 9748                                                   SelectionDAG &DAG) const {
9749 SDLoc DL(N);
9750 SDValue Mask = N->getOperand(0);
9751 EVT MaskVT = Mask.getValueType();
9752 EVT BoolVT = MaskVT.getScalarType();
9753
9754 // Find a suitable type for a stepvector.
9755 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9756 if (MaskVT.isScalableVector())
9757 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9758 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9759 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
9760 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9761 /*ZeroIsPoison=*/true, &VScaleRange);
9762 // If the step vector element type is smaller than the mask element type,
9763 // use the mask type directly to avoid widening issues.
9764 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
9765 EVT StepVT = MVT::getIntegerVT(EltWidth);
9766 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
9767
9768 // If promotion or widening is required to make the type legal, do it here.
9769 // Promotion of integers within LegalizeVectorOps is looking for types of
9770 // the same size but with a smaller number of larger elements, not the usual
9771 // larger size with the same number of larger elements.
 9772  TargetLowering::LegalizeTypeAction TypeAction =
 9773      TLI.getTypeAction(StepVecVT.getSimpleVT());
9774 SDValue StepVec;
9775 if (TypeAction == TargetLowering::TypePromoteInteger) {
9776 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9777 StepVT = StepVecVT.getVectorElementType();
9778 StepVec = DAG.getStepVector(DL, StepVecVT);
9779 } else if (TypeAction == TargetLowering::TypeWidenVector) {
9780 // For widening, the element count changes. Create a step vector with only
9781 // the original elements valid and zeros for padding. Also widen the mask.
9782 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9783 unsigned WideNumElts = WideVecVT.getVectorNumElements();
9784
9785 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
9786 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
9787 SDValue UndefStep = DAG.getPOISON(WideVecVT);
9788 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
9789
9790 // Widen mask: pad with zeros.
9791 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
9792 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
9793 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
9794
9795 StepVecVT = WideVecVT;
9796 StepVT = WideVecVT.getVectorElementType();
9797 } else {
9798 StepVec = DAG.getStepVector(DL, StepVecVT);
9799 }
9800
9801 // Zero out lanes with inactive elements, then find the highest remaining
9802 // value from the stepvector.
9803 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9804 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9805 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9806 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9807}
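// A small trace of the expansion above for a fixed-width v4i1 mask
// <1, 0, 1, 0>: the step vector is <0, 1, 2, 3>, the select keeps
// <0, 0, 2, 0>, and VECREDUCE_UMAX returns 2, the last active index. Note an
// all-false mask also reduces to 0, so callers must disambiguate that case.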
9808
 9809SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
 9810                                  bool IsNegative) const {
9811 SDLoc dl(N);
9812 EVT VT = N->getValueType(0);
9813 SDValue Op = N->getOperand(0);
9814
9815 // abs(x) -> smax(x,sub(0,x))
 9816  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
 9817      isOperationLegal(ISD::SMAX, VT)) {
 9818    SDValue Zero = DAG.getConstant(0, dl, VT);
9819 Op = DAG.getFreeze(Op);
9820 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9821 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9822 }
9823
9824 // abs(x) -> umin(x,sub(0,x))
 9825  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
 9826      isOperationLegal(ISD::UMIN, VT)) {
 9827    SDValue Zero = DAG.getConstant(0, dl, VT);
9828 Op = DAG.getFreeze(Op);
9829 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9830 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9831 }
9832
9833 // 0 - abs(x) -> smin(x, sub(0,x))
 9834  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
 9835      isOperationLegal(ISD::SMIN, VT)) {
 9836    SDValue Zero = DAG.getConstant(0, dl, VT);
9837 Op = DAG.getFreeze(Op);
9838 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9839 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9840 }
9841
9842 // Only expand vector types if we have the appropriate vector operations.
 9843  if (VT.isVector() &&
 9844      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
 9845       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
 9846       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
 9847       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
 9848    return SDValue();
9849
9850 Op = DAG.getFreeze(Op);
9851 SDValue Shift = DAG.getNode(
9852 ISD::SRA, dl, VT, Op,
9853 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9854 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9855
9856 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9857 if (!IsNegative)
9858 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9859
9860 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9861 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9862}
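// A scalar model of the shift-based fallback (illustrative only; assumes a
// 32-bit int and an arithmetic right shift):
[[maybe_unused]] static int absViaShiftModel(int X) {
  int Sign = X >> 31;       // 0 when X >= 0, all-ones (-1) when X < 0.
  return (X ^ Sign) - Sign; // X unchanged, or ~X + 1 == -X.
}
// For IsNegative the operands swap, Sign - (X ^ Sign), yielding -abs(X).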
9863
 9864SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
 9865  SDLoc dl(N);
9866 EVT VT = N->getValueType(0);
9867 SDValue LHS = N->getOperand(0);
9868 SDValue RHS = N->getOperand(1);
9869 bool IsSigned = N->getOpcode() == ISD::ABDS;
9870
9871 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9872 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9873 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9874 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9875 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9876 LHS = DAG.getFreeze(LHS);
9877 RHS = DAG.getFreeze(RHS);
9878 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9879 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9880 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9881 }
9882
9883 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9884 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9885 LHS = DAG.getFreeze(LHS);
9886 RHS = DAG.getFreeze(RHS);
9887 return DAG.getNode(ISD::OR, dl, VT,
9888 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9889 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9890 }
9891
9892 // If the subtract doesn't overflow then just use abs(sub())
9893 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9894
9895 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9896 return DAG.getNode(ISD::ABS, dl, VT,
9897 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9898
9899 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9900 return DAG.getNode(ISD::ABS, dl, VT,
9901 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9902
 9903  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9904  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
 9905  LHS = DAG.getFreeze(LHS);
9906 RHS = DAG.getFreeze(RHS);
9907 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9908
9909 // Branchless expansion iff cmp result is allbits:
9910 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9911 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9912 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9913 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9914 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9915 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9916 }
9917
9918 // Similar to the branchless expansion, if we don't prefer selects, use the
9919 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
9920 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
9921 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
 9922  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
 9923      !shouldExpandCmpUsingSelects(VT)) {
 9924    SDValue USubO =
9925 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9926 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9927 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9928 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9929 }
9930
9931 // FIXME: Should really try to split the vector in case it's legal on a
 9932  // subvector.
 9933  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
 9934    return DAG.UnrollVectorOp(N);
9935
9936 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9937 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9938 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9939 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9940}
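// For the all-bits branchless form above, with Diff = lhs - rhs: when Cmp is
// all-ones, sub(Cmp, xor(Diff, Cmp)) = -1 - ~Diff = Diff; when Cmp is zero it
// is 0 - Diff = rhs - lhs, so both arms produce the non-negative difference
// without a select.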
9941
 9942SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
 9943  SDLoc dl(N);
9944 EVT VT = N->getValueType(0);
9945 SDValue LHS = N->getOperand(0);
9946 SDValue RHS = N->getOperand(1);
9947
9948 unsigned Opc = N->getOpcode();
9949 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9950 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9951 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9952 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9953 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9954 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 9955  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
 9956          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9957 "Unknown AVG node");
9958
9959 // If the operands are already extended, we can add+shift.
9960 bool IsExt =
9961 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9962 DAG.ComputeNumSignBits(RHS) >= 2) ||
9963 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9964 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9965 if (IsExt) {
9966 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9967 if (!IsFloor)
9968 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9969 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9970 DAG.getShiftAmountConstant(1, VT, dl));
9971 }
9972
9973 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9974 if (VT.isScalarInteger()) {
9975 unsigned BW = VT.getScalarSizeInBits();
9976 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9977 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9978 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9979 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9980 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9981 if (!IsFloor)
9982 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9983 DAG.getConstant(1, dl, ExtVT));
9984 // Just use SRL as we will be truncating away the extended sign bits.
9985 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9986 DAG.getShiftAmountConstant(1, ExtVT, dl));
9987 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9988 }
9989 }
9990
9991 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9992 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9993 SDValue UAddWithOverflow =
9994 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9995
9996 SDValue Sum = UAddWithOverflow.getValue(0);
9997 SDValue Overflow = UAddWithOverflow.getValue(1);
9998
9999 // Right shift the sum by 1
10000 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10001 DAG.getShiftAmountConstant(1, VT, dl));
10002
10003 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10004 SDValue OverflowShl = DAG.getNode(
10005 ISD::SHL, dl, VT, ZeroExtOverflow,
10006 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10007
10008 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10009 }
10010
10011 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10012 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10013 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10014 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10015 LHS = DAG.getFreeze(LHS);
10016 RHS = DAG.getFreeze(RHS);
10017 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10018 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10019 SDValue Shift =
10020 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10021 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10022}
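// The final expansion rests on the carry-free identity
// a + b == 2*(a & b) + (a ^ b), which halves the sum without the intermediate
// a + b overflowing. A scalar sketch of the unsigned cases (illustrative
// only; helper names are not part of TargetLowering):
[[maybe_unused]] static unsigned avgFloorUModel(unsigned A, unsigned B) {
  return (A & B) + ((A ^ B) >> 1); // add(and(a,b), lshr(xor(a,b), 1))
}
[[maybe_unused]] static unsigned avgCeilUModel(unsigned A, unsigned B) {
  return (A | B) - ((A ^ B) >> 1); // sub(or(a,b), lshr(xor(a,b), 1))
}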
10023
10024SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
10025  SDLoc dl(N);
10026 EVT VT = N->getValueType(0);
10027 SDValue Op = N->getOperand(0);
10028
10029 if (!VT.isSimple())
10030 return SDValue();
10031
10032 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10033 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10034 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10035 default:
10036 return SDValue();
10037 case MVT::i16:
10038 // Use a rotate by 8. This can be further expanded if necessary.
10039 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10040 case MVT::i32:
10041    // This is meant for ARM specifically, which has ROTR but no ROTL.
10042    if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
10043      SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
10044 // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
10045 SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
10046 SDValue Rotr =
10047 DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
10048 SDValue Rotl =
10049 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10050 SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
10051 return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
10052 }
10053 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10054 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10055 DAG.getConstant(0xFF00, dl, VT));
10056 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10057 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10058 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10059 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10060 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10061 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10062 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10063 case MVT::i64:
10064 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10065 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10066 DAG.getConstant(255ULL<<8, dl, VT));
10067 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10068 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10069 DAG.getConstant(255ULL<<16, dl, VT));
10070 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10071 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10072 DAG.getConstant(255ULL<<24, dl, VT));
10073 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10074 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10075 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10076 DAG.getConstant(255ULL<<24, dl, VT));
10077 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10078 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10079 DAG.getConstant(255ULL<<16, dl, VT));
10080 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10081 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10082 DAG.getConstant(255ULL<<8, dl, VT));
10083 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10084 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10085 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10086 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10087 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10088 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10089 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10090 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10091 }
10092}
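// A worked i32 example of the shift/mask path above, for Op = 0xAABBCCDD:
// Tmp4 = 0xDD000000, Tmp3 = 0x00CC0000, Tmp2 = 0x0000BB00, Tmp1 = 0x000000AA,
// and the OR tree combines them into 0xDDCCBBAA.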
10093
10094SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
10095  SDLoc dl(N);
10096 EVT VT = N->getValueType(0);
10097 SDValue Op = N->getOperand(0);
10098 SDValue Mask = N->getOperand(1);
10099 SDValue EVL = N->getOperand(2);
10100
10101 if (!VT.isSimple())
10102 return SDValue();
10103
10104 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10105 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10106 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10107 default:
10108 return SDValue();
10109 case MVT::i16:
10110 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10111 Mask, EVL);
10112 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10113 Mask, EVL);
10114 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10115 case MVT::i32:
10116 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10117 Mask, EVL);
10118 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10119 Mask, EVL);
10120 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10121 Mask, EVL);
10122 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10123 Mask, EVL);
10124 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10125 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10126 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10127 Mask, EVL);
10128 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10129 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10130 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10131 case MVT::i64:
10132 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10133 Mask, EVL);
10134 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10135 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10136 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10137 Mask, EVL);
10138 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10139 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10140 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10141 Mask, EVL);
10142 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10143 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10144 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10145 Mask, EVL);
10146 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10147 Mask, EVL);
10148 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10149 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10150 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10151 Mask, EVL);
10152 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10153 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10154 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10155 Mask, EVL);
10156 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10157 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10158 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10159 Mask, EVL);
10160 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10161 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10162 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10163 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10164 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10165 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10166 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10167 }
10168}
10169
10170SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10171  SDLoc dl(N);
10172 EVT VT = N->getValueType(0);
10173 SDValue Op = N->getOperand(0);
10174 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10175 unsigned Sz = VT.getScalarSizeInBits();
10176
10177 SDValue Tmp, Tmp2, Tmp3;
10178
10179 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10180 // and finally the i1 pairs.
10181 // TODO: We can easily support i4/i2 legal types if any target ever does.
10182 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10183 // Create the masks - repeating the pattern every byte.
10184 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10185 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10186 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10187
10188 // BSWAP if the type is wider than a single byte.
10189 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10190
10191 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10192 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10193 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10194 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10195 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10196 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10197
10198 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10199 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10200 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10201 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10202 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10203 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10204
10205 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10206 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10207 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10208 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10209 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10210 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10211 return Tmp;
10212 }
10213
10214 Tmp = DAG.getConstant(0, dl, VT);
10215 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10216 if (I < J)
10217 Tmp2 =
10218 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10219 else
10220 Tmp2 =
10221 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10222
10223 APInt Shift = APInt::getOneBitSet(Sz, J);
10224 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10225 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10226 }
10227
10228 return Tmp;
10229}
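// A scalar model of the power-of-two path (illustrative only; assumes a
// 32-bit 'unsigned' and llvm::byteswap from llvm/ADT/bit.h):
[[maybe_unused]] static unsigned bitReverseModel(unsigned V) {
  V = llvm::byteswap(V);                                   // reverse bytes
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap bit pairs
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap single bits
  return V;
}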
10230
10231SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10232  assert(N->getOpcode() == ISD::VP_BITREVERSE);
10233
10234 SDLoc dl(N);
10235 EVT VT = N->getValueType(0);
10236 SDValue Op = N->getOperand(0);
10237 SDValue Mask = N->getOperand(1);
10238 SDValue EVL = N->getOperand(2);
10239 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10240 unsigned Sz = VT.getScalarSizeInBits();
10241
10242 SDValue Tmp, Tmp2, Tmp3;
10243
10244 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10245 // and finally the i1 pairs.
10246 // TODO: We can easily support i4/i2 legal types if any target ever does.
10247 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10248 // Create the masks - repeating the pattern every byte.
10249 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10250 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10251 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10252
10253 // BSWAP if the type is wider than a single byte.
10254 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10255
10256 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10257 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10258 Mask, EVL);
10259 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10260 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10261 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10262 Mask, EVL);
10263 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10264 Mask, EVL);
10265 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10266
10267 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10268 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10269 Mask, EVL);
10270 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10271 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10272 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10273 Mask, EVL);
10274 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10275 Mask, EVL);
10276 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10277
10278 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10279 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10280 Mask, EVL);
10281 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10282 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10283 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10284 Mask, EVL);
10285 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10286 Mask, EVL);
10287 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10288 return Tmp;
10289 }
10290 return SDValue();
10291}
10292
10293std::pair<SDValue, SDValue>
10294TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10295                                    SelectionDAG &DAG) const {
10296 SDLoc SL(LD);
10297 SDValue Chain = LD->getChain();
10298 SDValue BasePTR = LD->getBasePtr();
10299 EVT SrcVT = LD->getMemoryVT();
10300 EVT DstVT = LD->getValueType(0);
10301 ISD::LoadExtType ExtType = LD->getExtensionType();
10302
10303 if (SrcVT.isScalableVector())
10304 report_fatal_error("Cannot scalarize scalable vector loads");
10305
10306 unsigned NumElem = SrcVT.getVectorNumElements();
10307
10308 EVT SrcEltVT = SrcVT.getScalarType();
10309 EVT DstEltVT = DstVT.getScalarType();
10310
10311 // A vector must always be stored in memory as-is, i.e. without any padding
10312  // between the elements, since various pieces of code depend on it, e.g. in
10313 // handling of a bitcast of a vector type to int, which may be done with a
10314 // vector store followed by an integer load. A vector that does not have
10315 // elements that are byte-sized must therefore be stored as an integer
10316 // built out of the extracted vector elements.
10317 if (!SrcEltVT.isByteSized()) {
10318 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10319 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10320
10321 unsigned NumSrcBits = SrcVT.getSizeInBits();
10322 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10323
10324 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10325 SDValue SrcEltBitMask = DAG.getConstant(
10326 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10327
10328 // Load the whole vector and avoid masking off the top bits as it makes
10329 // the codegen worse.
10330 SDValue Load =
10331 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10332 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10333 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10334
10335    SmallVector<SDValue, 8> Vals;
10336    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10337 unsigned ShiftIntoIdx =
10338 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10339 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10340 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10341 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10342 SDValue Elt =
10343 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10344 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10345
10346 if (ExtType != ISD::NON_EXTLOAD) {
10347 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10348 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10349 }
10350
10351 Vals.push_back(Scalar);
10352 }
10353
10354 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10355 return std::make_pair(Value, Load.getValue(1));
10356 }
10357
10358 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10359 assert(SrcEltVT.isByteSized());
10360
10361  SmallVector<SDValue, 8> Vals;
10362  SmallVector<SDValue, 8> LoadChains;
10363
10364 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10365 SDValue ScalarLoad = DAG.getExtLoad(
10366 ExtType, SL, DstEltVT, Chain, BasePTR,
10367 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10368 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10369
10370 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10371
10372 Vals.push_back(ScalarLoad.getValue(0));
10373 LoadChains.push_back(ScalarLoad.getValue(1));
10374 }
10375
10376 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10377 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10378
10379 return std::make_pair(Value, NewChain);
10380}
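// For the non-byte-sized path above, a v4i2 load, for instance, is one i8
// EXTLOAD followed by SRL with shift amounts 0/2/4/6 (reversed on
// big-endian), an AND with 0x3, and a TRUNCATE per element, so the packed
// in-memory layout matches what a vector store of the same type produces.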
10381
10382SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10383                                             SelectionDAG &DAG) const {
10384 SDLoc SL(ST);
10385
10386 SDValue Chain = ST->getChain();
10387 SDValue BasePtr = ST->getBasePtr();
10388 SDValue Value = ST->getValue();
10389 EVT StVT = ST->getMemoryVT();
10390
10391 if (StVT.isScalableVector())
10392 report_fatal_error("Cannot scalarize scalable vector stores");
10393
10394 // The type of the data we want to save
10395 EVT RegVT = Value.getValueType();
10396 EVT RegSclVT = RegVT.getScalarType();
10397
10398 // The type of data as saved in memory.
10399 EVT MemSclVT = StVT.getScalarType();
10400
10401 unsigned NumElem = StVT.getVectorNumElements();
10402
10403 // A vector must always be stored in memory as-is, i.e. without any padding
10404  // between the elements, since various pieces of code depend on it, e.g. in
10405 // handling of a bitcast of a vector type to int, which may be done with a
10406 // vector store followed by an integer load. A vector that does not have
10407 // elements that are byte-sized must therefore be stored as an integer
10408 // built out of the extracted vector elements.
10409 if (!MemSclVT.isByteSized()) {
10410 unsigned NumBits = StVT.getSizeInBits();
10411 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10412
10413 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10414
10415 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10416 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10417 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10418 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10419 unsigned ShiftIntoIdx =
10420 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10421 SDValue ShiftAmount =
10422 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10423 SDValue ShiftedElt =
10424 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10425 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10426 }
10427
10428 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10429 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10430 ST->getAAInfo());
10431 }
10432
10433 // Store Stride in bytes
10434 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10435 assert(Stride && "Zero stride!");
10436 // Extract each of the elements from the original vector and save them into
10437 // memory individually.
10438  SmallVector<SDValue, 8> Stores;
10439  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10440 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10441
10442 SDValue Ptr =
10443 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10444
10445 // This scalar TruncStore may be illegal, but we legalize it later.
10446 SDValue Store = DAG.getTruncStore(
10447 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10448 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10449 ST->getAAInfo());
10450
10451 Stores.push_back(Store);
10452 }
10453
10454 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10455}
10456
10457std::pair<SDValue, SDValue>
10458TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10459  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10460 "unaligned indexed loads not implemented!");
10461 SDValue Chain = LD->getChain();
10462 SDValue Ptr = LD->getBasePtr();
10463 EVT VT = LD->getValueType(0);
10464 EVT LoadedVT = LD->getMemoryVT();
10465 SDLoc dl(LD);
10466 auto &MF = DAG.getMachineFunction();
10467
10468 if (VT.isFloatingPoint() || VT.isVector()) {
10469 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10470 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10471 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10472 LoadedVT.isVector()) {
10473 // Scalarize the load and let the individual components be handled.
10474 return scalarizeVectorLoad(LD, DAG);
10475 }
10476
10477 // Expand to a (misaligned) integer load of the same size,
10478 // then bitconvert to floating point or vector.
10479 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10480 LD->getMemOperand());
10481 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10482 if (LoadedVT != VT)
10483 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10484 ISD::ANY_EXTEND, dl, VT, Result);
10485
10486 return std::make_pair(Result, newLoad.getValue(1));
10487 }
10488
10489    // Copy the value to an (aligned) stack slot using (unaligned) integer
10490    // loads and stores, then do an (aligned) load from the stack slot.
10491 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10492 unsigned LoadedBytes = LoadedVT.getStoreSize();
10493 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10494 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10495
10496 // Make sure the stack slot is also aligned for the register type.
10497 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10498 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10499    SmallVector<SDValue, 8> Stores;
10500    SDValue StackPtr = StackBase;
10501 unsigned Offset = 0;
10502
10503 EVT PtrVT = Ptr.getValueType();
10504 EVT StackPtrVT = StackPtr.getValueType();
10505
10506 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10507 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10508
10509    // Do all but one of the copies using the full register width.
10510 for (unsigned i = 1; i < NumRegs; i++) {
10511 // Load one integer register's worth from the original location.
10512 SDValue Load = DAG.getLoad(
10513 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10514 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10515 // Follow the load with a store to the stack slot. Remember the store.
10516 Stores.push_back(DAG.getStore(
10517 Load.getValue(1), dl, Load, StackPtr,
10518 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10519 // Increment the pointers.
10520 Offset += RegBytes;
10521
10522 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10523 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10524 }
10525
10526 // The last copy may be partial. Do an extending load.
10527 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10528 8 * (LoadedBytes - Offset));
10529 SDValue Load = DAG.getExtLoad(
10530 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10531 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10532 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10533 // Follow the load with a store to the stack slot. Remember the store.
10534 // On big-endian machines this requires a truncating store to ensure
10535 // that the bits end up in the right place.
10536 Stores.push_back(DAG.getTruncStore(
10537 Load.getValue(1), dl, Load, StackPtr,
10538 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10539
10540 // The order of the stores doesn't matter - say it with a TokenFactor.
10541 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10542
10543 // Finally, perform the original load only redirected to the stack slot.
10544 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10545 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10546 LoadedVT);
10547
10548 // Callers expect a MERGE_VALUES node.
10549 return std::make_pair(Load, TF);
10550 }
10551
10552 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10553 "Unaligned load of unsupported type.");
10554
10555 // Compute the new VT that is half the size of the old one. This is an
10556 // integer MVT.
10557 unsigned NumBits = LoadedVT.getSizeInBits();
10558 EVT NewLoadedVT;
10559 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10560 NumBits >>= 1;
10561
10562 Align Alignment = LD->getBaseAlign();
10563 unsigned IncrementSize = NumBits / 8;
10564 ISD::LoadExtType HiExtType = LD->getExtensionType();
10565
10566 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10567 if (HiExtType == ISD::NON_EXTLOAD)
10568 HiExtType = ISD::ZEXTLOAD;
10569
10570 // Load the value in two parts
10571 SDValue Lo, Hi;
10572 if (DAG.getDataLayout().isLittleEndian()) {
10573 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10574 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10575 LD->getAAInfo());
10576
10577 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10578 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10579 LD->getPointerInfo().getWithOffset(IncrementSize),
10580 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10581 LD->getAAInfo());
10582 } else {
10583 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10584 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10585 LD->getAAInfo());
10586
10587 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10588 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10589 LD->getPointerInfo().getWithOffset(IncrementSize),
10590 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10591 LD->getAAInfo());
10592 }
10593
10594  // Aggregate the two parts.
10595 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10596 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10597 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10598
10599 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10600 Hi.getValue(1));
10601
10602 return std::make_pair(Result, TF);
10603}
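// A concrete instance of the integer split above: an unaligned little-endian
// i32 load becomes a ZEXTLOAD of i16 at Ptr (Lo) plus an extending i16 load
// at Ptr + 2 (Hi, zero- or sign-extending per the original load), combined as
// or(shl(Hi, 16), Lo), with both load chains joined by a TokenFactor.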
10604
10605SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10606                                             SelectionDAG &DAG) const {
10607 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10608 "unaligned indexed stores not implemented!");
10609 SDValue Chain = ST->getChain();
10610 SDValue Ptr = ST->getBasePtr();
10611 SDValue Val = ST->getValue();
10612 EVT VT = Val.getValueType();
10613 Align Alignment = ST->getBaseAlign();
10614 auto &MF = DAG.getMachineFunction();
10615 EVT StoreMemVT = ST->getMemoryVT();
10616
10617 SDLoc dl(ST);
10618 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10619 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10620 if (isTypeLegal(intVT)) {
10621 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10622 StoreMemVT.isVector()) {
10623 // Scalarize the store and let the individual components be handled.
10624 SDValue Result = scalarizeVectorStore(ST, DAG);
10625 return Result;
10626 }
10627 // Expand to a bitconvert of the value to the integer type of the
10628 // same size, then a (misaligned) int store.
10629 // FIXME: Does not handle truncating floating point stores!
10630 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10631 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10632 Alignment, ST->getMemOperand()->getFlags());
10633 return Result;
10634 }
10635    // Do an (aligned) store to a stack slot, then copy from the stack slot
10636 // to the final destination using (unaligned) integer loads and stores.
10637 MVT RegVT = getRegisterType(
10638 *DAG.getContext(),
10639 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10640 EVT PtrVT = Ptr.getValueType();
10641 unsigned StoredBytes = StoreMemVT.getStoreSize();
10642 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10643 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10644
10645 // Make sure the stack slot is also aligned for the register type.
10646 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10647 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10648
10649 // Perform the original store, only redirected to the stack slot.
10650 SDValue Store = DAG.getTruncStore(
10651 Chain, dl, Val, StackPtr,
10652 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10653
10654 EVT StackPtrVT = StackPtr.getValueType();
10655
10656 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10657 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10658    SmallVector<SDValue, 8> Stores;
10659    unsigned Offset = 0;
10660
10661    // Do all but one of the copies using the full register width.
10662 for (unsigned i = 1; i < NumRegs; i++) {
10663 // Load one integer register's worth from the stack slot.
10664 SDValue Load = DAG.getLoad(
10665 RegVT, dl, Store, StackPtr,
10666 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10667 // Store it to the final location. Remember the store.
10668 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10669 ST->getPointerInfo().getWithOffset(Offset),
10670 ST->getBaseAlign(),
10671 ST->getMemOperand()->getFlags()));
10672 // Increment the pointers.
10673 Offset += RegBytes;
10674 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10675 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10676 }
10677
10678 // The last store may be partial. Do a truncating store. On big-endian
10679 // machines this requires an extending load from the stack slot to ensure
10680 // that the bits are in the right place.
10681 EVT LoadMemVT =
10682 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10683
10684 // Load from the stack slot.
10685 SDValue Load = DAG.getExtLoad(
10686 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10687 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10688
10689 Stores.push_back(DAG.getTruncStore(
10690 Load.getValue(1), dl, Load, Ptr,
10691 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10692 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10693 // The order of the stores doesn't matter - say it with a TokenFactor.
10694 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10695 return Result;
10696 }
10697
10698 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10699 "Unaligned store of unknown type.");
10700 // Get the half-size VT
10701 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10702 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10703 unsigned IncrementSize = NumBits / 8;
10704
10705 // Divide the stored value in two parts.
10706 SDValue ShiftAmount =
10707 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10708 SDValue Lo = Val;
10709 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10710 // fold and not use the upper bits. A smaller constant may be easier to
10711 // materialize.
10712 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10713 Lo = DAG.getNode(
10714 ISD::AND, dl, VT, Lo,
10715 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10716 VT));
10717 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10718
10719 // Store the two parts
10720 SDValue Store1, Store2;
10721 Store1 = DAG.getTruncStore(Chain, dl,
10722 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10723 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10724 ST->getMemOperand()->getFlags());
10725
10726 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10727 Store2 = DAG.getTruncStore(
10728 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10729 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10730 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10731
10732 SDValue Result =
10733 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10734 return Result;
10735}
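// The matching split for stores: an unaligned little-endian i32 store of Val
// becomes a TRUNCSTORE of i16 of Val at Ptr and a TRUNCSTORE of i16 of
// srl(Val, 16) at Ptr + 2 (halves swapped on big-endian), tied together with
// a TokenFactor.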
10736
10737SDValue
10738TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10739                                       const SDLoc &DL, EVT DataVT,
10740 SelectionDAG &DAG,
10741 bool IsCompressedMemory) const {
10742  SDValue Increment;
10743  EVT AddrVT = Addr.getValueType();
10744 EVT MaskVT = Mask.getValueType();
10745 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10746 "Incompatible types of Data and Mask");
10747 if (IsCompressedMemory) {
10748 // Incrementing the pointer according to number of '1's in the mask.
10749 if (DataVT.isScalableVector()) {
10750 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
10751 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
10752 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
10753 } else {
10754 EVT MaskIntVT =
10755 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10756 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10757 if (MaskIntVT.getSizeInBits() < 32) {
10758 MaskInIntReg =
10759 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10760 MaskIntVT = MVT::i32;
10761 }
10762 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10763 }
10764 // Scale is an element size in bytes.
10765 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10766 AddrVT);
10767 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10768 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10769 } else
10770 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
10771
10772 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10773}
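// In the compressed case the pointer advances by the count of active lanes:
// e.g. a v8i32 compress-store whose mask has three bits set bumps Addr by
// ctpop(mask) * 4 == 12 bytes, whereas the normal case always advances by the
// full store size of DataVT.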
10774
10775static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10776                                       EVT VecVT, const SDLoc &dl,
10777 ElementCount SubEC) {
10778 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10779 "Cannot index a scalable vector within a fixed-width vector");
10780
10781 unsigned NElts = VecVT.getVectorMinNumElements();
10782 unsigned NumSubElts = SubEC.getKnownMinValue();
10783 EVT IdxVT = Idx.getValueType();
10784
10785 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10786 // If this is a constant index and we know the value plus the number of the
10787 // elements in the subvector minus one is less than the minimum number of
10788 // elements then it's safe to return Idx.
10789 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10790 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10791 return Idx;
10792 SDValue VS =
10793 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10794 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10795 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10796 DAG.getConstant(NumSubElts, dl, IdxVT));
10797 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10798 }
10799 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10800 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10801 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10802 DAG.getConstant(Imm, dl, IdxVT));
10803 }
10804 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10805 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10806 DAG.getConstant(MaxIndex, dl, IdxVT));
10807}
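// The fixed-width clamps above keep the access in bounds instead of trapping:
// extracting a single element from a power-of-two v8i32 reduces to AND-ing
// the index with 7, while a subvector index is clamped with UMIN to
// NElts - NumSubElts, the last offset at which the subvector still fits.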
10808
10809SDValue
10810TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
10811                                        EVT VecVT, SDValue Index,
10812 const SDNodeFlags PtrArithFlags) const {
10813  return getVectorSubVecPointer(
10814      DAG, VecPtr, VecVT,
10815      EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10816      Index, PtrArithFlags);
10817}
10818
10819SDValue
10820TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
10821                                       EVT VecVT, EVT SubVecVT, SDValue Index,
10822 const SDNodeFlags PtrArithFlags) const {
10823 SDLoc dl(Index);
10824 // Make sure the index type is big enough to compute in.
10825 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10826
10827 EVT EltVT = VecVT.getVectorElementType();
10828
10829 // Calculate the element offset and add it to the pointer.
10830 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10831 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10832 "Converting bits to bytes lost precision");
10833 assert(SubVecVT.getVectorElementType() == EltVT &&
10834 "Sub-vector must be a vector with matching element type");
10835 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10836 SubVecVT.getVectorElementCount());
10837
10838 EVT IdxVT = Index.getValueType();
10839 if (SubVecVT.isScalableVector())
10840 Index =
10841 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10842 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10843
10844 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10845 DAG.getConstant(EltSize, dl, IdxVT));
10846 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
10847}
10848
10849//===----------------------------------------------------------------------===//
10850// Implementation of Emulated TLS Model
10851//===----------------------------------------------------------------------===//
10852
10853SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10854 SelectionDAG &DAG) const {
10855 // Access to the address of TLS variable xyz is lowered to a function call:
10856 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10857 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10858 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10859 SDLoc dl(GA);
10860
10861 ArgListTy Args;
10862 const GlobalValue *GV =
10863 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10864 SmallString<32> NameString("__emutls_v.");
10865 NameString += GV->getName();
10866 StringRef EmuTlsVarName(NameString);
10867 const GlobalVariable *EmuTlsVar =
10868 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10869 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10870 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10871
10872 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10873
10874 TargetLowering::CallLoweringInfo CLI(DAG);
10875 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10876 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10877 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10878
10879 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10880 // At least for X86 targets, maybe good for other targets too?
10881 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10882 MFI.setAdjustsStack(true); // Is this only for X86 target?
10883 MFI.setHasCalls(true);
10884
10885 assert((GA->getOffset() == 0) &&
10886 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10887 return CallResult.first;
10888}
10889
10890SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10891 SelectionDAG &DAG) const {
10892 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10893 if (!isCtlzFast())
10894 return SDValue();
10895 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10896 SDLoc dl(Op);
10897 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10898 EVT VT = Op.getOperand(0).getValueType();
10899 SDValue Zext = Op.getOperand(0);
10900 if (VT.bitsLT(MVT::i32)) {
10901 VT = MVT::i32;
10902 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10903 }
10904 unsigned Log2b = Log2_32(VT.getSizeInBits());
10905 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10906 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10907 DAG.getConstant(Log2b, dl, MVT::i32));
10908 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10909 }
10910 return SDValue();
10911}
10912
10913SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10914 SDValue Op0 = Node->getOperand(0);
10915 SDValue Op1 = Node->getOperand(1);
10916 EVT VT = Op0.getValueType();
10917 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10918 unsigned Opcode = Node->getOpcode();
10919 SDLoc DL(Node);
10920
10921 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
10922 unsigned AltOpcode;
10923 switch (Opcode) {
10924 case ISD::SMIN:
10925 AltOpcode = ISD::UMIN;
10926 break;
10927 case ISD::SMAX:
10928 AltOpcode = ISD::UMAX;
10929 break;
10930 case ISD::UMIN:
10931 AltOpcode = ISD::SMIN;
10932 break;
10933 case ISD::UMAX:
10934 AltOpcode = ISD::SMAX;
10935 break;
10936 default:
10937 llvm_unreachable("Unknown MINMAX opcode");
10938 }
10939 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
10940 DAG.SignBitIsZero(Op1))
10941 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
10942
10943 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
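// e.g. x == 0: the SETCC yields all-ones, so 0 - (-1) == 1;
// x != 0: the SETCC yields 0, so x - 0 == x.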
10944 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10945 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10946 Op0 = DAG.getFreeze(Op0);
10947 SDValue Zero = DAG.getConstant(0, DL, VT);
10948 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10949 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10950 }
10951
10952 // umin(x,y) -> sub(x,usubsat(x,y))
10953 // TODO: Missing freeze(Op0)?
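// usubsat(x,y) is x - y when x > y and 0 otherwise, so the outer SUB
// yields y in the first case and x in the second, i.e. umin(x,y).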
10954 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10955 isOperationLegalOrCustom(ISD::USUBSAT, VT)) {
10956 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10957 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10958 }
10959
10960 // umax(x,y) -> add(x,usubsat(y,x))
10961 // TODO: Missing freeze(Op0)?
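// usubsat(y,x) is y - x when y > x and 0 otherwise, so the ADD yields
// y in the first case and x in the second, i.e. umax(x,y).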
10962 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10963 isOperationLegalOrCustom(ISD::USUBSAT, VT)) {
10964 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10965 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10966 }
10967
10968 // FIXME: Should really try to split the vector in case it's legal on a
10969 // subvector.
10970 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10971 return DAG.UnrollVectorOp(Node);
10972
10973 // Attempt to find an existing SETCC node that we can reuse.
10974 // TODO: Do we need a generic doesSETCCNodeExist?
10975 // TODO: Missing freeze(Op0)/freeze(Op1)?
10976 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10977 ISD::CondCode PrefCommuteCC,
10978 ISD::CondCode AltCommuteCC) {
10979 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10980 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10981 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10982 {Op0, Op1, DAG.getCondCode(CC)})) {
10983 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10984 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10985 }
10986 }
10987 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10988 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10989 {Op0, Op1, DAG.getCondCode(CC)})) {
10990 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10991 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10992 }
10993 }
10994 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10995 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10996 };
10997
10998 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10999 // -> Y = (A < B) ? B : A
11000 // -> Y = (A >= B) ? A : B
11001 // -> Y = (A <= B) ? B : A
11002 switch (Opcode) {
11003 case ISD::SMAX:
11004 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11005 case ISD::SMIN:
11006 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11007 case ISD::UMAX:
11008 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11009 case ISD::UMIN:
11010 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11011 }
11012
11013 llvm_unreachable("How did we get here?");
11014}
11015
11016SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
11017 unsigned Opcode = Node->getOpcode();
11018 SDValue LHS = Node->getOperand(0);
11019 SDValue RHS = Node->getOperand(1);
11020 EVT VT = LHS.getValueType();
11021 SDLoc dl(Node);
11022
11023 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11024 assert(VT.isInteger() && "Expected operands to be integers");
11025
11026 // usub.sat(a, b) -> umax(a, b) - b
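// If a <= b the UMAX picks b and the SUB yields 0; otherwise it picks a
// and the SUB yields the exact difference a - b.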
11027 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11028 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11029 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11030 }
11031
11032 // usub.sat(a, 1) -> sub(a, zext(a != 0))
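// (a != 0) contributes exactly 1 when a >= 1 and 0 when a == 0, so the
// SUB yields a - 1 or 0 respectively, which is the saturated result.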
11033 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
11034 LHS = DAG.getFreeze(LHS);
11035 SDValue Zero = DAG.getConstant(0, dl, VT);
11036 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11037 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11038 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
11039 Subtrahend =
11040 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11041 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11042 }
11043
11044 // uadd.sat(a, b) -> umin(a, ~b) + b
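// ~b equals UINT_MAX - b, so the UMIN caps a at the largest addend that
// cannot wrap; the ADD then yields either a + b exactly or UINT_MAX.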
11045 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11046 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11047 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11048 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11049 }
11050
11051 unsigned OverflowOp;
11052 switch (Opcode) {
11053 case ISD::SADDSAT:
11054 OverflowOp = ISD::SADDO;
11055 break;
11056 case ISD::UADDSAT:
11057 OverflowOp = ISD::UADDO;
11058 break;
11059 case ISD::SSUBSAT:
11060 OverflowOp = ISD::SSUBO;
11061 break;
11062 case ISD::USUBSAT:
11063 OverflowOp = ISD::USUBO;
11064 break;
11065 default:
11066 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11067 "addition or subtraction node.");
11068 }
11069
11070 // FIXME: Should really try to split the vector in case it's legal on a
11071 // subvector.
11072 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11073 return DAG.UnrollVectorOp(Node);
11074
11075 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11076 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11077 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11078 SDValue SumDiff = Result.getValue(0);
11079 SDValue Overflow = Result.getValue(1);
11080 SDValue Zero = DAG.getConstant(0, dl, VT);
11081 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11082
11083 if (Opcode == ISD::UADDSAT) {
11084 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
11085 // (LHS + RHS) | OverflowMask
11086 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11087 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11088 }
11089 // Overflow ? 0xffff.... : (LHS + RHS)
11090 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11091 }
11092
11093 if (Opcode == ISD::USUBSAT) {
11094 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
11095 // (LHS - RHS) & ~OverflowMask
11096 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11097 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11098 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11099 }
11100 // Overflow ? 0 : (LHS - RHS)
11101 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11102 }
11103
11104 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
11105 APInt MinVal = APInt::getSignedMinValue(BitWidth);
11106 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11107
11108 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11109 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11110
11111 // If either operand's sign is known, then the operation is guaranteed to
11112 // saturate in only one direction: towards SIGNED_MAX if non-negative, or
11113 // towards SIGNED_MIN if negative.
11114 //
11115 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11116 // sign of 'y' has to be flipped.
11117
11118 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11119 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
11120 : KnownRHS.isNegative();
11121 if (LHSIsNonNegative || RHSIsNonNegative) {
11122 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11123 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11124 }
11125
11126 bool LHSIsNegative = KnownLHS.isNegative();
11127 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
11128 : KnownRHS.isNonNegative();
11129 if (LHSIsNegative || RHSIsNegative) {
11130 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11131 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11132 }
11133 }
11134
11135 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
11136 APInt MinVal = APInt::getSignedMinValue(BitWidth);
11137 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11138 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11139 DAG.getConstant(BitWidth - 1, dl, VT));
11140 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11141 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11142}
11143
11144SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
11145 unsigned Opcode = Node->getOpcode();
11146 SDValue LHS = Node->getOperand(0);
11147 SDValue RHS = Node->getOperand(1);
11148 EVT VT = LHS.getValueType();
11149 EVT ResVT = Node->getValueType(0);
11150 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11151 SDLoc dl(Node);
11152
11153 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11154 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11155 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11156 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11157
11158 // We can't perform arithmetic on i1 values. Extending them would
11159 // probably result in worse codegen, so let's just use two selects instead.
11160 // Some targets are also just better off using selects rather than subtraction
11161 // because one of the conditions can be merged with one of the selects.
11162 // And finally, if we don't know the contents of high bits of a boolean value
11163 // we can't perform any arithmetic either.
11164 if (shouldExpandCmpUsingSelects(VT) ||
11165 BoolVT.getScalarSizeInBits() == 1 ||
11166 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
11167 SDValue SelectZeroOrOne =
11168 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11169 DAG.getConstant(0, dl, ResVT));
11170 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11171 SelectZeroOrOne);
11172 }
11173
11174 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
11175 std::swap(IsGT, IsLT);
11176 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11177 ResVT);
11178}
11179
11181 unsigned Opcode = Node->getOpcode();
11182 bool IsSigned = Opcode == ISD::SSHLSAT;
11183 SDValue LHS = Node->getOperand(0);
11184 SDValue RHS = Node->getOperand(1);
11185 EVT VT = LHS.getValueType();
11186 SDLoc dl(Node);
11187
11188 assert((Node->getOpcode() == ISD::SSHLSAT ||
11189 Node->getOpcode() == ISD::USHLSAT) &&
11190 "Expected a SHLSAT opcode");
11191 assert(VT.isInteger() && "Expected operands to be integers");
11192
11193 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11194 return DAG.UnrollVectorOp(Node);
11195
11196 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11197
11198 unsigned BW = VT.getScalarSizeInBits();
11199 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11200 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11201 SDValue Orig =
11202 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11203
11204 SDValue SatVal;
11205 if (IsSigned) {
11206 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11207 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11208 SDValue Cond =
11209 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11210 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11211 } else {
11212 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11213 }
11214 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11215 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11216}
11217
11218void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11219 bool Signed, SDValue &Lo, SDValue &Hi,
11220 SDValue LHS, SDValue RHS,
11221 SDValue HiLHS, SDValue HiRHS) const {
11222 EVT VT = LHS.getValueType();
11223 assert(RHS.getValueType() == VT && "Mismatching operand types");
11224
11225 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11226 assert((!Signed || !HiLHS) &&
11227 "Signed flag should only be set when HiLHS and RiRHS are null");
11228
11229 // We'll expand the multiplication by brute force because we have no other
11230 // options. This is a trivially-generalized version of the code from
11231 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11232 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11233 // sign bits while calculating the Hi half.
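// Writing LHS = LH:LL and RHS = RH:RL (HalfBits bits each), the product
// is LL*RL + ((LH*RL + LL*RH) << HalfBits) + ((LH*RH) << Bits). The code
// below accumulates these partial products half a word at a time so that
// every intermediate carry stays representable in VT.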
11234 unsigned Bits = VT.getSizeInBits();
11235 unsigned HalfBits = Bits / 2;
11236 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11237 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11238 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11239
11240 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11241 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11242
11243 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11244 // This is always an unsigned shift.
11245 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11246
11247 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11248 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11249 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11250
11251 SDValue U =
11252 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11253 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11254 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11255
11256 SDValue V =
11257 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11258 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11259
11260 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11261 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11262
11263 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11264 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11265
11266 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11267 // the products to Hi.
11268 if (HiLHS) {
11269 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11270 DAG.getNode(ISD::ADD, dl, VT,
11271 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11272 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11273 }
11274}
11275
11276void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11277 bool Signed, const SDValue LHS,
11278 const SDValue RHS, SDValue &Lo,
11279 SDValue &Hi) const {
11280 EVT VT = LHS.getValueType();
11281 assert(RHS.getValueType() == VT && "Mismatching operand types");
11282 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11283 // We can fall back to a libcall with an illegal type for the MUL if we
11284 // have a libcall big enough.
11285 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11286 if (WideVT == MVT::i16)
11287 LC = RTLIB::MUL_I16;
11288 else if (WideVT == MVT::i32)
11289 LC = RTLIB::MUL_I32;
11290 else if (WideVT == MVT::i64)
11291 LC = RTLIB::MUL_I64;
11292 else if (WideVT == MVT::i128)
11293 LC = RTLIB::MUL_I128;
11294
11295 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
11296 if (LibcallImpl == RTLIB::Unsupported) {
11297 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11298 return;
11299 }
11300
11301 SDValue HiLHS, HiRHS;
11302 if (Signed) {
11303 // The high part is obtained by SRA'ing all but one of the bits of low
11304 // part.
11305 unsigned LoSize = VT.getFixedSizeInBits();
11306 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11307 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11308 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11309 } else {
11310 HiLHS = DAG.getConstant(0, dl, VT);
11311 HiRHS = DAG.getConstant(0, dl, VT);
11312 }
11313
11314 // Attempt a libcall.
11315 SDValue Ret;
11316 TargetLowering::MakeLibCallOptions CallOptions;
11317 CallOptions.setIsSigned(Signed);
11318 CallOptions.setIsPostTypeLegalization(true);
11319 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11320 // Halves of WideVT are packed into registers in different order
11321 // depending on platform endianness. This is usually handled by
11322 // the C calling convention, but we can't defer to it in
11323 // the legalizer.
11324 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11325 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11326 } else {
11327 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11328 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11329 }
11331 "Ret value is a collection of constituent nodes holding result.");
11332 if (DAG.getDataLayout().isLittleEndian()) {
11333 // Same as above.
11334 Lo = Ret.getOperand(0);
11335 Hi = Ret.getOperand(1);
11336 } else {
11337 Lo = Ret.getOperand(1);
11338 Hi = Ret.getOperand(0);
11339 }
11340}
11341
11342SDValue
11343TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11344 assert((Node->getOpcode() == ISD::SMULFIX ||
11345 Node->getOpcode() == ISD::UMULFIX ||
11346 Node->getOpcode() == ISD::SMULFIXSAT ||
11347 Node->getOpcode() == ISD::UMULFIXSAT) &&
11348 "Expected a fixed point multiplication opcode");
11349
11350 SDLoc dl(Node);
11351 SDValue LHS = Node->getOperand(0);
11352 SDValue RHS = Node->getOperand(1);
11353 EVT VT = LHS.getValueType();
11354 unsigned Scale = Node->getConstantOperandVal(2);
11355 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11356 Node->getOpcode() == ISD::UMULFIXSAT);
11357 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11358 Node->getOpcode() == ISD::SMULFIXSAT);
11359 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11360 unsigned VTSize = VT.getScalarSizeInBits();
11361
11362 if (!Scale) {
11363 // [us]mul.fix(a, b, 0) -> mul(a, b)
11364 if (!Saturating) {
11365 if (isOperationLegalOrCustom(ISD::MUL, VT))
11366 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11367 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11368 SDValue Result =
11369 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11370 SDValue Product = Result.getValue(0);
11371 SDValue Overflow = Result.getValue(1);
11372 SDValue Zero = DAG.getConstant(0, dl, VT);
11373
11374 APInt MinVal = APInt::getSignedMinValue(VTSize);
11375 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11376 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11377 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11378 // Xor the inputs, if resulting sign bit is 0 the product will be
11379 // positive, else negative.
11380 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11381 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11382 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11383 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11384 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11385 SDValue Result =
11386 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11387 SDValue Product = Result.getValue(0);
11388 SDValue Overflow = Result.getValue(1);
11389
11390 APInt MaxVal = APInt::getMaxValue(VTSize);
11391 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11392 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11393 }
11394 }
11395
11396 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11397 "Expected scale to be less than the number of bits if signed or at "
11398 "most the number of bits if unsigned.");
11399 assert(LHS.getValueType() == RHS.getValueType() &&
11400 "Expected both operands to be the same type");
11401
11402 // Get the upper and lower bits of the result.
11403 SDValue Lo, Hi;
11404 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11405 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11406 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11407 if (VT.isVector())
11408 WideVT =
11409 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11410 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11411 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11412 Lo = Result.getValue(0);
11413 Hi = Result.getValue(1);
11414 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11415 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11416 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11417 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11418 // Try for a multiplication using a wider type.
11419 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11420 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11421 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11422 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11423 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11424 SDValue Shifted =
11425 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11426 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11427 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11428 } else if (VT.isVector()) {
11429 return SDValue();
11430 } else {
11431 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11432 }
11433
11434 if (Scale == VTSize)
11435 // Result is just the top half since we'd be shifting by the width of the
11436 // operand. Overflow is impossible, so this works for both UMULFIX and
11437 // UMULFIXSAT.
11438 return Hi;
11439
11440 // The result will need to be shifted right by the scale since both operands
11441 // are scaled. The result is given to us in 2 halves, so we only want part of
11442 // both in the result.
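// In other words Result = (Hi:Lo) >> Scale, expressed as a funnel shift.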
11443 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11444 DAG.getShiftAmountConstant(Scale, VT, dl));
11445 if (!Saturating)
11446 return Result;
11447
11448 if (!Signed) {
11449 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11450 // widened multiplication) aren't all zeroes.
11451
11452 // Saturate to max if ((Hi >> Scale) != 0),
11453 // which is the same as if (Hi > ((1 << Scale) - 1))
11454 APInt MaxVal = APInt::getMaxValue(VTSize);
11455 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11456 dl, VT);
11457 Result = DAG.getSelectCC(dl, Hi, LowMask,
11458 DAG.getConstant(MaxVal, dl, VT), Result,
11459 ISD::SETUGT);
11460
11461 return Result;
11462 }
11463
11464 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11465 // widened multiplication) aren't all ones or all zeroes.
11466
11467 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11468 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11469
11470 if (Scale == 0) {
11471 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11472 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11473 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11474 // Saturate to SatMin if the wide product is negative, and to SatMax if
11475 // the wide product is positive ...
11476 SDValue Zero = DAG.getConstant(0, dl, VT);
11477 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11478 ISD::SETLT);
11479 // ... but only if we overflowed.
11480 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11481 }
11482
11483 // We handled Scale == 0 above, so all the bits to examine are in Hi.
11484
11485 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11486 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11487 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11488 dl, VT);
11489 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11490 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11491 // which is the same as if (Hi < (-1 << (Scale - 1))).
11492 SDValue HighMask =
11493 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11494 dl, VT);
11495 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11496 return Result;
11497}
11498
11499SDValue
11500TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11501 SDValue LHS, SDValue RHS,
11502 unsigned Scale, SelectionDAG &DAG) const {
11503 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11504 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11505 "Expected a fixed point division opcode");
11506
11507 EVT VT = LHS.getValueType();
11508 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11509 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11510 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11511
11512 // If there is enough room in the type to upscale the LHS or downscale the
11513 // RHS before the division, we can perform it in this type without having to
11514 // resize. For signed operations, the LHS headroom is the number of
11515 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11516 // The headroom for the RHS is the number of trailing zeroes.
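// e.g. an 8-bit unsigned divide with Scale == 3 needs LHSLead + RHSTrail
// of at least 3 so that (LHS << LHSShift) / (RHS >> RHSShift) can be
// formed without widening the type.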
11517 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11518 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11519 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11520
11521 // For signed saturating operations, we need to be able to detect true integer
11522 // division overflow; that is, when you have MIN / -EPS. However, this
11523 // is undefined behavior and if we emit divisions that could take such
11524 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11525 // example).
11526 // Avoid this by requiring an extra bit so that we never get this case.
11527 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11528 // signed saturating division, we need to emit a whopping 32-bit division.
11529 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11530 return SDValue();
11531
11532 unsigned LHSShift = std::min(LHSLead, Scale);
11533 unsigned RHSShift = Scale - LHSShift;
11534
11535 // At this point, we know that if we shift the LHS up by LHSShift and the
11536 // RHS down by RHSShift, we can emit a regular division with a final scaling
11537 // factor of Scale.
11538
11539 if (LHSShift)
11540 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11541 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11542 if (RHSShift)
11543 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11544 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11545
11546 SDValue Quot;
11547 if (Signed) {
11548 // For signed operations, if the resulting quotient is negative and the
11549 // remainder is nonzero, subtract 1 from the quotient to round towards
11550 // negative infinity.
11551 SDValue Rem;
11552 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11553 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11554 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11555 if (isTypeLegal(VT) &&
11556 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11557 Quot = DAG.getNode(ISD::SDIVREM, dl,
11558 DAG.getVTList(VT, VT),
11559 LHS, RHS);
11560 Rem = Quot.getValue(1);
11561 Quot = Quot.getValue(0);
11562 } else {
11563 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11564 LHS, RHS);
11565 Rem = DAG.getNode(ISD::SREM, dl, VT,
11566 LHS, RHS);
11567 }
11568 SDValue Zero = DAG.getConstant(0, dl, VT);
11569 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11570 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11571 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11572 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11573 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11574 DAG.getConstant(1, dl, VT));
11575 Quot = DAG.getSelect(dl, VT,
11576 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11577 Sub1, Quot);
11578 } else
11579 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11580 LHS, RHS);
11581
11582 return Quot;
11583}
11584
11585void TargetLowering::expandUADDSUBO(
11586 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11587 SDLoc dl(Node);
11588 SDValue LHS = Node->getOperand(0);
11589 SDValue RHS = Node->getOperand(1);
11590 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11591
11592 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11593 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11594 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11595 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11596 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11597 { LHS, RHS, CarryIn });
11598 Result = SDValue(NodeCarry.getNode(), 0);
11599 Overflow = SDValue(NodeCarry.getNode(), 1);
11600 return;
11601 }
11602
11603 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11604 LHS.getValueType(), LHS, RHS);
11605
11606 EVT ResultType = Node->getValueType(1);
11607 EVT SetCCType = getSetCCResultType(
11608 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11609 SDValue SetCC;
11610 if (IsAdd && isOneConstant(RHS)) {
11611 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
11612 // the live range of X. We assume comparing with 0 is cheap.
11613 // The general case (X + C) < C is not necessarily beneficial. Although we
11614 // reduce the live range of X, we may introduce the materialization of
11615 // constant C.
11616 SetCC =
11617 DAG.getSetCC(dl, SetCCType, Result,
11618 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11619 } else if (IsAdd && isAllOnesConstant(RHS)) {
11620 // Special case: uaddo X, -1 overflows if X != 0.
11621 SetCC =
11622 DAG.getSetCC(dl, SetCCType, LHS,
11623 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11624 } else {
11625 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11626 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11627 }
11628 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11629}
11630
11631void TargetLowering::expandSADDSUBO(
11632 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11633 SDLoc dl(Node);
11634 SDValue LHS = Node->getOperand(0);
11635 SDValue RHS = Node->getOperand(1);
11636 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11637
11638 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11639 LHS.getValueType(), LHS, RHS);
11640
11641 EVT ResultType = Node->getValueType(1);
11642 EVT OType = getSetCCResultType(
11643 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11644
11645 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11646 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11647 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11648 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11649 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11650 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11651 return;
11652 }
11653
11654 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11655
11656 // For an addition, the result should be less than one of the operands (LHS)
11657 // if and only if the other operand (RHS) is negative, otherwise there will
11658 // be overflow.
11659 // For a subtraction, the result should be less than one of the operands
11660 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11661 // otherwise there will be overflow.
11662 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11663 SDValue ConditionRHS =
11664 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11665
11666 Overflow = DAG.getBoolExtOrTrunc(
11667 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11668 ResultType, ResultType);
11669}
11670
11671bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11672 SDValue &Overflow, SelectionDAG &DAG) const {
11673 SDLoc dl(Node);
11674 EVT VT = Node->getValueType(0);
11675 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11676 SDValue LHS = Node->getOperand(0);
11677 SDValue RHS = Node->getOperand(1);
11678 bool isSigned = Node->getOpcode() == ISD::SMULO;
11679
11680 // For power-of-two multiplications we can use a simpler shift expansion.
11681 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11682 const APInt &C = RHSC->getAPIntValue();
11683 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11684 if (C.isPowerOf2()) {
11685 // smulo(x, signed_min) is the same as umulo(x, signed_min).
11686 bool UseArithShift = isSigned && !C.isMinSignedValue();
11687 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11688 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11689 Overflow = DAG.getSetCC(dl, SetCCVT,
11690 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11691 dl, VT, Result, ShiftAmt),
11692 LHS, ISD::SETNE);
11693 return true;
11694 }
11695 }
11696
11697 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11698 if (VT.isVector())
11699 WideVT =
11700 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11701
11702 SDValue BottomHalf;
11703 SDValue TopHalf;
11704 static const unsigned Ops[2][3] =
11705 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11706 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11707 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11708 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11709 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11710 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11711 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11712 RHS);
11713 TopHalf = BottomHalf.getValue(1);
11714 } else if (isTypeLegal(WideVT)) {
11715 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11716 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11717 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11718 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11719 SDValue ShiftAmt =
11720 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11721 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11722 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11723 } else {
11724 if (VT.isVector())
11725 return false;
11726
11727 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11728 }
11729
11730 Result = BottomHalf;
11731 if (isSigned) {
11732 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11733 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11734 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11735 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11736 } else {
11737 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11738 DAG.getConstant(0, dl, VT), ISD::SETNE);
11739 }
11740
11741 // Truncate the result if SetCC returns a larger type than needed.
11742 EVT RType = Node->getValueType(1);
11743 if (RType.bitsLT(Overflow.getValueType()))
11744 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11745
11746 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11747 "Unexpected result type for S/UMULO legalization");
11748 return true;
11749}
11750
11751SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11752 SDLoc dl(Node);
11753 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11754 SDValue Op = Node->getOperand(0);
11755 EVT VT = Op.getValueType();
11756
11757 // Try to use a shuffle reduction for power of two vectors.
11758 if (VT.isPow2VectorType()) {
11759 while (VT.getVectorNumElements() > 1) {
11760 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11761 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11762 break;
11763
11764 SDValue Lo, Hi;
11765 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11766 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11767 VT = HalfVT;
11768
11769 // Stop if splitting is enough to make the reduction legal.
11770 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11771 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11772 Node->getFlags());
11773 }
11774 }
11775
11776 if (VT.isScalableVector())
11778 "Expanding reductions for scalable vectors is undefined.");
11779
11780 EVT EltVT = VT.getVectorElementType();
11781 unsigned NumElts = VT.getVectorNumElements();
11782
11784 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11785
11786 SDValue Res = Ops[0];
11787 for (unsigned i = 1; i < NumElts; i++)
11788 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11789
11790 // Result type may be wider than element type.
11791 if (EltVT != Node->getValueType(0))
11792 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11793 return Res;
11794}
11795
11796SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11797 SDLoc dl(Node);
11798 SDValue AccOp = Node->getOperand(0);
11799 SDValue VecOp = Node->getOperand(1);
11800 SDNodeFlags Flags = Node->getFlags();
11801
11802 EVT VT = VecOp.getValueType();
11803 EVT EltVT = VT.getVectorElementType();
11804
11805 if (VT.isScalableVector())
11807 "Expanding reductions for scalable vectors is undefined.");
11808
11809 unsigned NumElts = VT.getVectorNumElements();
11810
11812 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11813
11814 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11815
11816 SDValue Res = AccOp;
11817 for (unsigned i = 0; i < NumElts; i++)
11818 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11819
11820 return Res;
11821}
11822
11823bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11824 SelectionDAG &DAG) const {
11825 EVT VT = Node->getValueType(0);
11826 SDLoc dl(Node);
11827 bool isSigned = Node->getOpcode() == ISD::SREM;
11828 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11829 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11830 SDValue Dividend = Node->getOperand(0);
11831 SDValue Divisor = Node->getOperand(1);
11832 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11833 SDVTList VTs = DAG.getVTList(VT, VT);
11834 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11835 return true;
11836 }
11837 if (isOperationLegalOrCustom(DivOpc, VT)) {
11838 // X % Y -> X-X/Y*Y
11839 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11840 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11841 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11842 return true;
11843 }
11844 return false;
11845}
11846
11847SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11848 SelectionDAG &DAG) const {
11849 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11850 SDLoc dl(SDValue(Node, 0));
11851 SDValue Src = Node->getOperand(0);
11852
11853 // DstVT is the result type, while SatVT is the size to which we saturate
11854 EVT SrcVT = Src.getValueType();
11855 EVT DstVT = Node->getValueType(0);
11856
11857 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11858 unsigned SatWidth = SatVT.getScalarSizeInBits();
11859 unsigned DstWidth = DstVT.getScalarSizeInBits();
11860 assert(SatWidth <= DstWidth &&
11861 "Expected saturation width smaller than result width");
11862
11863 // Determine minimum and maximum integer values and their corresponding
11864 // floating-point values.
11865 APInt MinInt, MaxInt;
11866 if (IsSigned) {
11867 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11868 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11869 } else {
11870 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11871 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11872 }
11873
11874 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11875 // libcall emission cannot handle this. Large result types will fail.
11876 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11877 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11878 SrcVT = Src.getValueType();
11879 }
11880
11881 const fltSemantics &Sem = SrcVT.getFltSemantics();
11882 APFloat MinFloat(Sem);
11883 APFloat MaxFloat(Sem);
11884
11885 APFloat::opStatus MinStatus =
11886 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11887 APFloat::opStatus MaxStatus =
11888 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11889 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11890 !(MaxStatus & APFloat::opStatus::opInexact);
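// e.g. for an unsigned SatWidth of 32 and an f32 source, MaxInt of
// 0xFFFFFFFF is not exactly representable in f32, so the conversion
// reports opInexact and the compare-and-select path below is used.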
11891
11892 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11893 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11894
11895 // If the integer bounds are exactly representable as floats and min/max are
11896 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11897 // of comparisons and selects.
11898 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11899 isOperationLegal(ISD::FMAXNUM, SrcVT);
11900 if (AreExactFloatBounds && MinMaxLegal) {
11901 SDValue Clamped = Src;
11902
11903 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11904 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11905 // Clamp by MaxFloat from above. NaN cannot occur.
11906 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11907 // Convert clamped value to integer.
11908 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11909 dl, DstVT, Clamped);
11910
11911 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11912 // which will cast to zero.
11913 if (!IsSigned)
11914 return FpToInt;
11915
11916 // Otherwise, select 0 if Src is NaN.
11917 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11918 EVT SetCCVT =
11919 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11920 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11921 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11922 }
11923
11924 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11925 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11926
11927 // Result of direct conversion. The assumption here is that the operation is
11928 // non-trapping and it's fine to apply it to an out-of-range value if we
11929 // select it away later.
11930 SDValue FpToInt =
11931 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11932
11933 SDValue Select = FpToInt;
11934
11935 EVT SetCCVT =
11936 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11937
11938 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11939 // MinInt if Src is NaN.
11940 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11941 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11942 // If Src OGT MaxFloat, select MaxInt.
11943 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11944 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11945
11946 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11947 // is already zero.
11948 if (!IsSigned)
11949 return Select;
11950
11951 // Otherwise, select 0 if Src is NaN.
11952 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11953 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11954 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11955}
11956
11957SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11958 const SDLoc &dl,
11959 SelectionDAG &DAG) const {
11960 EVT OperandVT = Op.getValueType();
11961 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11962 return Op;
11963 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11964 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11965 // can induce double-rounding which may alter the results. We can
11966 // correct for this using a trick explained in: Boldo, Sylvie, and
11967 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11968 // World Congress. 2005.
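// Rounding to odd forces the last retained bit to 1 whenever the first
// conversion is inexact; such a value can never sit exactly halfway
// between two values of the final type, so the second rounding cannot
// pick the wrong neighbour.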
11969 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11970 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11971
11972 // We can keep the narrow value as-is if narrowing was exact (no
11973 // rounding error), the wide value was NaN (the narrow value is also
11974 // NaN and should be preserved) or if we rounded to the odd value.
11975 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11976 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11977 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11978 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11979 EVT ResultIntVTCCVT = getSetCCResultType(
11980 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11981 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11982 // The result is already odd so we don't need to do anything.
11983 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11984
11985 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11986 Op.getValueType());
11987 // We keep results which are exact, odd or NaN.
11988 SDValue KeepNarrow =
11989 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11990 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11991 // We effectively performed a round-down if AbsNarrowAsWide is smaller
11992 // than AbsWide.
11993 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11994 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11995 SDValue NarrowIsRd =
11996 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11997 // If the narrow value is odd or exact, pick it.
11998 // Otherwise, narrow is even and corresponds to either the rounded-up
11999 // or rounded-down value. If narrow is the rounded-down value, we want
12000 // the rounded-up value as it will be odd.
12001 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12002 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12003 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12004 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12005}
12006
12007SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
12008 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12009 SDValue Op = Node->getOperand(0);
12010 EVT VT = Node->getValueType(0);
12011 SDLoc dl(Node);
12012 if (VT.getScalarType() == MVT::bf16) {
12013 if (Node->getConstantOperandVal(1) == 1) {
12014 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12015 }
12016 EVT OperandVT = Op.getValueType();
12017 SDValue IsNaN = DAG.getSetCC(
12018 dl,
12019 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12020 Op, Op, ISD::SETUO);
12021
12022 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12023 // can induce double-rounding which may alter the results. We can
12024 // correct for this using a trick explained in: Boldo, Sylvie, and
12025 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12026 // World Congress. 2005.
12027 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12028 EVT I32 = F32.changeTypeToInteger();
12029 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12030 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12031
12032 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12033 // turning into infinities.
12034 SDValue NaN =
12035 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12036
12037 // Factor in the contribution of the low 16 bits.
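// Adding 0x7fff plus the LSB of the upper half implements
// round-to-nearest-even: low halves above 0x8000 carry into the upper
// bits, lower ones do not, and exact ties round up only when the LSB is
// already set.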
12038 SDValue One = DAG.getConstant(1, dl, I32);
12039 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12040 DAG.getShiftAmountConstant(16, I32, dl));
12041 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12042 SDValue RoundingBias =
12043 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
12044 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12045
12046 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12047 // 0x80000000.
12048 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12049
12050 // Now that we have rounded, shift the bits into position.
12051 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12052 DAG.getShiftAmountConstant(16, I32, dl));
12053 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12054 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12055 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12056 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12057 }
12058 return SDValue();
12059}
12060
12061SDValue TargetLowering::expandVectorSplice(SDNode *Node,
12062 SelectionDAG &DAG) const {
12063 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12064 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12065 "Unexpected opcode!");
12066 assert(Node->getValueType(0).isScalableVector() &&
12067 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
12068
12069 EVT VT = Node->getValueType(0);
12070 SDValue V1 = Node->getOperand(0);
12071 SDValue V2 = Node->getOperand(1);
12072 uint64_t Imm = Node->getConstantOperandVal(2);
12073 SDLoc DL(Node);
12074
12075 // Expand through memory thusly:
12076 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12077 // Store V1, Ptr
12078 // Store V2, Ptr + sizeof(V1)
12079 // If (Imm < 0)
12080 // TrailingElts = -Imm
12081 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
12082 // else
12083 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
12084 // Res = Load Ptr
12085
12086 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12087
12089 VT.getVectorElementCount() * 2);
12090 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12091 EVT PtrVT = StackPtr.getValueType();
12092 auto &MF = DAG.getMachineFunction();
12093 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12094 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12095
12096 // Store the lo part of CONCAT_VECTORS(V1, V2)
12097 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12098 // Store the hi part of CONCAT_VECTORS(V1, V2)
12099 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12100 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12101 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12102
12103 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT) {
12104 // Load back the required element. getVectorElementPointer takes care of
12105 // clamping the index if it's out-of-bounds.
12106 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
12107 // Load the spliced result
12108 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12109 MachinePointerInfo::getUnknownStack(MF));
12110 }
12111
12112 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
12113 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
12114 SDValue TrailingBytes = DAG.getConstant(Imm * EltByteSize, DL, PtrVT);
12115
12116 if (Imm > VT.getVectorMinNumElements())
12117 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12118
12119 // Calculate the start address of the spliced result.
12120 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12121
12122 // Load the spliced result
12123 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
12124 MachinePointerInfo::getUnknownStack(MF));
12125}
12126
12127SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
12128 SelectionDAG &DAG) const {
12129 SDLoc DL(Node);
12130 SDValue Vec = Node->getOperand(0);
12131 SDValue Mask = Node->getOperand(1);
12132 SDValue Passthru = Node->getOperand(2);
12133
12134 EVT VecVT = Vec.getValueType();
12135 EVT ScalarVT = VecVT.getScalarType();
12136 EVT MaskVT = Mask.getValueType();
12137 EVT MaskScalarVT = MaskVT.getScalarType();
12138
12139 // Needs to be handled by targets that have scalable vector types.
12140 if (VecVT.isScalableVector())
12141 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
12142
12143 SDValue StackPtr = DAG.CreateStackTemporary(
12144 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
12145 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12146 MachinePointerInfo PtrInfo =
12147 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
12148
12149 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
12150 SDValue Chain = DAG.getEntryNode();
12151 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
12152
12153 bool HasPassthru = !Passthru.isUndef();
12154
12155 // If we have a passthru vector, store it on the stack, overwrite the matching
12156 // positions and then re-write the last element that was potentially
12157 // overwritten even though mask[i] = false.
12158 if (HasPassthru)
12159 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
12160
12161 SDValue LastWriteVal;
12162 APInt PassthruSplatVal;
12163 bool IsSplatPassthru =
12164 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12165
12166 if (IsSplatPassthru) {
12167 // As we do not know which position we wrote to last, we cannot simply
12168 // access that index from the passthru vector. So we first check if passthru
12169 // is a splat vector, to use any element ...
12170 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12171 } else if (HasPassthru) {
12172 // ... if it is not a splat vector, we need to get the passthru value at
12173 // position = popcount(mask) and re-load it from the stack before it is
12174 // overwritten in the loop below.
12175 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12176 SDValue Popcount = DAG.getNode(
12177 ISD::TRUNCATE, DL,
12178 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
12179 Popcount = DAG.getNode(
12180 ISD::ZERO_EXTEND, DL,
12181 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
12182 Popcount);
12183 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12184 SDValue LastElmtPtr =
12185 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12186 LastWriteVal = DAG.getLoad(
12187 ScalarVT, DL, Chain, LastElmtPtr,
12189 Chain = LastWriteVal.getValue(1);
12190 }
12191
12192 unsigned NumElms = VecVT.getVectorNumElements();
12193 for (unsigned I = 0; I < NumElms; I++) {
12194 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12195 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12196 Chain = DAG.getStore(
12197 Chain, DL, ValI, OutPtr,
12198 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12199
12200 // Get the mask value and add it to the current output position. This
12201 // either increments by 1 if MaskI is true or adds 0 otherwise.
12202 // Freeze in case we have poison/undef mask entries.
12203 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12204 MaskI = DAG.getFreeze(MaskI);
12205 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12206 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12207 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12208
12209 if (HasPassthru && I == NumElms - 1) {
12210 SDValue EndOfVector =
12211 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12212 SDValue AllLanesSelected =
12213 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12214 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12215 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12216
12217 // Re-write the last ValI if all lanes were selected. Otherwise,
12218 // overwrite the last write with the passthru value.
12219 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12220 LastWriteVal, SDNodeFlags::Unpredictable);
12221 Chain = DAG.getStore(
12222 Chain, DL, LastWriteVal, OutPtr,
12223 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12224 }
12225 }
12226
12227 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12228}
12229
12230SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12231 SelectionDAG &DAG) const {
12232 SDLoc DL(N);
12233 SDValue Acc = N->getOperand(0);
12234 SDValue MulLHS = N->getOperand(1);
12235 SDValue MulRHS = N->getOperand(2);
12236 EVT AccVT = Acc.getValueType();
12237 EVT MulOpVT = MulLHS.getValueType();
12238
12239 EVT ExtMulOpVT =
12240 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12241 MulOpVT.getVectorElementCount());
12242
12243 unsigned ExtOpcLHS, ExtOpcRHS;
12244 switch (N->getOpcode()) {
12245 default:
12246 llvm_unreachable("Unexpected opcode");
12247 case ISD::PARTIAL_REDUCE_UMLA:
12248 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12249 break;
12250 case ISD::PARTIAL_REDUCE_SMLA:
12251 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12252 break;
12253 case ISD::PARTIAL_REDUCE_FMLA:
12254 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12255 break;
12256 }
12257
12258 if (ExtMulOpVT != MulOpVT) {
12259 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12260 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12261 }
12262 SDValue Input = MulLHS;
12263 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12264 if (!llvm::isOneOrOneSplatFP(MulRHS))
12265 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12266 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12267 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12268 }
12269
12270 unsigned Stride = AccVT.getVectorMinNumElements();
12271 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12272
12273 // Collect all of the subvectors
12274 std::deque<SDValue> Subvectors = {Acc};
12275 for (unsigned I = 0; I < ScaleFactor; I++)
12276 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12277
12278 unsigned FlatNode =
12279 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12280
12281 // Flatten the subvector tree
12282 while (Subvectors.size() > 1) {
12283 Subvectors.push_back(
12284 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12285 Subvectors.pop_front();
12286 Subvectors.pop_front();
12287 }
12288
12289 assert(Subvectors.size() == 1 &&
12290 "There should only be one subvector after tree flattening");
12291
12292 return Subvectors[0];
12293}
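// Worked example (illustrative): for a PARTIAL_REDUCE_UMLA with
// AccVT = v4i32 and MulOpVT = v16i8, both inputs are zero-extended to
// v16i32 and multiplied, Stride = 4 and ScaleFactor = 4, so the result is
//
//   Acc + Input[0..3] + Input[4..7] + Input[8..11] + Input[12..15]
//
// where each bracketed term is a v4i32 EXTRACT_SUBVECTOR and the additions
// are paired off by the queue-based flattening loop above.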
12294
12295/// Given a store node \p StoreNode, return true if it is safe to fold that node
12296/// into \p FPNode, which expands to a library call with output pointers.
12297 static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
12298 SDNode *FPNode) {
12299 SmallVector<const SDNode *, 8> Worklist;
12300 SmallVector<const SDNode *, 8> DeferredNodes;
12301 SmallPtrSet<const SDNode *, 16> Visited;
12302
12303 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12304 for (SDValue Op : StoreNode->ops())
12305 if (Op.getNode() != FPNode)
12306 Worklist.push_back(Op.getNode());
12307
12308 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
12309 while (!Worklist.empty()) {
12310 const SDNode *Node = Worklist.pop_back_val();
12311 auto [_, Inserted] = Visited.insert(Node);
12312 if (!Inserted)
12313 continue;
12314
12315 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12316 return false;
12317
12318 // Reached the FPNode (would result in a cycle).
12319 // OR Reached CALLSEQ_START (would result in nested call sequences).
12320 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12321 return false;
12322
12323 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12324 // Defer looking into call sequences (so we can check we're outside one).
12325 // We still need to look through these for the predecessor check.
12326 DeferredNodes.push_back(Node);
12327 continue;
12328 }
12329
12330 for (SDValue Op : Node->ops())
12331 Worklist.push_back(Op.getNode());
12332 }
12333
12334 // True if we're outside a call sequence and don't have the FPNode as a
12335 // predecessor. No cycles or nested call sequences possible.
12336 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12337 MaxSteps);
12338}
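// For intuition (illustrative sketch): if an FSINCOS node is used by a plain
// store of its sin result, that store can be folded away and its pointer
// passed directly as the libcall's output pointer. The walk above rejects
// folds that would create a cycle (another store operand reaches FPNode) or
// nest call sequences (a store operand reaches an open CALLSEQ_START).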
12339
12340 bool TargetLowering::expandMultipleResultFPLibCall(
12341 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12342 SmallVectorImpl<SDValue> &Results,
12343 std::optional<unsigned> CallRetResNo) const {
12344 if (LC == RTLIB::UNKNOWN_LIBCALL)
12345 return false;
12346
12347 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12348 if (LibcallImpl == RTLIB::Unsupported)
12349 return false;
12350
12351 LLVMContext &Ctx = *DAG.getContext();
12352 EVT VT = Node->getValueType(0);
12353 unsigned NumResults = Node->getNumValues();
12354
12355 // Find users of the node that store the results (and share input chains). The
12356 // destination pointers can be used instead of creating stack allocations.
12357 SDValue StoresInChain;
12358 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12359 for (SDNode *User : Node->users()) {
12360 if (!ISD::isNormalStore(User))
12361 continue;
12362 auto *ST = cast<StoreSDNode>(User);
12363 SDValue StoreValue = ST->getValue();
12364 unsigned ResNo = StoreValue.getResNo();
12365 // Ensure the store corresponds to an output pointer.
12366 if (CallRetResNo == ResNo)
12367 continue;
12368 // Ensure the store is to the default address space and is not atomic or volatile.
12369 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12370 continue;
12371 // Ensure all store chains are the same (so they don't alias).
12372 if (StoresInChain && ST->getChain() != StoresInChain)
12373 continue;
12374 // Ensure the store is properly aligned.
12375 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12376 if (ST->getAlign() <
12377 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12378 continue;
12379 // Avoid:
12380 // 1. Creating cyclic dependencies.
12381 // 2. Expanding the node to a call within a call sequence.
12382 if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
12383 continue;
12384 ResultStores[ResNo] = ST;
12385 StoresInChain = ST->getChain();
12386 }
12387
12388 ArgListTy Args;
12389
12390 // Pass the arguments.
12391 for (const SDValue &Op : Node->op_values()) {
12392 EVT ArgVT = Op.getValueType();
12393 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12394 Args.emplace_back(Op, ArgTy);
12395 }
12396
12397 // Pass the output pointers.
12398 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12399 Type *PointerTy = PointerType::getUnqual(Ctx);
12400 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12401 if (ResNo == CallRetResNo)
12402 continue;
12403 EVT ResVT = Node->getValueType(ResNo);
12404 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12405 ResultPtrs[ResNo] = ResultPtr;
12406 Args.emplace_back(ResultPtr, PointerTy);
12407 }
12408
12409 SDLoc DL(Node);
12410
12411 if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
12412 // Pass the vector mask (if required).
12413 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12414 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12415 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12416 }
12417
12418 Type *RetType = CallRetResNo.has_value()
12419 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12420 : Type::getVoidTy(Ctx);
12421 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12422 SDValue Callee =
12423 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
12424 TargetLowering::CallLoweringInfo CLI(DAG);
12425 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12426 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12427
12428 auto [Call, CallChain] = LowerCallTo(CLI);
12429
12430 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12431 if (ResNo == CallRetResNo) {
12432 Results.push_back(Call);
12433 continue;
12434 }
12435 MachinePointerInfo PtrInfo;
12436 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12437 ResultPtr, PtrInfo);
12438 SDValue OutChain = LoadResult.getValue(1);
12439
12440 if (StoreSDNode *ST = ResultStores[ResNo]) {
12441 // Replace store with the library call.
12442 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12443 PtrInfo = ST->getPointerInfo();
12444 } else {
12445 PtrInfo = MachinePointerInfo::getFixedStack(
12446 DAG.getMachineFunction(),
12447 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12448 }
12449
12450 Results.push_back(LoadResult);
12451 }
12452
12453 return true;
12454}
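// Typical use (illustrative; the exact libcall enum and types are
// assumptions): a legalizer expanding an FSINCOS node N could do
//
//   SmallVector<SDValue, 2> Results;
//   if (TLI.expandMultipleResultFPLibCall(DAG, RTLIB::SINCOS_F32, N, Results,
//                                         /*CallRetResNo=*/std::nullopt)) {
//     // Results[0] and Results[1] reload sin(x) and cos(x) from the output
//     // pointers (or reuse the pointers of folded user stores).
//   }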
12455
12456 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12457 SDValue &LHS, SDValue &RHS,
12458 SDValue &CC, SDValue Mask,
12459 SDValue EVL, bool &NeedInvert,
12460 const SDLoc &dl, SDValue &Chain,
12461 bool IsSignaling) const {
12462 MVT OpVT = LHS.getSimpleValueType();
12463 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12464 NeedInvert = false;
12465 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12466 bool IsNonVP = !EVL;
12467 switch (getCondCodeAction(CCCode, OpVT)) {
12468 default:
12469 llvm_unreachable("Unknown condition code action!");
12470 case TargetLowering::Legal:
12471 // Nothing to do.
12472 break;
12473 case TargetLowering::Expand: {
12474 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12475 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12476 std::swap(LHS, RHS);
12477 CC = DAG.getCondCode(InvCC);
12478 return true;
12479 }
12480 // Swapping operands didn't work. Try inverting the condition.
12481 bool NeedSwap = false;
12482 InvCC = getSetCCInverse(CCCode, OpVT);
12483 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12484 // If inverting the condition is not enough, try swapping operands
12485 // on top of it.
12486 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12487 NeedSwap = true;
12488 }
12489 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12490 CC = DAG.getCondCode(InvCC);
12491 NeedInvert = true;
12492 if (NeedSwap)
12493 std::swap(LHS, RHS);
12494 return true;
12495 }
12496
12497 // Special case: expand i1 comparisons using logical operations.
12498 if (OpVT == MVT::i1) {
12499 SDValue Ret;
12500 switch (CCCode) {
12501 default:
12502 llvm_unreachable("Unknown integer setcc!");
12503 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12504 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12505 MVT::i1);
12506 break;
12507 case ISD::SETNE: // X != Y --> (X ^ Y)
12508 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12509 break;
12510 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12511 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12512 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12513 DAG.getNOT(dl, LHS, MVT::i1));
12514 break;
12515 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12516 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12517 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12518 DAG.getNOT(dl, RHS, MVT::i1));
12519 break;
12520 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12521 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12522 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12523 DAG.getNOT(dl, LHS, MVT::i1));
12524 break;
12525 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12526 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12527 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12528 DAG.getNOT(dl, RHS, MVT::i1));
12529 break;
12530 }
12531
12532 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12533 RHS = SDValue();
12534 CC = SDValue();
12535 return true;
12536 }
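// Sanity check of one identity above (illustrative): i1 lanes hold the bit
// values 0 and 1, so X <u Y holds only for X = 0, Y = 1, which is exactly
// the single-bit expression ~X & Y.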
12537
12538 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12539 unsigned Opc = 0;
12540 switch (CCCode) {
12541 default:
12542 llvm_unreachable("Don't know how to expand this condition!");
12543 case ISD::SETUO:
12544 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12545 CC1 = ISD::SETUNE;
12546 CC2 = ISD::SETUNE;
12547 Opc = ISD::OR;
12548 break;
12549 }
12550 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12551 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12552 NeedInvert = true;
12553 [[fallthrough]];
12554 case ISD::SETO:
12555 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12556 "If SETO is expanded, SETOEQ must be legal!");
12557 CC1 = ISD::SETOEQ;
12558 CC2 = ISD::SETOEQ;
12559 Opc = ISD::AND;
12560 break;
12561 case ISD::SETONE:
12562 case ISD::SETUEQ:
12563 // If the SETUO or SETO CC isn't legal, we might be able to use
12564 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12565 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12566 // the operands.
12567 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12568 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12569 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12570 CC1 = ISD::SETOGT;
12571 CC2 = ISD::SETOLT;
12572 Opc = ISD::OR;
12573 NeedInvert = ((unsigned)CCCode & 0x8U);
12574 break;
12575 }
12576 [[fallthrough]];
12577 case ISD::SETOEQ:
12578 case ISD::SETOGT:
12579 case ISD::SETOGE:
12580 case ISD::SETOLT:
12581 case ISD::SETOLE:
12582 case ISD::SETUNE:
12583 case ISD::SETUGT:
12584 case ISD::SETUGE:
12585 case ISD::SETULT:
12586 case ISD::SETULE:
12587 // If we are floating point, assign and break, otherwise fall through.
12588 if (!OpVT.isInteger()) {
12589 // We can use the 4th bit to tell if we are the unordered
12590 // or ordered version of the opcode.
12591 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12592 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12593 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12594 break;
12595 }
12596 // Fall through if we are an unsigned integer.
12597 [[fallthrough]];
12598 case ISD::SETLE:
12599 case ISD::SETGT:
12600 case ISD::SETGE:
12601 case ISD::SETLT:
12602 case ISD::SETNE:
12603 case ISD::SETEQ:
12604 // If all combinations of inverting the condition and swapping operands
12605 // didn't work then we have no means to expand the condition.
12606 llvm_unreachable("Don't know how to expand this condition!");
12607 }
12608
12609 SDValue SetCC1, SetCC2;
12610 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12611 // If we aren't the ordered or unordered operation,
12612 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12613 if (IsNonVP) {
12614 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12615 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12616 } else {
12617 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12618 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12619 }
12620 } else {
12621 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12622 if (IsNonVP) {
12623 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12624 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12625 } else {
12626 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12627 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12628 }
12629 }
12630 if (Chain)
12631 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12632 SetCC2.getValue(1));
12633 if (IsNonVP)
12634 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12635 else {
12636 // Transform the binary opcode to the VP equivalent.
12637 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12638 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12639 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12640 }
12641 RHS = SDValue();
12642 CC = SDValue();
12643 return true;
12644 }
12645 }
12646 return false;
12647}
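// Example (illustrative): if SETONE and SETO are not legal for an FP type
// but SETOGT and SETOLT are, the expansion above produces
//
//   setcc x, y, setone  =>  (setcc x, y, setogt) | (setcc x, y, setolt)
//
// and SETUEQ reuses the same pair with NeedInvert = true.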
12648
12649 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12650 SelectionDAG &DAG) const {
12651 EVT VT = Node->getValueType(0);
12652 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12653 // split into two equal parts.
12654 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12655 return SDValue();
12656
12657 // Restrict expansion to cases where both parts can be concatenated.
12658 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12659 if (LoVT != HiVT || !isTypeLegal(LoVT))
12660 return SDValue();
12661
12662 SDLoc DL(Node);
12663 unsigned Opcode = Node->getOpcode();
12664
12665 // Don't expand if the result is likely to be unrolled anyway.
12666 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12667 return SDValue();
12668
12669 SmallVector<SDValue, 4> LoOps, HiOps;
12670 for (const SDValue &V : Node->op_values()) {
12671 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12672 LoOps.push_back(Lo);
12673 HiOps.push_back(Hi);
12674 }
12675
12676 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12677 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12678 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12679}
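// Example (illustrative, assuming a v8i16 operation whose v4i16 half is
// legal): OP a, b : v8i16 becomes
//
//   concat_vectors (OP a.lo, b.lo), (OP a.hi, b.hi)
//
// with a.lo/a.hi the two v4i16 halves produced by SplitVector above.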
12680
12681 SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12682 const SDLoc &DL,
12683 EVT InVecVT, SDValue EltNo,
12684 LoadSDNode *OriginalLoad,
12685 SelectionDAG &DAG) const {
12686 assert(OriginalLoad->isSimple());
12687
12688 EVT VecEltVT = InVecVT.getVectorElementType();
12689
12690 // If the vector element type is not a multiple of a byte then we are unable
12691 // to correctly compute an address to load only the extracted element as a
12692 // scalar.
12693 if (!VecEltVT.isByteSized())
12694 return SDValue();
12695
12696 ISD::LoadExtType ExtTy =
12697 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12698 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12699 return SDValue();
12700
12701 std::optional<unsigned> ByteOffset;
12702 Align Alignment = OriginalLoad->getAlign();
12703 MachinePointerInfo MPI;
12704 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12705 int Elt = ConstEltNo->getZExtValue();
12706 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12707 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12708 Alignment = commonAlignment(Alignment, *ByteOffset);
12709 } else {
12710 // Discard the pointer info except the address space because the memory
12711 // operand can't represent this new access since the offset is variable.
12712 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12713 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12714 }
12715
12716 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12717 return SDValue();
12718
12719 unsigned IsFast = 0;
12720 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12721 OriginalLoad->getAddressSpace(), Alignment,
12722 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12723 !IsFast)
12724 return SDValue();
12725
12726 // The original DAG loaded the entire vector from memory, so arithmetic
12727 // within it must be inbounds.
12728 SDValue NewPtr = getInboundsVectorElementPointer(
12729 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12730
12731 // We are replacing a vector load with a scalar load. The new load must have
12732 // identical memory op ordering to the original.
12733 SDValue Load;
12734 if (ResultVT.bitsGT(VecEltVT)) {
12735 // If the result type of vextract is wider than the load, then issue an
12736 // extending load instead.
12737 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12738 ? ISD::ZEXTLOAD
12739 : ISD::EXTLOAD;
12740 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12741 NewPtr, MPI, VecEltVT, Alignment,
12742 OriginalLoad->getMemOperand()->getFlags(),
12743 OriginalLoad->getAAInfo());
12744 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12745 } else {
12746 // The result type is narrower or the same width as the vector element
12747 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12748 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12749 OriginalLoad->getAAInfo());
12750 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12751 if (ResultVT.bitsLT(VecEltVT))
12752 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12753 else
12754 Load = DAG.getBitcast(ResultVT, Load);
12755 }
12756
12757 return Load;
12758}
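// Example (illustrative): extracting element 2 from a v4i32 load of %p
// becomes an i32 load of %p + 8; the alignment is reduced with
// commonAlignment(OrigAlign, 8) and the new load keeps the original load's
// memory ordering via makeEquivalentMemoryOrdering.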
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1410
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1221
APInt bitcastToAPInt() const
Definition APFloat.h:1416
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1201
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1161
void changeSign()
Definition APFloat.h:1360
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1172
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1415
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1400
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1394
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1349
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1405
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:835
void negate()
Negate this APInt in place.
Definition APInt.h:1477
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1648
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1607
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1540
unsigned countLeadingZeros() const
Definition APInt.h:1615
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1444
unsigned logBase2() const
Definition APInt.h:1770
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:828
void setAllBits()
Set every bit to 1.
Definition APInt.h:1328
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1285
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1151
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1376
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1426
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1397
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1451
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1665
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1352
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:720
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:214
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
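A sketch of how this hook is typically paired with getExtendForContent when widening a boolean result; VT, WideVT, Cond, DL, DAG and TLI are assumed from the surrounding context:
  // Pick the extension that matches the target's boolean encoding.
  TargetLowering::BooleanContent BC =
      TLI.getBooleanContents(VT.isVector(), VT.isFloatingPoint());
  unsigned ExtOp = TargetLowering::getExtendForContent(BC);
  SDValue Wide = DAG.getNode(ExtOp, DL, WideVT, Cond);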
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
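A sketch of the usual DAG-combine guard built on this query (TLI, DAG, DL, VT, A and B are assumed from the combine's context):
  // Only form an SMAX node if the target can select or custom-lower it.
  if (TLI.isOperationLegalOrCustom(ISD::SMAX, VT))
    return DAG.getNode(ISD::SMAX, DL, VT, A, B);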
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
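A sketch of checking both legality and speed of a possibly misaligned access before keeping a wide load; VT, AddrSpace and Alignment are assumptions taken from the caller:
  unsigned Fast = 0;
  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             AddrSpace, Alignment,
                             MachineMemOperand::MONone, &Fast) &&
      Fast) {
    // The wide access is both allowed and fast; emit it directly.
  }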
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
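A sketch of dispatching on the returned LegalizeAction for one opcode/type pair:
  switch (TLI.getOperationAction(ISD::CTPOP, VT)) {
  case TargetLowering::Legal:   break; // keep the node as-is
  case TargetLowering::Promote: break; // redo in a larger type
  case TargetLowering::Custom:  break; // defer to LowerOperation
  case TargetLowering::LibCall: break; // emit a runtime library call
  case TargetLowering::Expand:  break; // expand to generic operations
  }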
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate the full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
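A sketch of a target falling back to this generic expansion; MyTargetLowering and LowerCTLZ are hypothetical names:
  // Hypothetical custom-lowering hook that defers to the generic expansion.
  SDValue MyTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
    if (SDValue Expanded = expandCTLZ(Op.getNode(), DAG))
      return Expanded;
    return SDValue(); // let the legalizer try something else
  }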
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
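A sketch of the out-parameter convention shared by the expand* helpers that can fail:
  SDValue Result;
  if (TLI.expandREM(Node, Result, DAG))
    return Result; // a legal SDIV/UDIV or SDIVREM/UDIVREM was available
  // otherwise fall back to a libcall or further expansion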
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
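A sketch of consuming the (value, chain) pair this helper returns; LD is the original LoadSDNode:
  std::pair<SDValue, SDValue> ValChain = TLI.expandUnalignedLoad(LD, DAG);
  // Re-wire users of the original load's value and chain results.
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), ValChain.first);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), ValChain.second);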
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
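A sketch of a target override; MYISD::HI16 is a hypothetical target node whose low 16 result bits are always zero:
  void MyTargetLowering::computeKnownBitsForTargetNode(
      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
      const SelectionDAG &DAG, unsigned Depth) const {
    Known.resetAll();
    if (Op.getOpcode() == MYISD::HI16)
      Known.Zero.setLowBits(16); // low half is always zero
  }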
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn a load of a vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
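A sketch of the usual pattern inside a target's PerformDAGCombine, assuming N is the node being combined and only the low 8 bits of its operand matter:
  SDValue Op = N->getOperand(0);
  unsigned BitWidth = Op.getScalarValueSizeInBits();
  APInt Demanded = APInt::getLowBitsSet(BitWidth, 8);
  TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;
  if (TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) {
    DCI.CommitTargetLoweringOpt(TLO); // fold the simplified operand in
    return SDValue(N, 0);
  }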
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
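A sketch of softening an f64 operation to a runtime call through this interface, resolving the LibcallImpl first via getLibcallImpl as the signatures listed here suggest:
  RTLIB::LibcallImpl Impl = TLI.getLibcallImpl(RTLIB::SIN_F64);
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Op.getOperand(0)};
  std::pair<SDValue, SDValue> CallResult =
      TLI.makeLibCall(DAG, Impl, MVT::f64, Ops, CallOptions, SDLoc(Op));
  SDValue Result = CallResult.first;  // the call's return value
  SDValue Chain = CallResult.second;  // the output chain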
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:796
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:712
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3020
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:787
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:778
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:852
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:879
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:746
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:909
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with or without intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:992
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:773
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:843
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:714
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:664
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:786
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:849
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:810
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, IMM) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by IMM elements and retu...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:898
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:887
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:977
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:804
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:925
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:709
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, IMM) - Shifts CONCAT_VECTORS(VEC1, VEC2) right by IMM elements and re...
Definition ISDOpcodes.h:656
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:958
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:920
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:944
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:832
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:721
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching a ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2106
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1595
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1775
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
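A sketch of the common scalar-or-splat match this enables:
  // Match a divisor that is a power-of-two constant or a splat thereof.
  if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)))
    if (C->getAPIntValue().isPowerOf2()) {
      // Strength-reduce the division to a shift.
    }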
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1632
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:113
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:314
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:268
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:255
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:164
KnownBits byteSwap() const
Definition KnownBits.h:532
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:302
KnownBits reverseBits() const
Definition KnownBits.h:536
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:246
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:175
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:334
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:324
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:183
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:261
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition KnownBits.h:170
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:299
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
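The KnownBits entries above form a small dataflow algebra: facts are created (makeConstant), transformed (zext, trunc, computeForAddSub, mul), merged (intersectWith, unionWith), and queried (countMinLeadingZeros, ult, ...). A hedged, self-contained sketch of that flow at an assumed 8-bit width:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;

void knownBitsSketch() {
  // LHS is the exact constant 0x0F; RHS only has its top nibble known
  // zero, so it lies somewhere in [0x00, 0x0F].
  KnownBits LHS = KnownBits::makeConstant(APInt(8, 0x0F));
  KnownBits RHS(8);
  RHS.Zero.setHighBits(4);

  // The sum lies in [0x0F, 0x1E], so at least 3 leading zeros are provable.
  KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                              /*NUW=*/false, LHS, RHS);
  unsigned MinLZ = Sum.countMinLeadingZeros();
  (void)MinLZ;

  // The comparison helpers return std::nullopt when the bits can't decide;
  // here the answer is definitely true, since Sum <= 0x1E < 0x20.
  std::optional<bool> Lt =
      KnownBits::ult(Sum, KnownBits::makeConstant(APInt(8, 0x20)));
  (void)Lt;
}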
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR or to virtual locations (such as frame indices) that are exposed during codegen.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
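A short sketch of how these factories are typically combined when splitting a stack access; MF and the frame index FI are assumed to come from the surrounding lowering code:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Describe the two halves of a 16-byte object in frame slot FI.
static void describeSplitSlot(MachineFunction &MF, int FI) {
  MachinePointerInfo Lo = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo Hi = Lo.getWithOffset(8); // second 8-byte half
  unsigned AS = Lo.getAddrSpace();             // LLVM IR address space
  (void)Hi; (void)AS;
}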
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
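These flags are how IR-level nuw/nsw information reaches SDNodes and gates wrap-sensitive folds. A hedged fragment, assuming DAG, DL, VT, A and B exist inside a lowering hook:

SDNodeFlags Flags;
Flags.setNoSignedWrap(true); // assumption: the add was proven not to wrap
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, A, B, Flags);
if (Add->getFlags().hasNoSignedWrap()) {
  // Safe to apply signed-wrap-dependent combines here.
}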
This represents a list of ValueTypes that has been interned by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
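The magic-number technique turns a signed divide by a constant into a high multiply plus shifts. A hedged sketch of querying the constants for a 32-bit divide by 7 (the Magic/ShiftAmount fields come from DivisionByConstantInfo.h):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

void signedMagicSketch() {
  APInt D(32, 7, /*isSigned=*/true);
  SignedDivisionByConstantInfo Magics = SignedDivisionByConstantInfo::get(D);
  // Conceptually, as BuildSDIV emits it:
  //   q = mulhs(x, Magics.Magic) >> Magics.ShiftAmount
  //   result = q + (q >>u (BitWidth - 1))   // add back the sign bit
}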
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
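For inline assembly, a matching input constraint such as "0" or "4" forces an input to share a location with an output operand. A sketch of pairing the two queries above, assuming OpInfo is a populated AsmOperandInfo:

if (OpInfo.isMatchingInputConstraint()) {
  // This input must be placed in the same location as that output.
  unsigned OutputIdx = OpInfo.getMatchedOperand();
  (void)OutputIdx;
} else if (OpInfo.ConstraintCode == "m") {
  // Ordinary memory constraint; there is no matched operand to query.
}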
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
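Each setter returns *this, so call lowering builds the structure fluently before handing it to LowerCallTo. A sketch, assuming DAG, dl, Chain, Callee, RetTy, Args and TLI are provided by the surrounding code:

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(true)
    .setDiscardResult(false);
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
// Result.first is the return value; Result.second is the output chain.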
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
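MakeLibCallOptions follows the same fluent pattern. A sketch of a softened floating-point libcall, assuming TLI, DAG, dl, the RTLIB::Libcall LC, the softened RetVT/Ops, and the original pre-softening types OrigOpsVT/OrigRetVT are all supplied by the caller:

TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setIsSigned(true)
    .setTypeListBeforeSoften(OrigOpsVT, OrigRetVT);
std::pair<SDValue, SDValue> Call =
    TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, dl);
// Call.first is the call's result; Call.second is the chain.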
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to the caller.
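TargetLoweringOpt and the DAGCombinerInfo methods listed above meet in the standard demanded-bits pattern used by target combine hooks. A sketch, assuming DCI, TLI, Op and DemandedBits come from such a hook:

TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                      !DCI.isBeforeLegalizeOps());
if (TLI.SimplifyDemandedBits(Op, DemandedBits, TLO))
  DCI.CommitTargetLoweringOpt(TLO); // commits TLO's Old -> New replacement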
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
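The unsigned variant may additionally need a pre-shift (for even divisors) or an add-and-shift fixup when the magic multiply would overflow. A hedged sketch for a 32-bit divide by 7 (the Magic/IsAdd/PreShift/PostShift fields come from DivisionByConstantInfo.h):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

void unsignedMagicSketch() {
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(APInt(32, 7));
  // Conceptually, as BuildUDIV emits it:
  //   q = mulhu(x >> Magics.PreShift, Magics.Magic);
  //   if (Magics.IsAdd)
  //     q = ((x - q) >> 1) + q;   // avoids the overflowing multiply
  //   result = q >> Magics.PostShift;
}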